import os
import tkinter
from tkinter import filedialog

from flask import current_app, request
from flask_socketio import emit

# 'JSON' is the project's serialization helper; a hypothetical sketch of it
# follows this example.


def add_market():
    root = tkinter.Tk()
    root.withdraw()
    path = filedialog.askdirectory(parent=root,
                                   initialdir="/",
                                   title='Please select the product directory to upload')
    if path:
        name = os.path.basename(path)
        path = path.replace('/', '\\')  # normalize to Windows-style path separators
        market = {'name': name, 'directory': path}
        markets = current_app.data.markets

        if market['name'] not in markets:
            markets[market['name']] = market
            JSON.serialize(markets, '.', 'storage', 'markets.json')
            return market
        else:
            msg = {
                'type': 'warning',
                'content': 'Market ' + market['name'] + ' is already in the system!'
            }
            emit('notify', msg, room=request.sid)
            return
    else:
        msg = {
            'type': 'primary',
            'content': 'No market directory was selected.'
        }
        emit('notify', msg, room=request.sid)
        return
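
All of these examples funnel through a project-local JSON helper whose definition is not shown. Below is a minimal sketch of what it might look like, inferred only from the call sites (JSON.serialize(obj, root, paths, filename, append=False) and JSON.deserialize(root, paths, filename)); the gzip handling for the '.json.gz' files and the append-as-JSON-lines semantics are assumptions.

# Hypothetical sketch of the project's JSON helper, inferred from call sites.
import gzip
import json
import os


class JSON:
    @staticmethod
    def _path(root, paths, filename):
        # 'paths' may be a single subdirectory name, a list of them, or [].
        parts = [paths] if isinstance(paths, str) else list(paths)
        directory = os.path.join(root, *parts)
        os.makedirs(directory, exist_ok=True)
        return os.path.join(directory, filename)

    @staticmethod
    def serialize(obj, root, paths, filename, append=False):
        path = JSON._path(root, paths, filename)
        opener = gzip.open if filename.endswith('.gz') else open
        if append:
            # Assumed append semantics: one JSON document per line.
            with opener(path, 'at', encoding='utf-8') as f:
                f.write(json.dumps(obj, ensure_ascii=False) + '\n')
        else:
            with opener(path, 'wt', encoding='utf-8') as f:
                json.dump(obj, f, ensure_ascii=False, indent=2)

    @staticmethod
    def deserialize(root, paths, filename):
        path = JSON._path(root, paths, filename)
        if not os.path.exists(path):
            return None
        opener = gzip.open if filename.endswith('.gz') else open
        with opener(path, 'rt', encoding='utf-8') as f:
            return json.load(f)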
Example #2
    def serialize(self, visitors):
        # Group visitor records by date, then write one file per date.
        dates = {}
        for v in visitors:
            date = v['date']
            if date not in dates:
                dates[date] = []
            dates[date].append(v)

        for d in dates:
            fn = 'visitors_' + d + '.json'
            JSON.serialize(dates[d], self.config_dir, 'visitors', fn)
Example #3
    def save_tracking_ids(self):
        fn = 'inquiry_tracking_ids_' + self.lname.split(' ')[0] + '.json'
        root = self.market['directory'] + '_config'
        tracking_ids = {}
        for key, value in self.tracking_ids.items():
            # 'datetime' values are pendulum objects, which are not
            # JSON-serializable; store them as Atom (RFC 3339) strings.
            tracking_ids[key] = {
                'datetime': value['datetime'].to_atom_string(),
                'status': value['status'],
            }
            if 'emails' in value:
                tracking_ids[key]['emails'] = value['emails']
        JSON.serialize(tracking_ids, root, [], fn)
def remove_market(market):
    markets = JSON.deserialize('.', 'storage', 'markets.json')
    if market['name'] in markets:
        del markets[market['name']]
        JSON.serialize(markets, '.', 'storage', 'markets.json')
        return True
    else:
        msg = {
            'type': 'warning',
            'content': 'Market ' + market['name'] +
                       ' was not found. Try refreshing your browser!'
        }
        emit('notify', msg, room=request.sid)
        return False
Example #5
    def crawl_product_ranking(self, keyword, pages):
        self.current_page = 0
        records = []
        print(keyword, end=': ')

        while self.current_page < pages:
            # next_page() is expected to advance self.current_page;
            # otherwise this loop would never terminate.
            html = self.next_page(keyword)
            self.crawl_current_page(html, records=records)
        print('length:', len(records), end=', ')
        print('done!')

        obj = {
            'datetime': pendulum.now().to_datetime_string(),
            'records': records
        }
        JSON.serialize(obj, self.market['directory'] + '_config',
                       'products_ranking', keyword + '.json')
        return obj
    def check_balance(self):
        balance = self.browser.find_element_by_css_selector(
            '.sc-manage-edit-price-dialog span[data-role="span-balance"]').text

        if self.broker_url:
            # GETSET writes the new balance and atomically returns the
            # previous one, so each reading can be compared with the last.
            self.balance = self.redis.getset(self.market['name'] + '_p4p_balance', balance)

            if float(balance) == self.initial_balance or self.balance is None:
                return
            else:
                self.balance = self.balance.decode()
                if self.balance != balance:
                    diff = format(float(self.balance) - float(balance), '.2f')

                    # Deduct the spend from the remaining sub-budget and flag
                    # an overflow if the budget drops below zero.
                    changes = -float(diff)
                    sub_budget = self.redis.get(self.market['name'] + '_p4p_sub_budget')
                    if sub_budget is not None and float(sub_budget) > 0:
                        if self.redis.incrbyfloat(self.market['name'] + '_p4p_sub_budget', changes) < 0:
                            self.redis.set(self.market['name'] + '_p4p_sub_budget_overflow', True)

                    time_str = arrow.now().format('YYYY-MM-DD HH:mm:ss')
                    date_str = time_str.split(' ')[0]
                    root = self.market['directory'] + '_config'
                    fn = 'p4p_balance_change_history_' + date_str + '.json.gz'
                    JSON.serialize([time_str, diff], root, [], fn, append=True)
        else:
            if self.balance is None:
                self.balance = balance
            elif float(balance) == self.initial_balance:
                return
            elif self.balance != balance:
                diff = format(float(self.balance) - float(balance), '.2f')
                self.balance = balance

                time_str = arrow.now().format('YYYY-MM-DD HH:mm:ss')
                date_str = time_str.split(' ')[0]
                root = self.market['directory'] + '_config'
                fn = 'p4p_balance_change_history_' + date_str + '.json.gz'
                JSON.serialize([time_str, diff], root, [], fn, append=True)
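
check_balance above relies on Redis GETSET, which writes the new value and atomically returns the previous one. A minimal standalone sketch of the same bookkeeping pattern (assumes a local Redis server; the key names here are hypothetical):

# Standalone sketch of the GETSET balance-tracking pattern (hypothetical keys).
import redis

r = redis.Redis()
new_balance = '95.50'
previous = r.getset('demo_p4p_balance', new_balance)  # old value as bytes, or None
if previous is not None:
    spent = float(previous.decode()) - float(new_balance)  # > 0 when balance dropped
    # Deduct the spend from a running sub-budget, as check_balance does.
    r.incrbyfloat('demo_p4p_sub_budget', -spent)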
    def crawl_keywords(self):
        keywords = []
        with self.lock:
            self.load_url()

            while True:
                html = self.browser.find_element_by_css_selector(
                    'div.keyword-manage .bp-table-main-wraper>table tbody'
                ).get_attribute('outerHTML')  # ('wraper' is the page's own class spelling)
                tbody = pq(html)
                trs = tbody.find('tr')

                for tr in trs:
                    row = pq(tr)
                    kws = {}
                    kws['id'] = row.find('td:first-child input').val()
                    kws['status'] = row.find('td.bp-cell-status .bp-dropdown-main i').attr('class').split('-').pop()
                    kws['kws'] = row.find('td.bp-cell-left').text().strip()
                    groups = row.find('td[data-role="table-col-tag"]').text().strip()
                    kws['my_price'] = row.find('td:nth-child(5) a').text().strip()
                    kws['average_price'] = row.find('td:nth-child(6)').text().strip()
                    string = row.find('span.qs-star-wrap i').attr('class')
                    kws['match_level'] = re.search(r'qsstar-(\d+)', string).group(1)
                    string = row.find('.bp-icon-progress-orange').html()
                    kws['search_count'] = re.search(r':(\d+%)', string).group(1)
                    string = row.find('.bp-icon-progress-blue').html()
                    kws['buy_count'] = re.search(r':(\d+%)', string).group(1)
                    # Emit one record per (keyword, group) pair.
                    for grp in groups.split(','):
                        group = grp.strip()
                        obj = kws.copy()
                        obj['group'] = group
                        keywords.append(obj)

                if not self.next_page():
                    break

        root = self.market['directory'] + '_config'
        fn = 'p4p_keywords_list.json'
        JSON.serialize(keywords, root, [], fn)
        return keywords
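
The row parsing above uses pyquery's jQuery-style API (find, attr, val, text). A self-contained sketch of the same pattern, on made-up HTML rather than the real page:

# Minimal pyquery row-parsing sketch (hypothetical HTML, not the real page).
from pyquery import PyQuery as pq

html = ('<table><tr>'
        '<td><input value="42"/></td>'
        '<td class="bp-cell-left"> red widget </td>'
        '</tr></table>')
row = pq(html).find('tr')
record = {
    'id': row.find('td:first-child input').val(),        # -> '42'
    'kws': row.find('td.bp-cell-left').text().strip(),   # -> 'red widget'
}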
    def set_keywords(self, tp, kws_list):
        self.keywords_list[tp] = kws_list
        fn = 'p4p_keywords_list_' + tp + '.json'
        root = self.market['directory'] + '_config'
        JSON.serialize(self.keywords_list[tp], root, [], fn)

    def save_crawling_result(self, keywords):
        root = self.market['directory'] + '_config'
        # keywords[0][0] is assumed to be a 'YYYY-MM-DD HH:mm:ss' timestamp;
        # its date part names the output file.
        date_str = keywords[0][0].split(' ')[0]
        fn = 'p4p_keywords_crawl_result_' + date_str + '.json.gz'
        JSON.serialize(keywords, root, [], fn, append=True)
Example #10
    def del_keywords(self, tp, kws):
        if kws in self.keywords_list[tp]:
            self.keywords_list[tp].remove(kws)
            fn = 'p4p_keywords_list_' + tp + '.json'
            root = self.market['directory'] + '_config'
            JSON.serialize(self.keywords_list[tp], root, [], fn)
Example #11
    def add_keywords(self, tp, kws):
        self.keywords_list[tp].append(kws)
        fn = 'p4p_keywords_list_' + tp + '.json'
        root = self.market['directory'] + '_config'
        JSON.serialize(self.keywords_list[tp], root, [], fn)
def background_crawling_keywords(keyword, website, page_quantity, sid, socketio,
                                 market):
    filename = 'keywords.json'
    root = market['directory'] + '_config'

    msg = {'type': 'primary', 'content': 'Opening browser ... ...'}
    socketio.emit('notify', msg, namespace='/markets', room=sid)

    chrome_options = webdriver.ChromeOptions()
    # chrome_options.add_argument('--headless')
    chrome_options.add_argument('--disable-gpu')
    chrome_options.add_argument('--disable-extensions')
    chrome_options.add_argument('--disable-logging')
    chrome_options.add_argument('--ignore-certificate-errors')
    browser = webdriver.Chrome(chrome_options=chrome_options)

    if website == 'alibaba':
        crawler_name = re.sub(' ', '_', keyword) + ' - ' + str(page_quantity) + ' pages - Alibaba'
        crawler = KwclrAlibaba(browser, keyword, page_quantity, sid, socketio)
    elif website == 'alibaba_sp':
        supplier = re.search(r'https://([^.]+)', keyword).group(1)
        category = 'all_products'
        if 'productgrouplist' in keyword:
            category = re.search(r'/([^/]+\.html)', keyword).group(1)
        crawler_name = (supplier + ' - ' + category + ' - ' +
                        str(page_quantity) + ' pages - Alibaba (supplier)')
        crawler = KwclrAliSp(browser, keyword, page_quantity, sid, socketio)
    elif website == 'alibaba_sr':
        crawler_name = re.sub(' ', '_', keyword) + ' - ' + str(page_quantity) + ' pages - Alibaba (showcase)'
        crawler = KwclrAliSr(browser, keyword, page_quantity, sid, socketio)
    elif website == 'amazon':
        crawler_name = re.sub(' ', '_', keyword) + ' - ' + str(page_quantity) + ' pages - Amazon'
        crawler = KwclrAmazon(browser, keyword, page_quantity, sid, socketio)

    msg = {'type': 'primary', 'content': 'Starting crawl ... ...'}
    socketio.emit('notify', msg, namespace='/markets', room=sid)

    result = crawler.start()

    msg = {'type': 'primary', 'content': 'Crawl finished, closing browser ... ...'}
    socketio.emit('notify', msg, namespace='/markets', room=sid)
    browser.quit()

    msg = {'type': 'primary', 'content': 'Saving results ... ...'}
    socketio.emit('notify', msg, namespace='/markets', room=sid)
    obj = JSON.deserialize(root, [], filename)  # merge with previously saved results, if any
    if not obj:
        obj = {}
    obj[crawler_name] = result
    JSON.serialize(obj, root, [], filename)

    socketio.emit('keyword_crawling_result', {
        'key': crawler_name,
        'result': result
    },
                  namespace='/markets',
                  room=sid)
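
A long-running job like this is normally launched from a socket event handler so the crawl doesn't block the server. A minimal sketch, assuming the usual Flask-SocketIO setup (the 'crawl_keywords' event name and payload fields are hypothetical):

# Hypothetical launcher for the background crawl.
from flask import request
from flask_socketio import SocketIO

socketio = SocketIO()  # assumes the app's existing Flask-SocketIO instance


@socketio.on('crawl_keywords', namespace='/markets')
def handle_crawl(data):
    socketio.start_background_task(
        background_crawling_keywords,
        data['keyword'], data['website'], data['pages'],
        request.sid, socketio, data['market'])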
def serialize(obj, market, paths, filename):
    root = market['directory'] + '_config'
    JSON.serialize(obj, root, paths, filename)
def update_market(market):
    markets = JSON.deserialize('.', 'storage', 'markets.json')
    key = market['name']
    if key in markets:
        markets[key] = market
        JSON.serialize(markets, '.', 'storage', 'markets.json')