def add_market():
    """Ask the user for a product directory and register it as a market.

    A hidden Tk root hosts a directory chooser. The selected folder's base
    name becomes the market name and the path is stored with backslash
    separators. A new market is added to ``current_app.data.markets`` and the
    whole mapping is handed to ``JSON.serialize`` for ``storage/markets.json``.

    Returns:
        The new market dict (``name`` / ``directory``), or ``None`` after
        emitting a ``notify`` event when no directory was chosen or the name
        is already registered.
    """
    root = tkinter.Tk()
    root.withdraw()
    try:
        path = filedialog.askdirectory(parent=root,
                                       initialdir="/",
                                       title='请选择上传产品目录')
    finally:
        # The hidden root only exists to host the dialog; destroy it so each
        # call does not leave another Tk instance behind.
        root.destroy()

    if not path:
        msg = {
            'type': 'primary',
            'content': 'No directory of market was selected.'
        }
        emit('notify', msg, room=request.sid)
        return None

    name = os.path.basename(path)
    path = path.replace('/', '\\')
    market = {'name': name, 'directory': path}
    markets = current_app.data.markets

    if name in markets:
        # ``market`` is a plain dict, so the name must be read by key
        # (attribute access raised AttributeError here).
        msg = {
            'type': 'warning',
            'content': 'The Market of ' + market['name'] +
                       ' was already in system!'
        }
        emit('notify', msg, room=request.sid)
        return None

    markets[name] = market
    JSON.serialize(markets, '.', 'storage', 'markets.json')
    return market
def serialize(self, visitors):
    """Persist visitor records, one JSON file per distinct date.

    Records are grouped by their ``'date'`` value; each group is passed to
    ``JSON.serialize`` as ``visitors_<date>.json`` under the ``visitors``
    path of ``self.confi_dir``.
    """
    by_date = {}
    for visitor in visitors:
        by_date.setdefault(visitor['date'], []).append(visitor)

    for day, group in by_date.items():
        file_name = 'visitors_' + day + '.json'
        JSON.serialize(group, self.confi_dir, 'visitors', file_name)
def save_tracking_ids(self):
    """Write ``self.tracking_ids`` to the per-login tracking file.

    Each entry is copied into a plain dict with its ``datetime`` converted via
    ``to_atom_string()``; ``emails`` is copied only when present. The file name
    is derived from the first word of ``self.lname``.
    """
    first_name = self.lname.split(' ')[0]
    fn = 'inquiry_tracking_ids_' + first_name + '.json'
    root = self.market['directory'] + '_config'

    snapshot = {}
    for key, info in self.tracking_ids.items():
        entry = {
            'datetime': info['datetime'].to_atom_string(),
            'status': info['status'],
        }
        if 'emails' in info:
            entry['emails'] = info['emails']
        snapshot[key] = entry

    JSON.serialize(snapshot, root, [], fn)
def get_alibaba(node):
    """Return the cached ``Alibaba`` client, creating and logging it in once.

    ``node`` is split at ``'@'``; the part before it is read as
    ``<market>[...]`` optionally followed by ``:<login name>``. The market is
    looked up in ``storage/markets.json``. Without a login name (or when it
    equals the market's own ``lname``) the market credentials are used;
    otherwise every account whose ``lname`` contains the given name is tried.

    NOTE: when no account matches, ``app_data['alibaba']`` is still ``None``
    and the ``login()`` call below fails, as it did before.
    """
    if app_data['alibaba'] is not None:
        return app_data['alibaba']

    text = node.split('@')[0]
    parts = text.split(':')
    market_name = parts[0].split('[')[0]
    lname = parts[1] if len(parts) == 2 else None
    market = JSON.deserialize('.', 'storage', 'markets.json')[market_name]

    if lname is None or lname == market['lname']:
        app_data['alibaba'] = Alibaba(market['lid'],
                                      market['lpwd'],
                                      headless=headless,
                                      browser=get_browser())
    else:
        for account in market['accounts']:
            print(lname, account)
            if lname in account['lname']:
                # Use the matched account's own credentials; the previous
                # code read an undefined name ``alibaba`` for the login id.
                app_data['alibaba'] = Alibaba(account['lid'],
                                              account['lpwd'],
                                              headless=headless,
                                              browser=get_browser())

    app_data['alibaba'].login()
    return app_data['alibaba']
def get_products(market, paths):
    """List product folders, their files and their stored attributes.

    Args:
        market: dict with a ``'directory'`` entry (the product root).
        paths: list of path components below the product root.

    Returns:
        ``dict(folders=..., files=..., attributes=...)`` where ``folders`` are
        the sub-directories of the target path, ``files`` maps each folder to
        its directory listing, and ``attributes`` maps ``<folder>_<pid>`` to
        the content of every ``*_attributes.json`` found in the matching
        ``<directory>_config`` tree.
    """
    product_dir = os.path.join(market['directory'], *paths)
    folders = []
    for entry in os.listdir(product_dir):
        if os.path.isdir(os.path.join(product_dir, entry)):
            folders.append(entry)

    files = {
        folder: os.listdir(os.path.join(product_dir, folder))
        for folder in folders
    }

    attrs = {}
    root_config = market['directory'] + '_config'
    path_config = os.path.join(root_config, *paths)
    if not os.path.exists(path_config):
        return dict(folders=folders, files=files, attributes=attrs)

    for folder in os.listdir(path_config):
        folder_path = os.path.join(path_config, folder)
        if not os.path.exists(folder_path) or os.path.isfile(folder_path):
            continue
        for file in os.listdir(folder_path):
            if not file.endswith('_attributes.json'):
                continue
            pid = file.split('_')[0]
            # Copy before extending so the caller's list is left untouched.
            ps = paths[:]
            ps.append(folder)
            attrs[folder + '_' + pid] = JSON.deserialize(root_config, ps, file)

    return dict(folders=folders, files=files, attributes=attrs)
def get_visitor(node):
    """Return the cached ``Visitor`` for this worker, building it on first use.

    ``node`` is split at ``'@'``; the leading part is read as ``<market>[...]``
    with an optional ``:<login name>`` suffix. The market comes from
    ``storage/markets.json``. The main login is used when no name is given or
    it equals the market's ``lname``; otherwise each account whose ``lname``
    contains the name builds the visitor (the last match wins).
    """
    if 'visitor' in app_data:
        return app_data['visitor']

    head = node.split('@')[0]
    pieces = head.split(':')
    market_name = pieces[0].split('[')[0]
    lname = pieces[1] if len(pieces) == 2 else None
    market = JSON.deserialize('.', 'storage', 'markets.json')[market_name]

    main_login = lname is None or lname == market['lname']
    if main_login:
        app_data['visitor'] = Visitor(market,
                                      headless=headless,
                                      browser=get_browser())
        return app_data['visitor']

    for account in market['accounts']:
        print(lname, account)
        if lname not in account['lname']:
            continue
        app_data['visitor'] = Visitor(market,
                                      account,
                                      headless=headless,
                                      browser=get_browser())
    return app_data['visitor']
def remove_market(market):
    """Delete ``market`` from ``storage/markets.json``.

    Returns:
        ``True`` when the market was found and the file rewritten; ``False``
        after emitting a warning ``notify`` event when it was not stored.
    """
    stored = JSON.deserialize('.', 'storage', 'markets.json')
    name = market['name']

    if name not in stored:
        warning = {
            'type': 'warning',
            'content': 'Market ' + market['name'] +
                       ' was not found. Try Refesh Your Browser!'
        }
        emit('notify', warning, room=request.sid)
        return False

    del stored[name]
    JSON.serialize(stored, '.', 'storage', 'markets.json')
    return True
def get_market(name):
    """Return the stored market called ``name``, or ``None`` if unknown.

    ``JSON.deserialize`` can yield ``None`` for the markets file (other
    callers of it check for that), so an absent store is treated as "no such
    market" instead of failing on the membership test.
    """
    markets = JSON.deserialize('.', 'storage', 'markets.json')
    if not markets:
        return None
    if name in markets:
        return markets[name]
    return None
def get_p4p(node):
    """Return the cached ``P4P`` instance, creating it on first use.

    The text before ``'@'`` in ``node`` is read as ``<market>[...]`` with an
    optional ``:<login name>``. The market is loaded from
    ``storage/markets.json``. The market's own credentials are used when no
    login name is given or it equals the market's ``lname``; otherwise every
    account whose ``lname`` contains the name builds the instance (the last
    match wins).
    """
    if app_data['p4p'] is not None:
        return app_data['p4p']

    head = node.split('@')[0]
    pieces = head.split(':')
    market_name = pieces[0].split('[')[0]
    lname = pieces[1] if len(pieces) == 2 else None
    market = JSON.deserialize('.', 'storage', 'markets.json')[market_name]

    if lname is None or lname == market['lname']:
        lid, lpwd = market['lid'], market['lpwd']
        lname = market['lname']
        app_data['p4p'] = P4P(market,
                              lid,
                              lpwd,
                              broker_url=app.conf.broker_url,
                              browser=get_browser(),
                              headless=headless)
        return app_data['p4p']

    for account in market['accounts']:
        if lname not in account['lname']:
            continue
        lid, lpwd = account['lid'], account['lpwd']
        app_data['p4p'] = P4P(market,
                              lid,
                              lpwd,
                              broker_url=app.conf.broker_url,
                              browser=get_browser(),
                              headless=headless)
    return app_data['p4p']
def settings():
    """Render the settings page with all stored markets.

    An absent markets store (``None``) is shown as an empty mapping.
    """
    stored = JSON.deserialize('.', 'storage', 'markets.json')
    markets = {} if stored is None else stored
    page = render_template('settings/index.html', markets=markets)
    return make_response(page)
def deserialize(self):
    """Load and concatenate every ``visitors_*.json`` file.

    File names come from listing ``self.visitor_dir``; each matching file is
    read with ``JSON.deserialize`` from the ``visitors`` path under
    ``self.confi_dir`` and appended to one list.
    """
    visitors = []
    for name in os.listdir(self.visitor_dir):
        if name.startswith('visitors_') and name.endswith('.json'):
            visitors += JSON.deserialize(self.confi_dir, 'visitors', name)
    return visitors
def crawl_product_ranking(self, keyword, pages):
    """Crawl result pages for ``keyword`` and store the collected records.

    Pages are fetched with ``self.next_page`` until ``self.current_page``
    reaches ``pages`` (presumably advanced by ``next_page`` — confirm), and
    ``self.crawl_current_page`` adds to ``records``. Progress goes to stdout.

    Returns:
        ``{'datetime': <now>, 'records': [...]}``, also written to
        ``products_ranking/<keyword>.json`` in the market's config directory.
    """
    self.current_page = 0
    records = []

    print(keyword, end=': ')
    while self.current_page < pages:
        page_html = self.next_page(keyword)
        self.crawl_current_page(page_html, records=records)
        print('length:', len(records), end=', ')
    print('done!')

    result = {
        'datetime': pendulum.now().to_datetime_string(),
        'records': records
    }
    config_root = self.market['directory'] + '_config'
    JSON.serialize(result, config_root, 'products_ranking', keyword + '.json')
    return result
def load_keywords(self, tp):
    """Load the keyword list of type ``tp`` into ``self.keywords_list[tp]``.

    Does nothing for a falsy ``tp``. A missing file (``None`` from
    ``JSON.deserialize``) is stored as an empty list.
    """
    if not tp:
        return

    root = self.market['directory'] + '_config'
    fn = 'p4p_keywords_list_' + tp + '.json'
    loaded = JSON.deserialize(root, [], fn)
    self.keywords_list[tp] = [] if loaded is None else loaded
def deserialize(market, paths, filename, shallow=False):
    """Read ``filename`` from the market's config tree.

    Args:
        market: dict with a ``'directory'`` entry.
        paths: path components below ``<directory>_config``.
        filename: file to read at each level.
        shallow: when true, read only at ``paths`` and return that object.

    Returns:
        With ``shallow`` the single deserialized object. Otherwise a list of
        objects read while walking from ``paths`` up to the config root. When
        the current last component ends with ``' serie'`` (case-insensitive)
        and ``filename`` contains ``'_'``, the same level is read again with
        the part after the first underscore before moving up.
    """
    root = market['directory'] + '_config'
    if shallow:
        return JSON.deserialize(root, paths, filename)

    # Walk a private copy: the loop pops components, which previously emptied
    # the list object owned by the caller.
    remaining = list(paths)
    objects = []
    while True:
        objects.append(JSON.deserialize(root, remaining[:], filename))
        if not remaining:
            break
        if remaining[-1].lower().endswith(' serie') and '_' in filename:
            filename = filename.split('_')[1]
        else:
            remaining.pop()
    return objects
def markets(name):
    """Render the markets page for the market called ``name``.

    The template receives all stored markets plus the selected one. ``market``
    is ``None`` when the store is absent or ``name`` is not in it; the latter
    used to raise ``KeyError`` and now follows the same path the empty-store
    case already took.
    """
    stored = JSON.deserialize('.', 'storage', 'markets.json')
    if stored is None:
        stored = {}
    market = stored.get(name)

    response = make_response(
        render_template('markets/index.html', markets=stored, market=market))
    return response
def get_products_ranking(node):
    """Return the cached ``ProductsRanking``, creating it on first use.

    The market name is the text of ``node`` before ``'@'``, cut at the first
    ``':'`` and then at the first ``'['``; the market itself is read from
    ``storage/markets.json``.
    """
    cached = app_data['products_ranking']
    if cached is not None:
        return cached

    head = node.split('@')[0]
    market_name = head.split(':')[0].split('[')[0]
    market = JSON.deserialize('.', 'storage', 'markets.json')[market_name]

    app_data['products_ranking'] = ProductsRanking(market=market)
    return app_data['products_ranking']
def check_balance(self):
    """Read the balance shown in the price dialog and log any change.

    With ``self.broker_url`` set, the previous balance is exchanged through
    Redis under ``<market name>_p4p_balance``; otherwise it is kept on
    ``self.balance``. When the balance differs from the previous one, a
    ``[timestamp, diff]`` pair is appended to the day's
    ``p4p_balance_change_history_<date>.json.gz`` in the market config
    directory. Nothing is logged when the balance equals
    ``self.initial_balance`` or no previous balance is known.
    """
    # Balance text as displayed on the page (a string).
    balance = self.browser.find_element_by_css_selector(
        '.sc-manage-edit-price-dialog span[data-role="span-balance"]').text
    if self.broker_url:
        # Store the new balance and fetch the prior one in a single call
        # (presumably the standard Redis GETSET semantics — confirm).
        self.balance = self.redis.getset(self.market['name']+'_p4p_balance', balance)
        if float(balance) == self.initial_balance or self.balance is None:
            return
        else:
            # The value from Redis is decoded to text before comparison.
            self.balance = self.balance.decode()
        if self.balance != balance:
            # diff = previous - current, formatted with two decimals.
            diff = format(float(self.balance) - float(balance), '.2f')
            # Same amount with the sign flipped, applied to the sub budget.
            changes = 0 - float(diff)
            sub_budget = self.redis.get(self.market['name'] + '_p4p_sub_budget')
            if sub_budget is not None and float(sub_budget) > 0:
                # Flag an overflow once the adjusted sub budget goes negative.
                if self.redis.incrbyfloat(self.market['name'] + '_p4p_sub_budget', changes) < 0:
                    self.redis.set(self.market['name'] + '_p4p_sub_budget_overflow', True)
            time_str = arrow.now().format('YYYY-MM-DD HH:mm:ss')
            # Date part of the timestamp selects the per-day history file.
            date_str = time_str.split(' ')[0]
            root = self.market['directory'] + '_config'
            fn = 'p4p_balance_change_history_' + date_str + '.json.gz'
            JSON.serialize([time_str, diff], root, [], fn, append=True)
    else:
        if self.balance is None:
            # First reading: remember it, nothing to compare against yet.
            self.balance = balance
        elif float(balance) == self.initial_balance:
            return
        elif self.balance != balance:
            # diff = previous - current, formatted with two decimals.
            diff = format(float(self.balance) - float(balance), '.2f')
            self.balance = balance
            time_str = arrow.now().format('YYYY-MM-DD HH:mm:ss')
            date_str = time_str.split(' ')[0]
            root = self.market['directory'] + '_config'
            fn = 'p4p_balance_change_history_'+date_str+'.json.gz'
            JSON.serialize([time_str, diff], root, [], fn, append=True)
def crawl_keywords(self):
    """Scrape the keyword table page by page and save the result.

    While holding ``self.lock`` the page is loaded and every table row is
    parsed into a dict (``id``, ``status``, ``kws``, ``my_price``,
    ``average_price``, ``match_level``, ``search_count``, ``buy_count``). A
    row listed under several comma-separated groups yields one copy per
    group, each with its own ``group`` value. Paging stops when
    ``self.next_page()`` returns a falsy value.

    Returns:
        The list of keyword dicts, also written to ``p4p_keywords_list.json``
        in the market's config directory.
    """
    keywords = []
    with self.lock:
        self.load_url()
        while True:
            html = self.browser.find_element_by_css_selector(
                "div.keyword-manage .bp-table-main-wraper>table tbody"
            ).get_attribute('outerHTML')
            for tr in pq(html).find('tr'):
                # Wrap the row once instead of re-wrapping it per cell.
                row = pq(tr)
                kws = {}
                kws['id'] = row.find('td:first-child input').val()
                kws['status'] = row.find(
                    'td.bp-cell-status .bp-dropdown-main i'
                ).attr('class').split('-').pop()
                kws['kws'] = row.find('td.bp-cell-left').text().strip()
                groups = row.find('td[data-role="table-col-tag"]').text().strip()
                kws['my_price'] = row.find('td:nth-child(5) a').text().strip()
                kws['average_price'] = row.find('td:nth-child(6)').text().strip()
                # Raw strings: '\d' in a normal literal is an invalid escape.
                string = row.find('span.qs-star-wrap i').attr('class')
                kws['match_level'] = re.search(r'qsstar-(\d+)', string).group(1)
                string = row.find('.bp-icon-progress-orange').html()
                kws['search_count'] = re.search(r':(\d+%)', string).group(1)
                string = row.find('.bp-icon-progress-blue').html()
                kws['buy_count'] = re.search(r':(\d+%)', string).group(1)
                for grp in groups.split(','):
                    obj = kws.copy()
                    obj['group'] = grp.strip()
                    keywords.append(obj)
            if not self.next_page():
                break

    root = self.market['directory'] + '_config'
    fn = 'p4p_keywords_list.json'
    JSON.serialize(keywords, root, [], fn)
    return keywords
def __init__(self, market, account=None, headless=True, browser=None):
    """Set up credentials, directories, product index and recommendations.

    Args:
        market: market dict; supplies ``directory`` and, when no account is
            given, ``lid`` / ``lpwd`` / ``lname`` / ``mobile``.
        account: optional account dict that supplies those four login fields
            instead of the market.
        headless: stored on ``self.headless``.
        browser: stored on ``self.browser``.
    """
    self.market = market

    # Login details come from the account when one is given, else the market.
    source = account if account else market
    self.lid = source['lid']
    self.lpwd = source['lpwd']
    self.lname = source['lname']
    self.mobile = source['mobile']
    self.account = {
        'lid': self.lid,
        'lpwd': self.lpwd,
        'lname': self.lname,
        'mobile': self.mobile,
    }

    self.confi_dir = market['directory'] + '_config'
    self.visitor_dir = self.confi_dir + '//' + 'visitors'
    self.alibaba = None
    self.browser = browser
    self.headless = headless

    # Index the product list by its Alibaba id. A missing list (None) is
    # treated as empty instead of failing on iteration.
    self.products = {}
    product_list = JSON.deserialize(self.confi_dir, '.', 'product_list.json')
    for p in product_list or []:
        self.products[p['ali_id']] = p

    # Products offered as recommendations, keyed by product family.
    rp = {
        'mink eyelash': {'ali_id': '60761530720', 'price': 1.5},
        'silk eyelash': {'ali_id': '60795757606', 'price': 0.8},
        'magnetic eyelash': {'ali_id': '60763607812', 'price': 1.1},
        'individual eyelash': {'ali_id': '60732345735', 'price': 2.3},
        'flat eyelash': {'ali_id': '60764332933', 'price': 3.9},
        'premade fans glue bonded': {'ali_id': '60762376749', 'price': 0.9},
        'premade fans heat bonded': {'ali_id': '60795873604', 'price': 1.9},
    }

    def pick(*names):
        # Build one recommendation list in the given order.
        return [rp[name] for name in names]

    self.recommended = {}
    self.recommended['mink eyelash'] = pick(
        'mink eyelash', 'silk eyelash', 'magnetic eyelash')
    self.recommended['silk eyelash'] = pick(
        'silk eyelash', 'mink eyelash', 'magnetic eyelash')
    self.recommended['magnetic eyelash'] = pick(
        'magnetic eyelash', 'mink eyelash', 'silk eyelash')
    self.recommended['flat eyelash'] = pick(
        'flat eyelash', 'individual eyelash',
        'premade fans glue bonded', 'premade fans heat bonded')
    self.recommended['individual eyelash'] = pick(
        'individual eyelash', 'flat eyelash',
        'premade fans glue bonded', 'premade fans heat bonded')
    self.recommended['glue bonded'] = pick(
        'premade fans glue bonded', 'premade fans heat bonded',
        'individual eyelash', 'flat eyelash')
    self.recommended['heat bonded'] = pick(
        'premade fans heat bonded', 'premade fans glue bonded',
        'individual eyelash', 'flat eyelash')
    # 'premade fans' shares the very same list object as 'glue bonded'.
    self.recommended['premade fans'] = self.recommended['glue bonded']
    self.recommended['default'] = (self.recommended['mink eyelash'] +
                                   self.recommended['individual eyelash'])

    # The text deliberately holds literal backslash-n sequences, not newlines.
    self.mail_message = (
        "Hi,\\nNice Day. This is Ada.\\n"
        "Thanks for your visit to our products.\\n"
        "Would you pls tell us your WhatsApp number? "
        "I would like to send our product catalog and price list to you. "
        "Thanks\\nMy WhatsApp is +8618563918130.\\n\\nRegards\\nAda"
    )
def load_tracking_ids(self):
    """Reload ``self.tracking_ids`` from the per-login tracking file.

    Only entries whose parsed ``datetime`` is at most 24 hours from now (per
    ``diff().in_hours()``) are kept; ``emails`` is copied when present. A
    missing file leaves ``self.tracking_ids`` empty.
    """
    root = self.market['directory'] + '_config'
    fn = 'inquiry_tracking_ids_' + self.lname.split(' ')[0] + '.json'
    stored = JSON.deserialize(root, [], fn)

    self.tracking_ids = {}
    if stored is None:
        return

    for key in stored:
        raw = stored[key]
        when = pendulum.parse(raw['datetime'])
        if when.diff().in_hours() > 24:
            continue
        entry = self.tracking_ids[key] = {}
        entry['datetime'] = when
        entry['status'] = raw['status']
        if 'emails' in raw:
            entry['emails'] = raw['emails']
def get_products_rankings(market, keywords):
    """Load the stored product rankings for the given keywords.

    Args:
        market: dict with a ``'directory'`` entry.
        keywords: iterable of keyword strings.

    Returns:
        A list with one deserialized ranking per keyword that has a
        ``products_ranking/<keyword>.json`` file in the market's config
        directory, each with a ``'keyword'`` entry added. Keywords without a
        file are skipped.
    """
    root = market['directory'] + '_config'
    products_rankings = []
    for kw in keywords:
        filename = kw + '.json'
        if not os.path.isfile(os.path.join(root, 'products_ranking', filename)):
            continue
        # Pass the bare file name, like every other JSON.deserialize caller;
        # the full path used to be passed as the file-name argument.
        obj = JSON.deserialize(root, 'products_ranking', filename)
        obj['keyword'] = kw
        products_rankings.append(obj)
    return products_rankings
def get_visitors(market, start_date='2018-08-01', end_date=None):
    """Collect stored visitors whose file date lies in ``[start, end]``.

    Args:
        market: dict with a ``'directory'`` entry.
        start_date: first date to include, parsed with ``pendulum.parse``.
        end_date: last date to include; the current time when falsy.

    Returns:
        The concatenated content of every ``visitors_<date>.json`` file in
        the market's ``visitors`` config directory whose ``<date>`` falls in
        the range.
    """
    start = pendulum.parse(start_date)
    end = pendulum.parse(end_date) if end_date else pendulum.now()

    root = market['directory'] + '_config'
    visitor_dir = root + '//' + 'visitors'

    visitors = []
    for f in os.listdir(visitor_dir):
        if not (f.startswith('visitors_') and f.endswith('.json')):
            continue
        file_date = re.search('visitors_(.*).json', f).group(1)
        dt = pendulum.parse(file_date)
        if start <= dt <= end:
            visitors += JSON.deserialize(root, 'visitors', f)
    return visitors
def create_app(debug=True):
    """Create an application.

    Registers the main blueprint, attaches socketio and, except in the
    debug-mode process where ``WERKZEUG_RUN_MAIN`` is not ``'true'``, attaches
    an ``app.data`` namespace holding the stored markets, the Chrome options
    and a lock.

    Args:
        debug: value assigned to ``app.debug``.

    Returns:
        The configured Flask application. ``app.data`` is not set when the
        early return is taken.
    """
    app = Flask(__name__)
    app.debug = debug
    # SECURITY: prefer a secret from the environment. The built-in value is
    # kept only as a backward-compatible fallback and should not be relied on
    # in production.
    app.config['SECRET_KEY'] = os.environ.get('SECRET_KEY',
                                              'gjr39dkjn344_!67#')

    from .main import main as main_blueprint
    app.register_blueprint(main_blueprint)

    socketio.init_app(app)

    # In debug mode only the process with WERKZEUG_RUN_MAIN == 'true' goes on
    # to load the application data.
    if app.debug and os.environ.get('WERKZEUG_RUN_MAIN') != 'true':
        return app

    data = SimpleNamespace()
    data.reserve_title_mutex = threading.Lock()

    #
    # load products
    #
    markets = JSON.deserialize('.', 'storage', 'markets.json')
    if not markets:
        markets = {}
    data.markets = markets

    # Despite the variable name, the --headless flag is currently disabled.
    chrome_options_headless = webdriver.ChromeOptions()
    # chrome_options_headless.add_argument('--headless')
    chrome_options_headless.add_argument('--disable-gpu')
    chrome_options_headless.add_argument('--disable-extensions')
    chrome_options_headless.add_argument('--disable-logging')
    chrome_options_headless.add_argument('--ignore-certificate-errors')
    data.chrome_options = chrome_options_headless

    data.alibaba = None

    app.data = data
    return app
def add_keywords(self, tp, kws):
    """Append ``kws`` to the ``tp`` keyword list and save that list."""
    current = self.keywords_list[tp]
    current.append(kws)

    root = self.market['directory'] + '_config'
    fn = 'p4p_keywords_list_' + tp + '.json'
    JSON.serialize(self.keywords_list[tp], root, [], fn)
def serialize(obj, market, paths, filename):
    """Write ``obj`` to ``filename`` under the market's config directory.

    The config root is ``market['directory'] + '_config'``; ``paths`` and
    ``filename`` are passed through to ``JSON.serialize`` unchanged.
    """
    config_root = market['directory'] + '_config'
    JSON.serialize(obj, config_root, paths, filename)
def backgound_crawling_keywords(keyword, website, page_quantity, sid, socketio, market):
    """Run a keyword crawl in a fresh Chrome session and store the result.

    Progress is reported to the client ``sid`` through ``notify`` events on
    the ``/markets`` namespace. The result is merged into ``keywords.json``
    in the market's config directory under a descriptive crawler name and
    sent back as a ``keyword_crawling_result`` event.

    Args:
        keyword: search keyword, or a supplier URL for ``'alibaba_sp'``.
        website: one of ``'alibaba'``, ``'alibaba_sp'``, ``'alibaba_sr'``,
            ``'amazon'``.
        page_quantity: passed to the crawler and used in the crawler name.
        sid: socket room that receives the events.
        socketio: object used to emit the events.
        market: dict with a ``'directory'`` entry.

    Raises:
        ValueError: if ``website`` is not one of the supported values. This
            used to surface as a NameError after the browser was started.
    """
    filename = 'keywords.json'
    root = market['directory'] + '_config'

    def notify(content):
        # Send one progress message to the requesting client.
        msg = {'type': "primary", 'content': content}
        socketio.emit('notify', msg, namespace='/markets', room=sid)

    # Resolve the crawler and its display name before launching Chrome, so a
    # bad request cannot leave a browser process behind.
    pages = str(page_quantity)
    if website == 'alibaba':
        crawler_name = re.sub(' ', '_', keyword) + ' - ' + pages + '页 - 阿里'
        crawler_cls = KwclrAlibaba
    elif website == 'alibaba_sp':
        supplier = re.search(r'https:\/\/([^\.]+)', keyword).group(1)
        category = 'all_products'
        if 'productgrouplist' in keyword:
            category = re.search(r'\/([^\/]+.html)', keyword).group(1)
        crawler_name = supplier + ' - ' + category + ' - ' + pages + '页 - 阿里(商家)'
        crawler_cls = KwclrAliSp
    elif website == 'alibaba_sr':
        # Replace spaces like the other branches; the empty pattern used
        # before inserted '_' between every character of the keyword.
        crawler_name = re.sub(' ', '_', keyword) + ' - ' + pages + '页 - 阿里(橱窗)'
        crawler_cls = KwclrAliSr
    elif website == 'amazon':
        crawler_name = re.sub(' ', '_', keyword) + ' - ' + pages + '页 - Amazon'
        crawler_cls = KwclrAmazon
    else:
        raise ValueError('unsupported website: ' + repr(website))

    notify("打开浏览器 ... ...")

    chrome_options = webdriver.ChromeOptions()
    # chrome_options_headless.add_argument('--headless')
    chrome_options.add_argument('--disable-gpu')
    chrome_options.add_argument('--disable-extensions')
    chrome_options.add_argument('--disable-logging')
    chrome_options.add_argument('--ignore-certificate-errors')
    browser = webdriver.Chrome(chrome_options=chrome_options)

    try:
        crawler = crawler_cls(browser, keyword, page_quantity, sid, socketio)
        notify("开始爬取 ... ...")
        result = crawler.start()
        notify("爬取结束,关闭浏览器 ... ...")
    finally:
        # Always close the browser, even when the crawl raises.
        browser.quit()

    notify("保存结果 ... ...")

    obj = JSON.deserialize(root, [], filename)
    if not obj:
        obj = {}
    obj[crawler_name] = result
    JSON.serialize(obj, root, [], filename)

    socketio.emit('keyword_crawling_result', {
        'key': crawler_name,
        'result': result
    }, namespace='/markets', room=sid)
def execute(market):
    """Monitor one market's P4P group for 30 seconds, then turn it all off.

    Args:
        market: market dict with ``name``, ``lid`` and ``lpwd``; ``name`` must
            be ``'Eyelashes'`` or ``'Tools'``.

    Raises:
        ValueError: for any other market name. The group used to be left
            unbound in that case, failing only after the client was created.
    """
    print(market['name'])

    groups = {'Eyelashes': '直通车App', 'Tools': '0直通车'}
    if market['name'] not in groups:
        raise ValueError('no P4P group configured for market ' +
                         repr(market['name']))
    group = groups[market['name']]

    p4p = P4P(market, market['lid'], market['lpwd'])
    if market['name'] == 'Eyelashes':
        time.sleep(3)

    p4p.monitor(group=group)
    time.sleep(30)
    p4p.turn_all_off(group=group)


if __name__ == '__main__':
    # Read the market store once and run one daemon process per market.
    stored_markets = JSON.deserialize('.', 'storage', 'markets.json')
    processes = []
    for market_name in ('Eyelashes', 'Tools'):
        proc = Process(target=execute, args=[stored_markets[market_name]])
        proc.daemon = True
        processes.append(proc)

    for proc in processes:
        proc.start()
    for proc in processes:
        proc.join()
    print('process is end')
def set_keywords(self, tp, kws_list):
    """Replace the ``tp`` keyword list with ``kws_list`` and save it."""
    self.keywords_list[tp] = kws_list

    root = self.market['directory'] + '_config'
    fn = 'p4p_keywords_list_' + tp + '.json'
    JSON.serialize(self.keywords_list[tp], root, [], fn)
def save_crawling_result(self, keywords):
    """Append crawled keyword rows to the day's result file.

    The date is the text before the first space in ``keywords[0][0]``
    (presumably a ``'<date> <time>'`` timestamp — confirm with the caller).
    The rows are appended to ``p4p_keywords_crawl_result_<date>.json.gz`` in
    the market's config directory.

    An empty ``keywords`` is a no-op; it used to raise ``IndexError``.
    """
    if not keywords:
        return

    root = self.market['directory'] + '_config'
    date_str = keywords[0][0].split(' ')[0]
    fn = 'p4p_keywords_crawl_result_' + date_str + '.json.gz'
    JSON.serialize(keywords, root, [], fn, append=True)
def del_keywords(self, tp, kws):
    """Remove ``kws`` from the ``tp`` keyword list and save the list.

    Nothing is changed or written when ``kws`` is not in the list.
    """
    current = self.keywords_list[tp]
    if kws not in current:
        return

    current.remove(kws)
    root = self.market['directory'] + '_config'
    fn = 'p4p_keywords_list_' + tp + '.json'
    JSON.serialize(self.keywords_list[tp], root, [], fn)