def main():
    # set basic logging
    format = '%(asctime)s - %(threadName)s - %(name)s - line %(lineno)d - %(levelname)s - %(message)s'
    logging.getLogger("requests").setLevel(logging.WARNING)
    logging.basicConfig(level=logging.INFO, format=format)

    # save Chrome bookmarks
    chrome = ChromeBrowser()
    chrome.save_bookmarks()
def main():
    # Setup logging
    logger_dict = get_logger_dict()
    logging.config.dictConfig(logger_dict)
    # Set requests to warning as minimum level or it gets very noisy
    logging.getLogger('requests').setLevel(logging.WARNING)

    # Save Chrome bookmarks
    chrome = ChromeBrowser()
    chrome.save_bookmarks()
def create_browser(host, config, browser_type):
    if browser_type == 'chrome':
        browser = ChromeBrowser(host, config.get('chrome_url_log', False))
    elif browser_type == 'firefox':
        browser = FirefoxBrowser(host, False)
    else:
        raise Exception('Invalid Browser Type: ' + str(browser_type))
    return browser
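# A minimal usage sketch (hypothetical values): the host string and config
# mapping would come from the caller's settings; any unsupported browser
# type raises immediately.
#
#     config = {'chrome_url_log': True}
#     browser = create_browser('127.0.0.1:4444', config, 'chrome')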
def worker(url, key, root):
    page, res = dict(), None
    with ChromeBrowser(headless=True) as br:
        # Fetch the page and record how long the load took
        t1 = time()
        page['html'] = br.get(url)
        page['load'] = time() - t1
        page['url'] = url
        page['key'] = key
        page['root'] = root
        # Extract the elements matched by REGULARS and analyze the page
        data = get_elements(page['html'], REGULARS)
        res = analyze(page, data)
    return res
def scan(task, dbconn):
    data = dict(task)
    data['root'] = data['root'].split(';')
    data['urls'] = data['urls'].split('\n')

    # Analyze the target page first; this is the baseline the other sites are compared against
    goal = worker(data['url'], data['query'], data['root'])
    goal = sorted(goal.items(), key=lambda x: x[0])
    data['result'] = goal

    # Take the list of sites either from a search engine query or from the supplied URLs
    if data.get('engine'):
        se = SearchEngineParser(data['query'], data['engine']).scan()
        data['sites'] = se.sites['sites']
    else:
        data['sites'] = data['urls']

    # Fetch and analyze every site with a single headless browser session
    done = dict()
    with ChromeBrowser(headless=True) as br:
        for n, s in enumerate(data['sites'], 1):
            try:
                _page, _res = dict(), None
                t1 = time()
                _page['html'] = br.get(s)
                _page['load'] = time() - t1
                _page['url'] = s
                _page['key'] = data['query']
                _page['root'] = data['root']
                _data = get_elements(_page['html'], REGULARS)
                _res = analyze(_page, _data)
                done[str(n)] = _res
            except Exception as e:
                print(type(e), e)

    data['done'] = done
    data['keys'] = sorted(done.keys(), key=lambda x: int(x))
    data['fromto'], data['avarage'] = find_avarage(goal, done)
    data['timestamp'] = data['timestamp'].isoformat()

    # Persist the result, then notify the requester by e-mail
    dbconn.execute(Result.insert().values(taskid=data['id'],
                                          data=json.dumps(data, ensure_ascii=False)))
    data['result_url'] = f"http://topomer.site/done/{data['uuid']}"
    smail(data, data['email'])
    print('Done: ', data['result_url'])