Example #1
import logging

def main():
    # Set up basic logging
    log_format = '%(asctime)s - %(threadName)s - %(name)s - line %(lineno)d - %(levelname)s - %(message)s'
    logging.getLogger("requests").setLevel(logging.WARNING)
    logging.basicConfig(level=logging.INFO, format=log_format)

    # Save Chrome bookmarks (ChromeBrowser comes from the surrounding project)
    chrome = ChromeBrowser()
    chrome.save_bookmarks()
Example #2
import logging
import logging.config

def main():
    # Set up logging (get_logger_dict is a project helper that returns a dictConfig-style dict)
    logger_dict = get_logger_dict()
    logging.config.dictConfig(logger_dict)

    # Set requests to WARNING as the minimum level or it gets very noisy
    logging.getLogger('requests').setLevel(logging.WARNING)

    # Save Chrome bookmarks
    chrome = ChromeBrowser()
    chrome.save_bookmarks()
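For context, logging.config.dictConfig expects a dictionary in the standard dictConfig schema. The snippet below is only a sketch of the kind of dict a helper like get_logger_dict could return; the formatter string and handler names are illustrative, not taken from the project.

def get_logger_dict():
    # Minimal dictConfig-style structure (illustrative values, not the project's actual config)
    return {
        'version': 1,
        'disable_existing_loggers': False,
        'formatters': {
            'default': {
                'format': '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
            },
        },
        'handlers': {
            'console': {
                'class': 'logging.StreamHandler',
                'formatter': 'default',
            },
        },
        'root': {
            'level': 'INFO',
            'handlers': ['console'],
        },
    }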
Example #3
def create_browser(host, config, browser_type):
    """Build the browser wrapper that matches browser_type."""
    if browser_type == 'chrome':
        browser = ChromeBrowser(host, config.get('chrome_url_log', False))
    elif browser_type == 'firefox':
        browser = FirefoxBrowser(host, False)
    else:
        raise ValueError(f'Invalid browser type: {browser_type}')

    return browser
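A possible call site for this factory is sketched below. The host value and the config dict are placeholders; only the 'chrome_url_log' key appears in the original code.

# Hypothetical usage of the factory above; 'selenium-hub.local' and the config
# dict are placeholders, only 'chrome_url_log' comes from the original snippet.
config = {'chrome_url_log': True}
browser = create_browser('selenium-hub.local', config, 'chrome')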
Example #4
from time import time

def worker(url, key, root):
    """Load one URL in a headless Chrome session, time it, and analyze the page."""
    page, res = dict(), None
    with ChromeBrowser(headless=True) as br:
        t1 = time()
        page['html'] = br.get(url)
        page['load'] = time() - t1  # page load time in seconds
        page['url'] = url
        page['key'] = key
        page['root'] = root
        data = get_elements(page['html'], REGULARS)
        res = analyze(page, data)
    return res
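The `with ChromeBrowser(headless=True) as br` pattern in this and the next example implies the wrapper implements the context-manager protocol so the browser is always shut down. The skeleton below is an illustration of that protocol only; the method bodies are placeholders, not the project's actual implementation.

class ChromeBrowserSketch:
    """Illustrative skeleton only: shows the __enter__/__exit__ protocol the examples rely on."""

    def __init__(self, headless=True):
        self.headless = headless
        self.driver = None  # a real wrapper would start a Chrome driver here or in __enter__

    def __enter__(self):
        # Start the browser and hand the wrapper back to the `with` block
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Always shut the browser down, even if the block raised
        if self.driver is not None:
            self.driver.quit()
        return False  # do not suppress exceptions

    def get(self, url):
        # A real wrapper would navigate to `url` and return the page HTML
        raise NotImplementedError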
Example #5
import json
from time import time

def scan(task, dbconn):
    """Run one scan task: analyze the goal URL, then every site, store the result, and mail it."""
    data = dict(task)
    data['root'] = data['root'].split(';')
    data['urls'] = data['urls'].split('\n')

    goal = worker(data['url'], data['query'], data['root'])
    goal = sorted(goal.items(), key=(lambda x: x[0]))
    data['result'] = goal

    if data.get('engine'):
        se = SearchEngineParser(data['query'], data['engine']).scan()
        data['sites'] = se.sites['sites']
    else:
        data['sites'] = data['urls']

    done = dict()
    with ChromeBrowser(headless=True) as br:
        for n, s in enumerate(data['sites'], 1):
            try:
                _page, _res = dict(), None
                t1 = time()
                _page['html'] = br.get(s)
                _page['load'] = time() - t1
                _page['url'] = s
                _page['key'] = data['query']
                _page['root'] = data['root']
                _data = get_elements(_page['html'], REGULARS)
                _res = analyze(_page, _data)
                done[str(n)] = _res
            except Exception as e:
                print(type(e), e)

    data['done'] = done
    data['keys'] = sorted(done.keys(), key=lambda x: int(x))
    data['fromto'], data['avarage'] = find_avarage(goal, done)
    data['timestamp'] = data['timestamp'].isoformat()

    dbconn.execute(Result.insert().values(taskid=data['id'],
                                          data=json.dumps(data,
                                                          ensure_ascii=False)))

    data['result_url'] = f"http://topomer.site/done/{data['uuid']}"
    smail(data, data['email'])
    print('Done: ', data['result_url'])