예제 #1
0
def execute_command(command, webdriver, proxy_queue, browser_settings, browser_params, manager_params, extension_socket):
    """
    Route a BrowserManager command tuple to the helper that implements it.

    Commands have the form (COMMAND, ARG0, ARG1, ...): element 0 names the
    action and the remaining elements are forwarded, together with the
    browser context, to the matching helper. A name that matches none of the
    actions below is ignored. Nothing is returned.

    The only imports in this file should be imports of helper libraries.
    """
    action = command[0]

    if action == 'GET':
        browser_commands.get_website(
            command[1], command[2], webdriver, proxy_queue, browser_params,
            extension_socket)

    elif action == 'BROWSE':
        browser_commands.browse_website(
            command[1], command[2], command[3], webdriver, proxy_queue,
            browser_params, manager_params, extension_socket)

    elif action == 'DUMP_FLASH_COOKIES':
        browser_commands.dump_flash_cookies(
            command[1], command[2], webdriver, browser_params, manager_params)

    elif action == 'DUMP_PROFILE_COOKIES':
        browser_commands.dump_profile_cookies(
            command[1], command[2], webdriver, browser_params, manager_params)

    elif action == 'DUMP_PROF':
        # Flash data is saved only when 'disable_flash' is exactly False.
        profile_commands.dump_profile(
            browser_params['profile_path'], manager_params, browser_params,
            command[1], command[2], webdriver, browser_settings,
            compress=command[3],
            save_flash=browser_params['disable_flash'] is False)

    elif action == 'EXTRACT_LINKS':
        browser_commands.extract_links(
            webdriver, browser_params, manager_params)
예제 #2
0
def execute_command(command, webdriver, proxy_queue, browser_settings,
                    browser_params):
    """
    Hand a BrowserManager command tuple to the helper that carries it out.

    Commands have the form (COMMAND, ARG0, ARG1, ...). The first element
    selects the helper; the other elements are passed through as arguments.
    Command names not listed below are ignored, and nothing is returned.

    The only imports in this file should be imports of helper libraries.
    """
    name = command[0]

    if name == 'GET':
        browser_commands.get_website(
            command[1], webdriver, proxy_queue, browser_params)
        return

    if name == 'DUMP_STORAGE_VECTORS':
        browser_commands.dump_storage_vectors(
            command[1], command[2], webdriver, browser_params)
        return

    if name == 'DUMP_PROF':
        # Flash data is saved only when 'disable_flash' is exactly False.
        profile_commands.dump_profile(
            browser_params['profile_path'], command[1], command[2],
            webdriver, browser_settings,
            save_flash=browser_params['disable_flash'] is False)
        return

    if name == 'EXTRACT_LINKS':
        browser_commands.extract_links(webdriver, browser_params)
def execute_command(command, webdriver, proxy_queue, browser_settings, browser_params):
    """
    Dispatch a BrowserManager command tuple to its helper function.

    Commands have the form (COMMAND, ARG0, ARG1, ...). Element 0 picks the
    helper (browser, profile or Facebook commands) and the following
    elements are forwarded as its arguments. Names that match no branch are
    ignored. Nothing is returned.

    The only imports in this file should be imports of helper libraries.
    """
    action = command[0]

    if action == 'GET':
        browser_commands.get_website(
            command[1], webdriver, proxy_queue, browser_params)

    elif action == 'BROWSE':
        browser_commands.browse_website(
            command[1], command[2], webdriver, proxy_queue, browser_params)

    elif action == 'DUMP_STORAGE_VECTORS':
        browser_commands.dump_storage_vectors(
            command[1], command[2], webdriver, browser_params)

    elif action == 'DUMP_PROF':
        # Flash data is saved only when 'disable_flash' is exactly False.
        profile_commands.dump_profile(
            browser_params['profile_path'], command[1], command[2],
            webdriver, browser_settings,
            save_flash=browser_params['disable_flash'] is False)

    elif action == 'EXTRACT_LINKS':
        browser_commands.extract_links(webdriver, browser_params)

    elif action == 'FBLOGIN':
        facebook_commands.login(
            webdriver, command[1], command[2], browser_params)

    elif action == 'EXTRACT_FRIENDS':
        facebook_commands.extract_friends(
            command[1], command[2], command[3], webdriver, browser_params)
예제 #4
0
def execute_command(command, webdriver, proxy_queue, browser_settings, browser_params, manager_params, extension_socket):
    """
    Forward a BrowserManager command tuple to the matching helper call.

    Commands have the form (COMMAND, ARG0, ARG1, ...): the first element is
    the action name, the rest are positional arguments for the helper.
    An action name with no branch below is ignored; nothing is returned.

    The only imports in this file should be imports of helper libraries.
    """
    action = command[0]

    if action == 'GET':
        browser_commands.get_website(
            command[1], webdriver, proxy_queue, browser_params,
            extension_socket)

    elif action == 'BROWSE':
        browser_commands.browse_website(
            command[1], command[2], webdriver, proxy_queue, browser_params,
            manager_params, extension_socket)

    elif action == 'DUMP_FLASH_COOKIES':
        browser_commands.dump_flash_cookies(
            command[1], command[2], webdriver, browser_params, manager_params)

    elif action == 'DUMP_PROFILE_COOKIES':
        browser_commands.dump_profile_cookies(
            command[1], command[2], webdriver, browser_params, manager_params)

    elif action == 'DUMP_PROF':
        # Flash data is saved only when 'disable_flash' is exactly False.
        profile_commands.dump_profile(
            browser_params['profile_path'], manager_params, browser_params,
            command[1], command[2], webdriver, browser_settings,
            compress=command[3],
            save_flash=browser_params['disable_flash'] is False)

    elif action == 'EXTRACT_LINKS':
        browser_commands.extract_links(
            webdriver, browser_params, manager_params)
예제 #5
0
def execute_command(command, webdriver, proxy_queue, browser_settings, browser_params, manager_params, extension_socket):
    """
    Dispatch a BrowserManager command tuple to the helper implementing it.

    Commands have the form (COMMAND, ARG0, ARG1, ...). Element 0 selects the
    branch; the remaining elements are mapped onto the helper's keyword
    arguments. For 'RUN_CUSTOM_FUNCTION', command[1] is called with the
    positional arguments in command[2] plus the full execution context as
    keyword arguments. Unknown command names are ignored; nothing is returned.

    The only imports in this file should be imports of helper libraries.
    """
    action = command[0]

    if action == 'GET':
        browser_commands.get_website(
            url=command[1], sleep=command[2], scroll=command[3],
            visit_id=command[4], webdriver=webdriver,
            proxy_queue=proxy_queue, browser_params=browser_params,
            extension_socket=extension_socket)

    elif action == 'BROWSE':
        browser_commands.browse_website(
            url=command[1], num_links=command[2], sleep=command[3],
            visit_id=command[4], webdriver=webdriver,
            proxy_queue=proxy_queue, browser_params=browser_params,
            manager_params=manager_params,
            extension_socket=extension_socket)

    elif action == 'DUMP_FLASH_COOKIES':
        browser_commands.dump_flash_cookies(
            start_time=command[1], visit_id=command[2],
            webdriver=webdriver, browser_params=browser_params,
            manager_params=manager_params)

    elif action == 'DUMP_PROFILE_COOKIES':
        browser_commands.dump_profile_cookies(
            start_time=command[1], visit_id=command[2],
            webdriver=webdriver, browser_params=browser_params,
            manager_params=manager_params)

    elif action == 'DUMP_PROF':
        # Flash data is saved only when 'disable_flash' is exactly False.
        profile_commands.dump_profile(
            browser_profile_folder=browser_params['profile_path'],
            manager_params=manager_params,
            browser_params=browser_params,
            tar_location=command[1], close_webdriver=command[2],
            webdriver=webdriver, browser_settings=browser_settings,
            compress=command[3],
            save_flash=browser_params['disable_flash'] is False)

    elif action == 'EXTRACT_LINKS':
        browser_commands.extract_links(
            webdriver, browser_params, manager_params)

    elif action == 'SAVE_SCREENSHOT':
        browser_commands.save_screenshot(
            screenshot_name=command[1], webdriver=webdriver,
            browser_params=browser_params, manager_params=manager_params)

    elif action == 'DUMP_PAGE_SOURCE':
        browser_commands.dump_page_source(
            dump_name=command[1], webdriver=webdriver,
            browser_params=browser_params, manager_params=manager_params)

    elif action == 'EXTRACT_ELEMENTS':
        browser_commands.extract_elements(
            selector=command[1], webdriver=webdriver,
            browser_params=browser_params, manager_params=manager_params)

    elif action == 'RUN_CUSTOM_FUNCTION':
        # The callable receives the whole execution context as keywords;
        # note the webdriver is exposed under the key "driver".
        context = {
            "command": command,
            "driver": webdriver,
            "proxy_queue": proxy_queue,
            "browser_settings": browser_settings,
            "browser_params": browser_params,
            "manager_params": manager_params,
            "extension_socket": extension_socket,
        }
        custom_function = command[1]
        custom_function(*command[2], **context)
def execute_command(command, webdriver, proxy_queue, browser_settings, browser_params):
    """
    Run the helper that corresponds to a BrowserManager command tuple.

    Commands have the form (COMMAND, ARG0, ARG1, ...). Only 'GET',
    'DUMP_STORAGE_VECTORS' and 'DUMP_PROF' are handled here; any other
    command name is ignored. Nothing is returned.

    The only imports in this file should be imports of helper libraries.
    """
    name = command[0]

    if name == 'GET':
        browser_commands.get_website(
            command[1], webdriver, proxy_queue, browser_params)
        return

    if name == 'DUMP_STORAGE_VECTORS':
        browser_commands.dump_storage_vectors(
            command[1], command[2], webdriver, browser_params)
        return

    if name == 'DUMP_PROF':
        # Flash data is saved only when 'disable_flash' is exactly False.
        profile_commands.dump_profile(
            browser_params['profile_path'], command[1], command[2],
            webdriver, browser_settings,
            save_flash=browser_params['disable_flash'] is False)
예제 #7
0
def fill_forms(url, email_producer, num_links, page_timeout, debug, visit_id,
               webdriver, proxy_queue, browser_params, manager_params,
               extension_socket):
    """Finds a newsletter form on the page. If not found, visits <num_links>
    internal links and scans those pages for a form. Submits the form if found.

    The landing page is loaded with get_website and scanned first. If
    _find_and_fill_form reports no success, up to <num_links> rounds follow;
    each round ranks the visible, unvisited, internal, non-blacklisted links
    of the current page against _LINK_TEXT_RANK, clicks the best one and
    scans the resulting page (and any extra windows it opened).

    Args:
        url: page to load first.
        email_producer, debug: forwarded unchanged to _find_and_fill_form.
        num_links: maximum number of link-following rounds.
        page_timeout: value passed to webdriver.set_page_load_timeout.
        visit_id: forwarded to get_website and _find_and_fill_form.
        webdriver: the browser driver being controlled.
        proxy_queue, extension_socket: forwarded to get_website.
        browser_params: read for 'bot_mitigation'; also forwarded to helpers.
        manager_params: read for 'logger_address'; also forwarded to helpers.

    Returns:
        None, whether or not a form was found.
    """
    # load the site
    webdriver.set_page_load_timeout(page_timeout)
    get_website(url, 0, visit_id, webdriver, proxy_queue, browser_params,
                extension_socket)

    # connect to the logger
    logger = loggingclient(*manager_params['logger_address'])

    # try to find a newsletter form on the landing page
    if _find_and_fill_form(webdriver, email_producer, visit_id, debug,
                           browser_params, manager_params, logger):
        return

    # otherwise, scan more pages
    main_handle = webdriver.current_window_handle
    visited_links = set()
    for _ in range(num_links):
        # get all links on the page
        links = webdriver.find_elements_by_tag_name('a')
        random.shuffle(links)

        current_url = webdriver.current_url
        current_ps1 = domain_utils.get_ps_plus_1(current_url)

        # find links to click
        match_links = []
        start_time = timeit.default_timer()
        for link in links:
            try:
                if not link.is_displayed():
                    continue

                # check if link is valid and not already visited
                href = link.get_attribute('href')
                if href is None or href in visited_links:
                    continue

                # check if this is an internal link
                if not _is_internal_link(href, current_url, current_ps1):
                    continue

                link_text = link.text.lower()

                # skip links with blacklisted text
                if any(bl_text in link_text
                       for bl_text in _LINK_TEXT_BLACKLIST):
                    continue

                # should we click this link?
                link_rank = 0
                for match_type, s, rank, flags in _LINK_TEXT_RANK:
                    text_hit = match_type == _TYPE_TEXT and s in link_text
                    href_hit = match_type == _TYPE_HREF and s in href
                    if not (text_hit or href_hit):
                        continue

                    # don't use this link if the current page URL already
                    # matches too
                    if (flags & _FLAG_IN_NEW_URL_ONLY
                            and match_type == _TYPE_HREF
                            and s in current_url):
                        continue

                    # link matches! only the first matching rule counts
                    link_rank = rank
                    match_links.append((link, rank, link_text, href, flags))
                    break
                if link_rank >= _LINK_RANK_SKIP:  # good enough, stop looking
                    break
            except Exception:
                # Deliberately best-effort: one bad element (e.g. one that
                # went stale) must not abort the scan. Catching Exception
                # rather than using a bare except lets KeyboardInterrupt and
                # SystemExit propagate.
                logger.error("error while looping through links...")

            # quit if too much time passed (for some reason, this is really slow...)
            if (match_links and
                    timeit.default_timer() - start_time > _LINK_MATCH_TIMEOUT):
                break

        # find the best link to click
        if not match_links:
            break  # no more links to click
        # stable sort + last element: among equal ranks the latest match wins
        match_links.sort(key=lambda entry: entry[1])
        next_link = match_links[-1]
        visited_links.add(next_link[3])

        # click the link
        try:
            # load the page
            logger.info("clicking on link '%s' - %s" %
                        (next_link[2], next_link[3]))
            next_link[0].click()
            wait_until_loaded(webdriver, _PAGE_LOAD_TIME)
            if browser_params['bot_mitigation']:
                bot_mitigation(webdriver)

            # find newsletter form
            if _find_and_fill_form(webdriver, email_producer, visit_id, debug,
                                   browser_params, manager_params, logger):
                return

            # should we stay on this page?
            if next_link[4] & _FLAG_STAY_ON_PAGE:
                continue

            # go back
            webdriver.back()
            wait_until_loaded(webdriver, _PAGE_LOAD_TIME)

            # check other windows (ex. pop-ups)
            windows = webdriver.window_handles
            if len(windows) > 1:
                form_found_in_popup = False
                for window in windows:
                    if window != main_handle:
                        webdriver.switch_to_window(window)
                        wait_until_loaded(webdriver, _PAGE_LOAD_TIME)

                        # find newsletter form
                        if _find_and_fill_form(webdriver, email_producer,
                                               visit_id, debug, browser_params,
                                               manager_params, logger):
                            form_found_in_popup = True

                        webdriver.close()
                webdriver.switch_to_window(main_handle)
                time.sleep(1)

                if form_found_in_popup:
                    return
        except Exception as exc:
            # Still best-effort (move on to the next round), but record the
            # failure instead of discarding it, and do not trap
            # KeyboardInterrupt / SystemExit.
            logger.error("error while following link: %r" % (exc,))
예제 #8
0
def execute_command(command, webdriver, proxy_queue, browser_settings, browser_params, manager_params, extension_socket):
    """
    Dispatch a BrowserManager command tuple to the helper implementing it.

    Commands have the form (COMMAND, ARG0, ARG1, ...). Element 0 selects the
    branch; the remaining elements are mapped onto the helper's arguments.
    Unknown command names are ignored and nothing is returned.

    'DUMP_PROF' currently returns immediately without dumping anything
    (see the note in that branch).

    The only imports in this file should be imports of helper libraries.
    """
    action = command[0]

    if action == 'GET':
        browser_commands.get_website(
            url=command[1], sleep=command[2], visit_id=command[3],
            webdriver=webdriver, proxy_queue=proxy_queue,
            browser_params=browser_params,
            extension_socket=extension_socket)

    elif action == 'BROWSE':
        browser_commands.browse_website(
            url=command[1], num_links=command[2], sleep=command[3],
            visit_id=command[4], webdriver=webdriver,
            proxy_queue=proxy_queue, browser_params=browser_params,
            manager_params=manager_params,
            extension_socket=extension_socket)

    elif action == 'DUMP_FLASH_COOKIES':
        browser_commands.dump_flash_cookies(
            start_time=command[1], visit_id=command[2],
            webdriver=webdriver, browser_params=browser_params,
            manager_params=manager_params)

    elif action == 'DUMP_PROFILE_COOKIES':
        browser_commands.dump_profile_cookies(
            start_time=command[1], visit_id=command[2],
            webdriver=webdriver, browser_params=browser_params,
            manager_params=manager_params)

    elif action == 'DUMP_PROF':
        # NOTE(review): this early return makes the dump_profile call below
        # unreachable, so DUMP_PROF is a no-op. Confirm whether profile
        # dumping was disabled on purpose, then delete either the return or
        # the dead call.
        return
        profile_commands.dump_profile(
            browser_profile_folder=browser_params['profile_path'],
            manager_params=manager_params,
            browser_params=browser_params,
            tar_location=command[1], close_webdriver=command[2],
            webdriver=webdriver, browser_settings=browser_settings,
            compress=command[3],
            save_flash=browser_params['disable_flash'] is False)

    elif action == 'EXTRACT_LINKS':
        # Both extractors run for this command, in this order.
        browser_commands.extract_links(
            webdriver, browser_params, manager_params)
        browser_commands.extract_links_with_extension(
            webdriver, browser_params, manager_params)

    elif action == 'LOGIN':
        browser_commands.login_google(
            webdriver, browser_params, manager_params)

    elif action == 'LOAD_PROFILE':
        # Do not use this command! There is a built-in function that
        # can be activated by browser_params.json
        profile_commands.load_profile(
            browser_profile_folder=browser_params['profile_path'],
            manager_params=manager_params,
            browser_params=browser_params)

    elif action == 'SINGLE_SEARCH_GOOGLE_SHOP':
        browser_commands.single_search_google_shopping(
            webdriver=webdriver,
            term=command[1],
            browser_params=browser_params,
            manager_params=manager_params,
            number_of_links_to_click=command[2],
            training=command[3])

    elif action == 'SINGLE_SEARCH_GOOGLE_SHOP_BY_INDEX':
        browser_commands.single_search_google_shopping_by_index(
            webdriver=webdriver,
            index_of_term=command[1],
            browser_params=browser_params,
            manager_params=manager_params,
            training=command[2])

    elif action == 'MULTIPLE_SEARCH_GOOGLE_SHOP':
        browser_commands.multiple_search_google_shopping(
            webdriver=webdriver,
            browser_params=browser_params,
            manager_params=manager_params,
            number_of_links_to_click=command[1],
            training=command[2])

    elif action == 'BROWSE_TRAINING_SITE_BY_INDEX':
        browser_commands.browser_website_by_index(
            index_of_url=command[1], sleep=command[2], visit_id=command[3],
            webdriver=webdriver, proxy_queue=proxy_queue,
            browser_params=browser_params, manager_params=manager_params,
            extension_socket=extension_socket)
def execute_command(command, webdriver, proxy_queue, browser_settings,
                    browser_params, manager_params, extension_sockets):
    """
    Dispatch a BrowserManager command tuple to the helper implementing it.

    Commands have the form (COMMAND, ARG0, ARG1, ...). Element 0 selects the
    branch; the remaining elements are mapped onto the helper's arguments.
    For 'RUN_CUSTOM_FUNCTION', command[1] is called with the positional
    arguments in command[2], visit_id=command[3], and the full execution
    context as keyword arguments. Unknown command names are ignored; nothing
    is returned.

    The only imports in this file should be imports of helper libraries.
    """
    action = command[0]

    if action == 'GET':
        browser_commands.get_website(
            url=command[1], sleep=command[2], visit_id=command[3],
            webdriver=webdriver, proxy_queue=proxy_queue,
            browser_params=browser_params,
            extension_sockets=extension_sockets)

    elif action == 'BROWSE':
        browser_commands.browse_website(
            url=command[1], num_links=command[2], sleep=command[3],
            visit_id=command[4], webdriver=webdriver,
            proxy_queue=proxy_queue, browser_params=browser_params,
            manager_params=manager_params,
            extension_sockets=extension_sockets)

    elif action == 'DUMP_FLASH_COOKIES':
        browser_commands.dump_flash_cookies(
            start_time=command[1], visit_id=command[2],
            webdriver=webdriver, browser_params=browser_params,
            manager_params=manager_params)

    elif action == 'DUMP_PROFILE_COOKIES':
        browser_commands.dump_profile_cookies(
            start_time=command[1], visit_id=command[2],
            webdriver=webdriver, browser_params=browser_params,
            manager_params=manager_params)

    elif action == 'DUMP_PROF':
        # Flash data is saved only when 'disable_flash' is exactly False.
        profile_commands.dump_profile(
            browser_profile_folder=browser_params['profile_path'],
            manager_params=manager_params,
            browser_params=browser_params,
            tar_location=command[1], close_webdriver=command[2],
            webdriver=webdriver, browser_settings=browser_settings,
            compress=command[3],
            save_flash=browser_params['disable_flash'] is False)

    elif action == 'EXTRACT_LINKS':
        browser_commands.extract_links(
            webdriver, browser_params, manager_params)

    elif action == 'SAVE_SCREENSHOT':
        browser_commands.save_screenshot(
            screenshot_name=command[1], webdriver=webdriver,
            browser_params=browser_params, manager_params=manager_params)

    elif action == 'DUMP_PAGE_SOURCE':
        browser_commands.dump_page_source(
            dump_name=command[1], webdriver=webdriver,
            browser_params=browser_params, manager_params=manager_params)

    elif action == 'BROWSE_AND_DUMP_SOURCE':
        browser_commands.browse_and_dump_source(
            url=command[1], num_links=command[2], sleep=command[3],
            visit_id=command[4], webdriver=webdriver,
            proxy_queue=proxy_queue, browser_params=browser_params,
            manager_params=manager_params,
            extension_sockets=extension_sockets)

    elif action == 'RECURSIVE_DUMP_PAGE_SOURCE':
        # Tuple layout here is (COMMAND, suffix, visit_id).
        browser_commands.recursive_dump_page_source(
            visit_id=command[2], driver=webdriver,
            manager_params=manager_params, suffix=command[1])

    elif action == 'FACEBOOK_LOGIN':
        facebook_commands.facebook_login(
            driver=webdriver, url=command[1], visit_id=command[2],
            manager_params=manager_params, browser_params=browser_params)

    elif action == 'REQUEST_FILTER':
        browser_commands.request_filter(
            control_message=command[1], filter_name=command[2],
            crawl_id=browser_params['crawl_id'],
            extension_sockets=extension_sockets,
            manager_params=manager_params)

    elif action == 'RUN_CUSTOM_FUNCTION':
        # The callable receives the whole execution context as keywords;
        # note the webdriver is exposed under the key "driver".
        context = {
            "command": command,
            "driver": webdriver,
            "proxy_queue": proxy_queue,
            "browser_settings": browser_settings,
            "browser_params": browser_params,
            "manager_params": manager_params,
            "extension_sockets": extension_sockets,
        }
        custom_function = command[1]
        custom_function(*command[2], visit_id=command[3], **context)
예제 #10
0
def execute_command(command, webdriver, browser_settings, browser_params,
                    manager_params, extension_socket):
    """Executes BrowserManager commands

    Commands are of form (COMMAND, ARG0, ARG1, ...). Element 0 selects the
    helper to call and the remaining elements are mapped onto its keyword
    arguments. For 'RUN_CUSTOM_FUNCTION', command[1] is called with the
    positional arguments in command[2] plus the execution context as keyword
    arguments. Unknown command names are ignored; nothing is returned.
    """
    action = command[0]

    if action == 'GET':
        browser_commands.get_website(
            url=command[1], sleep=command[2], visit_id=command[3],
            webdriver=webdriver, browser_params=browser_params,
            extension_socket=extension_socket)

    elif action == 'BROWSE':
        browser_commands.browse_website(
            url=command[1], num_links=command[2], sleep=command[3],
            visit_id=command[4], webdriver=webdriver,
            browser_params=browser_params, manager_params=manager_params,
            extension_socket=extension_socket)

    elif action == 'DUMP_FLASH_COOKIES':
        browser_commands.dump_flash_cookies(
            start_time=command[1], visit_id=command[2],
            webdriver=webdriver, browser_params=browser_params,
            manager_params=manager_params)

    elif action == 'DUMP_PROFILE_COOKIES':
        browser_commands.dump_profile_cookies(
            start_time=command[1], visit_id=command[2],
            webdriver=webdriver, browser_params=browser_params,
            manager_params=manager_params)

    elif action == 'DUMP_PROF':
        # Flash data is saved only when 'disable_flash' is exactly False.
        profile_commands.dump_profile(
            browser_profile_folder=browser_params['profile_path'],
            manager_params=manager_params,
            browser_params=browser_params,
            tar_location=command[1], close_webdriver=command[2],
            webdriver=webdriver, browser_settings=browser_settings,
            compress=command[3],
            save_flash=browser_params['disable_flash'] is False)

    # The four commands below share the tuple layout
    # (COMMAND, suffix, visit_id).
    elif action == 'DUMP_PAGE_SOURCE':
        browser_commands.dump_page_source(
            visit_id=command[2], driver=webdriver,
            manager_params=manager_params, suffix=command[1])

    elif action == 'RECURSIVE_DUMP_PAGE_SOURCE':
        browser_commands.recursive_dump_page_source(
            visit_id=command[2], driver=webdriver,
            manager_params=manager_params, suffix=command[1])

    elif action == 'SAVE_SCREENSHOT':
        browser_commands.save_screenshot(
            visit_id=command[2], crawl_id=browser_params['crawl_id'],
            driver=webdriver, manager_params=manager_params,
            suffix=command[1])

    elif action == 'SCREENSHOT_FULL_PAGE':
        browser_commands.screenshot_full_page(
            visit_id=command[2], crawl_id=browser_params['crawl_id'],
            driver=webdriver, manager_params=manager_params,
            suffix=command[1])

    elif action == 'RUN_CUSTOM_FUNCTION':
        # The callable receives the whole execution context as keywords;
        # note the webdriver is exposed under the key "driver".
        context = {
            "command": command,
            "driver": webdriver,
            "browser_settings": browser_settings,
            "browser_params": browser_params,
            "manager_params": manager_params,
            "extension_socket": extension_socket,
        }
        custom_function = command[1]
        custom_function(*command[2], **context)