def execute_command(command, webdriver, proxy_queue, browser_settings,
                    browser_params, manager_params, extension_socket):
    """Dispatch one BrowserManager command tuple to its helper function.

    Commands are of the form ``(COMMAND, ARG0, ARG1, ...)``: the first
    element selects the helper and the remaining elements are forwarded to it
    as arguments. A command name that matches none of the branches below
    results in no call at all.

    The only imports in this file should be imports to helper libraries.
    """
    action = command[0]

    if action == 'GET':
        browser_commands.get_website(
            url=command[1],
            sleep=command[2],
            scroll=command[3],
            visit_id=command[4],
            webdriver=webdriver,
            proxy_queue=proxy_queue,
            browser_params=browser_params,
            extension_socket=extension_socket,
        )
    elif action == 'BROWSE':
        browser_commands.browse_website(
            url=command[1],
            num_links=command[2],
            sleep=command[3],
            visit_id=command[4],
            webdriver=webdriver,
            proxy_queue=proxy_queue,
            browser_params=browser_params,
            manager_params=manager_params,
            extension_socket=extension_socket,
        )
    elif action == 'DUMP_FLASH_COOKIES':
        browser_commands.dump_flash_cookies(
            start_time=command[1],
            visit_id=command[2],
            webdriver=webdriver,
            browser_params=browser_params,
            manager_params=manager_params,
        )
    elif action == 'DUMP_PROFILE_COOKIES':
        browser_commands.dump_profile_cookies(
            start_time=command[1],
            visit_id=command[2],
            webdriver=webdriver,
            browser_params=browser_params,
            manager_params=manager_params,
        )
    elif action == 'DUMP_PROF':
        profile_commands.dump_profile(
            browser_profile_folder=browser_params['profile_path'],
            manager_params=manager_params,
            browser_params=browser_params,
            tar_location=command[1],
            close_webdriver=command[2],
            webdriver=webdriver,
            browser_settings=browser_settings,
            compress=command[3],
            # Flash data is saved only when 'disable_flash' is exactly False.
            save_flash=browser_params['disable_flash'] is False,
        )
    elif action == 'EXTRACT_LINKS':
        browser_commands.extract_links(webdriver, browser_params,
                                       manager_params)
    elif action == 'SAVE_SCREENSHOT':
        browser_commands.save_screenshot(
            screenshot_name=command[1],
            webdriver=webdriver,
            browser_params=browser_params,
            manager_params=manager_params,
        )
    elif action == 'DUMP_PAGE_SOURCE':
        browser_commands.dump_page_source(
            dump_name=command[1],
            webdriver=webdriver,
            browser_params=browser_params,
            manager_params=manager_params,
        )
    elif action == 'EXTRACT_ELEMENTS':
        browser_commands.extract_elements(
            selector=command[1],
            webdriver=webdriver,
            browser_params=browser_params,
            manager_params=manager_params,
        )
    elif action == 'RUN_CUSTOM_FUNCTION':
        # The callable in command[1] receives command[2] as positional
        # arguments plus the whole execution context as keyword arguments.
        context = {"command": command,
                   "driver": webdriver,
                   "proxy_queue": proxy_queue,
                   "browser_settings": browser_settings,
                   "browser_params": browser_params,
                   "manager_params": manager_params,
                   "extension_socket": extension_socket}
        custom_function = command[1]
        custom_function(*command[2], **context)
def _find_and_fill_form(webdriver, email_producer, visit_id, debug,
                        browser_params, manager_params, logger):
    """Find a newsletter form, submit it, and submit any follow-up form.

    The landing page is searched first and then, if nothing is found there,
    each ``<iframe>`` in turn. After the first submission, any other open
    windows (e.g. pop-ups) and then the current page are checked for one
    follow-up form, which is submitted as well. Every window other than the
    one focused on entry is closed before returning.

    Args:
        webdriver: Selenium driver positioned on the page to inspect.
        email_producer: Callable invoked as
            ``email_producer(current_url, current_site_title)``; its result
            is passed to ``_get_user_info`` and logged as the email address.
        visit_id: Used (stringified) as the prefix of debug artifact names.
        debug: When true, page sources and screenshots are saved around each
            submission.
        browser_params: Forwarded to the dump/screenshot/submit helpers.
        manager_params: Forwarded to the dump/screenshot/submit helpers.
        logger: Logger used to report submissions and iframe errors.

    Returns:
        True if a newsletter form was found and submitted, False if no form
        was found on the page or in any of its iframes.
    """
    current_url = webdriver.current_url
    current_site_title = webdriver.title.encode('ascii', 'replace')
    main_handle = webdriver.current_window_handle
    in_iframe = False

    # debug: names of the before/after screenshots and page-source dumps
    debug_file_prefix = str(visit_id) + '_'
    debug_form_pre_initial = debug_file_prefix + 'form_initial_presubmit'
    debug_form_post_initial = debug_file_prefix + 'form_initial_result'
    debug_form_pre_followup = debug_file_prefix + 'form_followup_presubmit'
    debug_form_post_followup = debug_file_prefix + 'form_followup_result'
    debug_page_source_initial = debug_file_prefix + 'src_initial'
    debug_page_source_followup = debug_file_prefix + 'src_followup'

    # try to find newsletter form on landing page
    newsletter_form = _find_newsletter_form(webdriver)
    if newsletter_form is None:
        # search for forms in iframes (if present)
        for iframe in webdriver.find_elements_by_tag_name('iframe'):
            try:
                webdriver.switch_to_frame(iframe)
                newsletter_form = _find_newsletter_form(webdriver)
            except Exception as e:
                # One unusable frame must neither abort the whole search nor
                # leave the driver focused inside that frame: report it,
                # return to the top-level document and try the next frame.
                logger.error('Error while analyzing an iframe: %s', str(e))
                webdriver.switch_to_default_content()
                continue

            if newsletter_form is not None:
                if debug:
                    dump_page_source(debug_page_source_initial, webdriver,
                                     browser_params, manager_params)
                in_iframe = True
                break  # form found, stay on the iframe

            # no form in this frame, switch back
            webdriver.switch_to_default_content()

        # still no form?
        if newsletter_form is None:
            return False
    elif debug:
        dump_page_source(debug_page_source_initial, webdriver,
                         browser_params, manager_params)

    email = email_producer(current_url, current_site_title)
    user_info = _get_user_info(email)
    _form_fill_and_submit(newsletter_form, user_info, webdriver, False,
                          browser_params, manager_params,
                          debug_form_pre_initial if debug else None)
    logger.info('submitted form on [%s] with email [%s]', current_url, email)
    time.sleep(_FORM_SUBMIT_SLEEP)
    _dismiss_alert(webdriver)
    if debug:
        time.sleep(3)
        save_screenshot(debug_form_post_initial, webdriver, browser_params,
                        manager_params)

    # fill any follow-up forms...
    wait_until_loaded(webdriver, _PAGE_LOAD_TIME)  # wait if we got redirected
    follow_up_form = None

    # first check other windows (ex. pop-ups)
    windows = webdriver.window_handles
    if len(windows) > 1:
        for window in windows:
            if window == main_handle:
                continue
            webdriver.switch_to_window(window)

            # only the first follow-up form found is submitted; the
            # remaining pop-ups are simply closed
            if follow_up_form is None:
                follow_up_form = _find_newsletter_form(webdriver)
                if follow_up_form is not None:
                    if debug:
                        dump_page_source(debug_page_source_followup,
                                         webdriver, browser_params,
                                         manager_params)
                    _form_fill_and_submit(
                        follow_up_form, user_info, webdriver, True,
                        browser_params, manager_params,
                        debug_form_pre_followup if debug else None)
                    time.sleep(_FORM_SUBMIT_SLEEP)
                    _dismiss_alert(webdriver)
                    if debug:
                        time.sleep(3)
                        save_screenshot(debug_form_post_followup, webdriver,
                                        browser_params, manager_params)

            webdriver.close()
        webdriver.switch_to_window(main_handle)
        time.sleep(1)

    # else check current page
    if follow_up_form is None:
        follow_up_form = _find_newsletter_form(webdriver)
        if follow_up_form is not None:
            if debug:
                time.sleep(3)
                dump_page_source(debug_page_source_followup, webdriver,
                                 browser_params, manager_params)
            _form_fill_and_submit(follow_up_form, user_info, webdriver, True,
                                  browser_params, manager_params,
                                  debug_form_pre_followup if debug else None)
            time.sleep(_FORM_SUBMIT_SLEEP)
            _dismiss_alert(webdriver)
            if debug:
                time.sleep(3)
                save_screenshot(debug_form_post_followup, webdriver,
                                browser_params, manager_params)

    # switch back
    if in_iframe:
        webdriver.switch_to_default_content()

    # close other windows (ex. pop-ups) opened by the follow-up submission
    windows = webdriver.window_handles
    if len(windows) > 1:
        for window in windows:
            if window != main_handle:
                webdriver.switch_to_window(window)
                webdriver.close()
        webdriver.switch_to_window(main_handle)
        time.sleep(1)

    return True
def execute_command(command, webdriver, proxy_queue, browser_settings,
                    browser_params, manager_params, extension_sockets):
    """Dispatch one BrowserManager command tuple to its helper function.

    Commands are of the form ``(COMMAND, ARG0, ARG1, ...)``: the first
    element selects the helper and the remaining elements are forwarded to it
    as arguments. A command name that matches none of the branches below
    results in no call at all.

    The only imports in this file should be imports to helper libraries.
    """
    action = command[0]

    if action == 'GET':
        browser_commands.get_website(
            url=command[1],
            sleep=command[2],
            visit_id=command[3],
            webdriver=webdriver,
            proxy_queue=proxy_queue,
            browser_params=browser_params,
            extension_sockets=extension_sockets,
        )
    elif action == 'BROWSE':
        browser_commands.browse_website(
            url=command[1],
            num_links=command[2],
            sleep=command[3],
            visit_id=command[4],
            webdriver=webdriver,
            proxy_queue=proxy_queue,
            browser_params=browser_params,
            manager_params=manager_params,
            extension_sockets=extension_sockets,
        )
    elif action == 'DUMP_FLASH_COOKIES':
        browser_commands.dump_flash_cookies(
            start_time=command[1],
            visit_id=command[2],
            webdriver=webdriver,
            browser_params=browser_params,
            manager_params=manager_params,
        )
    elif action == 'DUMP_PROFILE_COOKIES':
        browser_commands.dump_profile_cookies(
            start_time=command[1],
            visit_id=command[2],
            webdriver=webdriver,
            browser_params=browser_params,
            manager_params=manager_params,
        )
    elif action == 'DUMP_PROF':
        profile_commands.dump_profile(
            browser_profile_folder=browser_params['profile_path'],
            manager_params=manager_params,
            browser_params=browser_params,
            tar_location=command[1],
            close_webdriver=command[2],
            webdriver=webdriver,
            browser_settings=browser_settings,
            compress=command[3],
            # Flash data is saved only when 'disable_flash' is exactly False.
            save_flash=browser_params['disable_flash'] is False,
        )
    elif action == 'EXTRACT_LINKS':
        browser_commands.extract_links(webdriver, browser_params,
                                       manager_params)
    elif action == 'SAVE_SCREENSHOT':
        browser_commands.save_screenshot(
            screenshot_name=command[1],
            webdriver=webdriver,
            browser_params=browser_params,
            manager_params=manager_params,
        )
    elif action == 'DUMP_PAGE_SOURCE':
        browser_commands.dump_page_source(
            dump_name=command[1],
            webdriver=webdriver,
            browser_params=browser_params,
            manager_params=manager_params,
        )
    elif action == 'BROWSE_AND_DUMP_SOURCE':
        browser_commands.browse_and_dump_source(
            url=command[1],
            num_links=command[2],
            sleep=command[3],
            visit_id=command[4],
            webdriver=webdriver,
            proxy_queue=proxy_queue,
            browser_params=browser_params,
            manager_params=manager_params,
            extension_sockets=extension_sockets,
        )
    elif action == 'RECURSIVE_DUMP_PAGE_SOURCE':
        browser_commands.recursive_dump_page_source(
            visit_id=command[2],
            driver=webdriver,
            manager_params=manager_params,
            suffix=command[1],
        )
    elif action == 'FACEBOOK_LOGIN':
        facebook_commands.facebook_login(
            driver=webdriver,
            url=command[1],
            visit_id=command[2],
            manager_params=manager_params,
            browser_params=browser_params,
        )
    elif action == 'REQUEST_FILTER':
        browser_commands.request_filter(
            control_message=command[1],
            filter_name=command[2],
            crawl_id=browser_params['crawl_id'],
            extension_sockets=extension_sockets,
            manager_params=manager_params,
        )
    elif action == 'RUN_CUSTOM_FUNCTION':
        # The callable in command[1] receives command[2] as positional
        # arguments, command[3] as ``visit_id`` and the whole execution
        # context as further keyword arguments.
        context = {"command": command,
                   "driver": webdriver,
                   "proxy_queue": proxy_queue,
                   "browser_settings": browser_settings,
                   "browser_params": browser_params,
                   "manager_params": manager_params,
                   "extension_sockets": extension_sockets}
        custom_function = command[1]
        custom_function(*command[2], visit_id=command[3], **context)
def execute_command(command, webdriver, browser_settings, browser_params,
                    manager_params, extension_socket):
    """Dispatch one BrowserManager command tuple to its helper function.

    Commands are of the form ``(COMMAND, ARG0, ARG1, ...)``: the first
    element selects the helper and the remaining elements are forwarded to it
    as arguments. A command name that matches none of the branches below
    results in no call at all.
    """
    action = command[0]

    if action == 'GET':
        browser_commands.get_website(
            url=command[1],
            sleep=command[2],
            visit_id=command[3],
            webdriver=webdriver,
            browser_params=browser_params,
            extension_socket=extension_socket,
        )
    elif action == 'BROWSE':
        browser_commands.browse_website(
            url=command[1],
            num_links=command[2],
            sleep=command[3],
            visit_id=command[4],
            webdriver=webdriver,
            browser_params=browser_params,
            manager_params=manager_params,
            extension_socket=extension_socket,
        )
    elif action == 'DUMP_FLASH_COOKIES':
        browser_commands.dump_flash_cookies(
            start_time=command[1],
            visit_id=command[2],
            webdriver=webdriver,
            browser_params=browser_params,
            manager_params=manager_params,
        )
    elif action == 'DUMP_PROFILE_COOKIES':
        browser_commands.dump_profile_cookies(
            start_time=command[1],
            visit_id=command[2],
            webdriver=webdriver,
            browser_params=browser_params,
            manager_params=manager_params,
        )
    elif action == 'DUMP_PROF':
        profile_commands.dump_profile(
            browser_profile_folder=browser_params['profile_path'],
            manager_params=manager_params,
            browser_params=browser_params,
            tar_location=command[1],
            close_webdriver=command[2],
            webdriver=webdriver,
            browser_settings=browser_settings,
            compress=command[3],
            # Flash data is saved only when 'disable_flash' is exactly False.
            save_flash=browser_params['disable_flash'] is False,
        )
    elif action == 'DUMP_PAGE_SOURCE':
        browser_commands.dump_page_source(
            visit_id=command[2],
            driver=webdriver,
            manager_params=manager_params,
            suffix=command[1],
        )
    elif action == 'RECURSIVE_DUMP_PAGE_SOURCE':
        browser_commands.recursive_dump_page_source(
            visit_id=command[2],
            driver=webdriver,
            manager_params=manager_params,
            suffix=command[1],
        )
    elif action == 'SAVE_SCREENSHOT':
        browser_commands.save_screenshot(
            visit_id=command[2],
            crawl_id=browser_params['crawl_id'],
            driver=webdriver,
            manager_params=manager_params,
            suffix=command[1],
        )
    elif action == 'SCREENSHOT_FULL_PAGE':
        browser_commands.screenshot_full_page(
            visit_id=command[2],
            crawl_id=browser_params['crawl_id'],
            driver=webdriver,
            manager_params=manager_params,
            suffix=command[1],
        )
    elif action == 'RUN_CUSTOM_FUNCTION':
        # The callable in command[1] receives command[2] as positional
        # arguments plus the whole execution context as keyword arguments.
        context = {"command": command,
                   "driver": webdriver,
                   "browser_settings": browser_settings,
                   "browser_params": browser_params,
                   "manager_params": manager_params,
                   "extension_socket": extension_socket}
        custom_function = command[1]
        custom_function(*command[2], **context)
def _find_and_fill_form(webdriver, user_data, visit_id, debug, browser_params,
                        manager_params, logger):
    """Find a newsletter form, submit it, and submit any follow-up form.

    Search order for the initial form:

    1. modal dialogs, for at most ``_MAX_POPUP_DISMISS`` rounds; a dialog
       without a form is dismissed (by clicks, or by pressing ESC when no
       click was made) before the next round;
    2. the rest of the page;
    3. every ``<iframe>`` and ``<frame>`` on the page.

    After the first submission, any other open windows (e.g. pop-ups) and
    then the current page are checked for one follow-up form, which is
    submitted as well. Every window other than the one focused on entry is
    closed before returning.

    Args:
        webdriver: Selenium driver positioned on the page to inspect.
        user_data: Mapping passed to ``_form_fill_and_submit`` as the user
            info; its ``'email'`` entry is reported in the log messages.
        visit_id: Used (stringified) as the prefix of debug artifact names
            and reported in the log messages.
        debug: When true, progress is logged at debug level and page sources
            and screenshots are saved around each submission. It has no
            influence on which browser actions are performed.
        browser_params: Forwarded to the dump/screenshot/submit helpers.
        manager_params: Forwarded to the dump/screenshot/submit helpers.
        logger: Logger used for progress, submissions and errors.

    Returns:
        True if a newsletter form was found and submitted, False if no form
        was found in a dialog, on the page or in any frame.
    """
    current_url = webdriver.current_url
    main_handle = webdriver.current_window_handle
    in_iframe = False

    if debug:
        logger.debug('The current URL is %s', current_url)

    # debug: names of the before/after screenshots and page-source dumps
    debug_file_prefix = str(visit_id) + '_'
    debug_form_pre_initial = debug_file_prefix + 'form_initial_presubmit'
    debug_form_post_initial = debug_file_prefix + 'form_initial_result'
    debug_form_pre_followup = debug_file_prefix + 'form_followup_presubmit'
    debug_form_post_followup = debug_file_prefix + 'form_followup_result'
    debug_page_source_initial = debug_file_prefix + 'src_initial'
    debug_page_source_followup = debug_file_prefix + 'src_followup'

    newsletter_form = None

    # Search for a modal dialog, and for a form in the modal dialog.
    # Search for no more than _MAX_POPUP_DISMISS modal dialogs.
    try:
        search_count = 0
        while search_count < _MAX_POPUP_DISMISS:
            if debug:
                logger.debug('Round %d of modal dialog search...',
                             search_count)
            dialog_container = _get_dialog_container(webdriver)
            if not dialog_container:
                if debug:
                    logger.debug('No dialog on the page')
                break

            if debug:
                logger.debug(
                    'Modal dialog found, searching for newsletter form in dialog...'
                )
            newsletter_form = _find_newsletter_form(
                dialog_container, webdriver, debug, logger)
            if newsletter_form is not None:
                if debug:
                    logger.debug('Found a newsletter form in the dialog')
                break

            # No form in this dialog: get it out of the way. The ESC
            # fallback is a browser action, so it must run regardless of
            # `debug`; only the log lines are gated by it.
            clicked = _dismiss_dialog(webdriver, dialog_container)
            if int(clicked) > 0:
                if debug:
                    logger.debug('No newsletter form in dialog, dismissed it')
            else:
                if debug:
                    logger.debug('Made no clicks to dismiss the dialog')
                webdriver.find_element_by_tag_name('html').send_keys(
                    Keys.ESCAPE)
                if debug:
                    logger.debug('Pressed ESC to dismiss the dialog')
            search_count += 1
    except Exception as e:
        # Dialog handling is best-effort; fall through to the page search.
        logger.error('Error while examining for modal dialogs: %s', str(e))

    # try to find newsletter forms on landing page after dismissing the dialog
    if newsletter_form is None:
        if debug:
            logger.debug(
                'Searching the rest of the page for a newsletter form')
        newsletter_form = _find_newsletter_form(None, webdriver, debug,
                                                logger)

    # Search for newsletter forms in iframes
    if newsletter_form is None:
        if debug:
            logger.debug(
                'No newsletter form found on this page, searching for forms in iframes...'
            )

        # search for forms in iframes (if present)
        iframes = webdriver.find_elements_by_tag_name(
            'iframe') + webdriver.find_elements_by_tag_name('frame')
        if debug:
            logger.debug('Searching in %d iframes', len(iframes))

        for iframe in iframes:
            try:
                # switch to the iframe
                webdriver.switch_to_frame(iframe)

                # is there a form?
                newsletter_form = _find_newsletter_form(
                    None, webdriver, debug, logger)
                if newsletter_form is not None:
                    if debug:
                        dump_page_source(debug_page_source_initial,
                                         webdriver, browser_params,
                                         manager_params)
                        logger.debug(
                            'Found a newsletter in an iframe on this page')
                    in_iframe = True
                    break  # form found, stay on the iframe

                # switch back
                webdriver.switch_to_default_content()
            except Exception as e:
                # Always report the failure, not only in debug mode.
                logger.error('Error while analyzing an iframe: %s', str(e))
                # We are about to leave this frame, so a form located inside
                # it can no longer be used from the top-level document.
                newsletter_form = None
                webdriver.switch_to_default_content()

        # still no form?
        if newsletter_form is None:
            if debug:
                logger.debug('None of the iframes have newsletter forms')
            return False
    elif debug:
        dump_page_source(debug_page_source_initial, webdriver,
                         browser_params, manager_params)

    email = user_data['email']
    user_info = user_data
    _form_fill_and_submit(newsletter_form, user_info, webdriver, True,
                          browser_params, manager_params,
                          debug_form_pre_initial if debug else None)
    logger.info('Submitted form on [%s] with email [%s] on visit_id [%d]',
                current_url, email, visit_id)
    time.sleep(_FORM_SUBMIT_SLEEP)
    _dismiss_alert(webdriver)
    if debug:
        save_screenshot(debug_form_post_initial, webdriver, browser_params,
                        manager_params)
        logger.debug('The current URL is %s', webdriver.current_url)
        logger.debug('Filling any follow-up forms on this page...')

    # fill any follow-up forms...
    wait_until_loaded(webdriver, _PAGE_LOAD_TIME)  # wait if we got redirected
    follow_up_form = None

    # first check other windows (ex. pop-ups)
    windows = webdriver.window_handles
    if len(windows) > 1:
        if debug:
            logger.debug('Found %d windows (e.g., popups)', len(windows))
        for window in windows:
            if window == main_handle:
                continue
            webdriver.switch_to_window(window)

            # only the first follow-up form found is submitted; the
            # remaining pop-ups are simply closed
            if follow_up_form is None:
                follow_up_form = _find_newsletter_form(
                    None, webdriver, debug, logger)
                if follow_up_form is not None:
                    if debug:
                        dump_page_source(debug_page_source_followup,
                                         webdriver, browser_params,
                                         manager_params)
                        logger.debug(
                            'Found a newsletter form in another window')
                    _form_fill_and_submit(
                        follow_up_form, user_info, webdriver, True,
                        browser_params, manager_params,
                        debug_form_pre_followup if debug else None)
                    logger.info(
                        'Submitted form on [%s] with email [%s] on visit_id [%d]',
                        webdriver.current_url, email, visit_id)
                    time.sleep(_FORM_SUBMIT_SLEEP)
                    _dismiss_alert(webdriver)
                    if debug:
                        save_screenshot(debug_form_post_followup, webdriver,
                                        browser_params, manager_params)

            webdriver.close()
        webdriver.switch_to_window(main_handle)
        time.sleep(1)

    # else check current page
    if follow_up_form is None:
        if debug:
            logger.debug(
                'Found no follow-up forms in other windows, checking current page'
            )
        follow_up_form = _find_newsletter_form(None, webdriver, debug,
                                               logger)
        if follow_up_form is not None:
            if debug:
                dump_page_source(debug_page_source_followup, webdriver,
                                 browser_params, manager_params)
                logger.debug('Found a follow-up form in this page')
            _form_fill_and_submit(follow_up_form, user_info, webdriver, True,
                                  browser_params, manager_params,
                                  debug_form_pre_followup if debug else None)
            logger.info(
                'Submitted form on [%s] with email [%s] on visit_id [%d]',
                webdriver.current_url, email, visit_id)
            time.sleep(_FORM_SUBMIT_SLEEP)
            _dismiss_alert(webdriver)
            if debug:
                save_screenshot(debug_form_post_followup, webdriver,
                                browser_params, manager_params)
        elif debug:
            logger.debug('No follow-up forms on the current page')

    # switch back
    if in_iframe:
        if debug:
            logger.debug(
                'We were in an iframe, switching back to the main window')
        webdriver.switch_to_default_content()

    # close other windows (ex. pop-ups) opened by the follow-up submission
    windows = webdriver.window_handles
    if len(windows) > 1:
        if debug:
            logger.debug('Closing %d windows (e.g., popups)', len(windows))
        for window in windows:
            if window != main_handle:
                webdriver.switch_to_window(window)
                webdriver.close()
        webdriver.switch_to_window(main_handle)
        time.sleep(1)

    return True