Ejemplo n.º 1
0
    def execute(self, url):
        """
        Open ``url`` in a Selenium-driven Firefox and capture data from the page.

        The browser is built from the instance attributes ``profilepath``
        (Firefox profile directory), ``headless`` and ``geckopath``
        (geckodriver executable). Results are stored on the instance:

        * ``source``    - page source after loading
        * ``trueurl``   - URL the browser ended up on
        * ``cookies``   - cookies reported by the browser
        * ``imagepath`` - absolute path of the screenshot written to the
          current working directory

        :param url: address the browser navigates to
        """
        # Browser setup: load the configured profile and set cookieBehavior to 0
        # (Mozilla documents 0 as "accept all cookies" - confirm for the
        # Firefox version in use).
        profile = FirefoxProfile(self.profilepath)
        profile.set_preference("network.cookie.cookieBehavior", 0)
        profile.update_preferences()
        options = Options()
        options.headless = self.headless
        browser = webdriver.Firefox(firefox_profile=profile,
                                    options=options,
                                    executable_path=self.geckopath)
        # Everything after the browser exists runs under try/finally so the
        # Firefox/geckodriver processes are shut down even if a step fails.
        try:
            browser.get(url)
            self.source = browser.page_source
            self.trueurl = browser.current_url
            self.cookies = browser.get_cookies()
            # Pause before the screenshot; presumably to let the page finish
            # rendering.
            time.sleep(3)
            # str(self)[-19:-1] is used as a per-instance suffix (presumably
            # the address part of the default repr - verify if the class
            # defines its own __str__/__repr__).
            self.imagepath = os.path.join(
                os.path.abspath(os.getcwd()),
                'screenshot' + str(self)[-19:-1] + '.png')
            browser.save_screenshot(self.imagepath)
        finally:
            # Quit the browser to clear its temporary storage.
            browser.quit()
Ejemplo n.º 2
0
def init_glob_vars():
    """Populate the module-level browser, proxy and driver-path globals.

    Sets ``PROFILE_glob``, ``CAPABILITIES_glob``, ``PROXY_ADDRESS_glob``,
    ``PROXY_glob``, ``SCRIPT_DIR_glob`` and ``EXECUTABLE_PATH_glob``.
    The Tor browser must already be running for the proxy settings to work.
    """
    global PROFILE_glob
    global CAPABILITIES_glob
    global SCRIPT_DIR_glob
    global EXECUTABLE_PATH_glob
    global PROXY_ADDRESS_glob
    global PROXY_glob

    # Browser settings: route traffic through the local SOCKS5 proxy.
    PROFILE_glob = FirefoxProfile(
        '/Users/altay.amanbay/Library/Application Support/Firefox/Profiles')
    PROFILE_glob.set_preference("network.proxy.type", 1)
    PROFILE_glob.set_preference("network.proxy.socks", "127.0.0.1")
    PROFILE_glob.set_preference("network.proxy.socks_port", 9150)
    PROFILE_glob.set_preference("network.proxy.socks_version", 5)
    PROFILE_glob.set_preference("network.proxy.socks_remote_dns", True)
    PROFILE_glob.update_preferences()

    # Copy the template: DesiredCapabilities.FIREFOX is a shared class-level
    # dict, and editing it in place would leak these settings into every
    # other user of the constant.
    CAPABILITIES_glob = DesiredCapabilities.FIREFOX.copy()
    CAPABILITIES_glob["marionette"] = True
    CAPABILITIES_glob[
        "binary"] = "/Applications/Firefox.app/Contents/MacOS/firefox-bin"

    # Proxy (start Tor browser before executing script)
    PROXY_ADDRESS_glob = "127.0.0.1:9150"  # localhost and Tor browser's default port number
    # NOTE(review): the address above is the SOCKS port configured on the
    # profile, but it is registered here as an HTTP proxy - confirm intent.
    PROXY_glob = Proxy({
        'proxyType': ProxyType.MANUAL,
        'httpProxy': PROXY_ADDRESS_glob,
    })

    # Webdriver path, relative to the directory containing this script.
    SCRIPT_DIR_glob = os.path.dirname(os.path.abspath(__file__))
    print('Abs path:', SCRIPT_DIR_glob)
    EXECUTABLE_PATH_glob = SCRIPT_DIR_glob + '/webdrivers/geckodriver - v0.18.0/geckodriver'
Ejemplo n.º 3
0
def prepareTor():
    """Create a Firefox driver, optionally routed through a local Tor proxy.

    Reads ``Settings['TOR']``, ``Settings['PROFILE_PATH']``,
    ``Settings['USER_AGENTS']`` and ``Settings['GECKO_DRIVER']``.
    A user agent is picked at random for every call. The driver is pointed
    at http://check.torproject.org before being returned.

    :return: the started ``webdriver.Firefox`` instance
    :raises IndexError: if ``Settings['USER_AGENTS']`` is empty
    """
    use_tor = Settings['TOR']

    # A stored profile is only honoured in Tor mode, as before.
    if use_tor and Settings['PROFILE_PATH']:
        profile = FirefoxProfile(Settings['PROFILE_PATH'])
    else:
        profile = FirefoxProfile()

    if use_tor:
        profile.set_preference('network.proxy.type', 1)
        profile.set_preference('network.proxy.socks', '127.0.0.1')
        profile.set_preference('network.proxy.socks_port', 9050)
        # NOTE(review): with remote DNS off, host names are resolved outside
        # the SOCKS proxy - confirm this is intended for a Tor setup.
        profile.set_preference('network.proxy.socks_remote_dns', False)

    # Pick one entry without reordering the shared settings list.
    user_agent = random.choice(Settings['USER_AGENTS'])
    profile.set_preference("general.useragent.override", user_agent)
    profile.set_preference("intl.accept_languages", "en-US")
    profile.update_preferences()

    driver = webdriver.Firefox(firefox_profile=profile,
                               executable_path=Settings['GECKO_DRIVER'])

    driver.get("http://check.torproject.org")
    driver.implicitly_wait(10)
    return driver
Ejemplo n.º 4
0
def session_create(config):
    """Start a Firefox session configured from ``config`` and return the driver.

    Recognised ``config`` keys (read with ``config.get``):

    * ``headless`` - when exactly ``True``, Firefox is started headless
    * ``webdriver_enabled`` - when exactly ``False``, the
      ``dom.webdriver.enabled`` preference is also disabled on the options

    :param config: mapping with the keys described above
    :return: the started ``webdriver.Firefox`` instance
    """
    log.info("Creating session")

    options = Options()

    if config.get('headless', False) is True:
        log.info("Headless mode")
        options.add_argument("--headless")

    if config.get('webdriver_enabled') is False:
        options.set_preference("dom.webdriver.enabled", False)

    # NOTE(review): the profile below disables dom.webdriver.enabled
    # unconditionally, regardless of config['webdriver_enabled'].
    profile = FirefoxProfile()
    profile.set_preference("dom.webdriver.enabled", False)
    profile.set_preference('useAutomationExtension', False)
    profile.update_preferences()

    # Work on a copy so the shared DesiredCapabilities.FIREFOX template is
    # never modified through this session.
    desired = DesiredCapabilities.FIREFOX.copy()

    driver = webdriver.Firefox(options=options,
                               firefox_profile=profile,
                               desired_capabilities=desired)

    # Lazy %-style arguments: formatting only happens if the record is emitted.
    log.info("New session is: %s %s",
             driver.session_id, driver.command_executor._url)

    return driver
Ejemplo n.º 5
0
 def get_firefox_profile(cls, temp_download_dir):
     """Build a Firefox profile that saves downloads to ``temp_download_dir``.

     The profile points the download directory at the absolute path of
     ``temp_download_dir``, lists the MIME types that are saved without a
     prompt, clears the "viewable internally" types and disables pdf.js.

     :param temp_download_dir: object exposing ``absolute()`` (e.g. a
         ``pathlib.Path``) that names the download directory
     :return: the configured ``FirefoxProfile``
     """
     firefox_profile = FirefoxProfile()
     target_dir = str(temp_download_dir.absolute())
     # MIME types handed to browser.helperApps.neverAsk.saveToDisk.
     silent_save_types = "application/pdf;text/plain;application/text;text/xml;application/xml;application/vnd.openxmlformats-officedocument.wordprocessingml.document;application/rtf;application/vnd.rar;text/plain;image/webp;image/bmp;image/jpeg;application/x-7z-compressed;application/zip;application/x-tar;application/gzip;application/vnd.openxmlformats-officedocument.presentationml.presentation;application/octet-stream;application/x-rar-compressed;application/x-zip-compressed;multipart/x-zip;application/vnd.oasis.opendocument.presentation;application/vnd.oasis.opendocument.spreadsheet;application/vnd.oasis.opendocument.text"
     download_prefs = (
         ("browser.download.folderList", 2),
         ("browser.download.dir", target_dir),
         ("browser.download.useDownloadDir", True),
         ("browser.download.viewableInternally.enabledTypes", ""),
         ("browser.helperApps.neverAsk.saveToDisk", silent_save_types),
         ("pdfjs.disabled", True),
     )
     for pref_name, pref_value in download_prefs:
         firefox_profile.set_preference(pref_name, pref_value)
     firefox_profile.update_preferences()
     return firefox_profile
Ejemplo n.º 6
0
    def setUp(self):
        """Start Firefox with an iPhone user agent and relaxed certificate checks.

        Stores the driver on ``self.driver`` (implicit wait of 30 seconds) and
        initialises ``self.base_url`` and ``self.accept_next_alert``.
        """
        iphone_user_agent = "Mozilla/5.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12A365 Safari/600.1.4"

        ff_profile = FirefoxProfile()
        # Certificate handling: accept untrusted certificates.
        certificate_prefs = (
            ("webdriver_assume_untrusted_issuer", False),
            ("webdriver_accept_untrusted_certs", True),
        )
        for pref_name, pref_value in certificate_prefs:
            ff_profile.set_preference(pref_name, pref_value)
        ff_profile.accept_untrusted_certs = True

        # Present the browser as a mobile device.
        ff_profile.set_preference("general.useragent.override",
                                  iphone_user_agent)
        ff_profile.update_preferences()
        self.driver = webdriver.Firefox(ff_profile)

        self.base_url = "http://m.finance.yahoo.co.jp/"
        self.driver.implicitly_wait(30)
        self.accept_next_alert = True
Ejemplo n.º 7
0
    def br(self):
        """Start a virtual display plus a SOCKS-proxied Firefox and register it.

        The new driver is appended to ``self.browslist`` while holding
        ``self.lock``. Nothing is returned.
        """
        # NOTE(review): the display object is not stored anywhere, so this
        # method offers no way to stop it later.
        virtual_display = Display(visible=0, size=display_size)
        virtual_display.start()

        proxied_profile = FirefoxProfile()
        socks_prefs = (
            ('network.proxy.type', 1),
            ('network.proxy.socks', tor_proxy),
            ('network.proxy.socks_port', tor_port),
            ("network.proxy.socks_remote_dns", False),
        )
        for pref_name, pref_value in socks_prefs:
            proxied_profile.set_preference(pref_name, pref_value)
        proxied_profile.update_preferences()

        new_driver = webdriver.Firefox(firefox_profile=proxied_profile,
                                       executable_path=geckodriver_path)
        with self.lock:
            self.browslist.append(new_driver)
Ejemplo n.º 8
0
def _load_firefox_profile():
    """Build a Firefox profile from ``data/firefox_profile.js.template``.

    A temporary profile directory is created on first use, remembered in the
    module-level ``firefox_profile_tmpdir`` and removed at interpreter exit.
    The template is rendered with ``profile_dir`` set to that directory,
    parsed as JSON, and every key/value pair is applied as a preference.

    :return: the configured ``FirefoxProfile``
    """
    global firefox_profile_tmpdir
    if firefox_profile_tmpdir is None:
        firefox_profile_tmpdir = mkdtemp(prefix='firefox_profile_')
        # Clean up tempdir at exit; ignore_errors keeps interpreter shutdown
        # quiet if the directory has already been removed.
        atexit.register(rmtree, firefox_profile_tmpdir, ignore_errors=True)

    template = data_path.join('firefox_profile.js.template').read()
    profile_json = Template(template).substitute(profile_dir=firefox_profile_tmpdir)
    profile_dict = json.loads(profile_json)

    profile = FirefoxProfile(firefox_profile_tmpdir)
    # items() works on both Python 2 and 3 (iteritems() is Python 2 only).
    for pref_name, pref_value in profile_dict.items():
        profile.set_preference(pref_name, pref_value)
    profile.update_preferences()
    return profile
Ejemplo n.º 9
0
def _load_firefox_profile():
    """Build a Firefox profile from ``data/firefox_profile.js.template``.

    Every call creates a fresh temporary profile directory, which is removed
    at interpreter exit. The template is rendered with ``profile_dir`` set to
    that directory, parsed as JSON, and every key/value pair is applied as a
    preference.

    :return: the configured ``FirefoxProfile``
    """
    firefox_profile_tmpdir = mkdtemp(prefix='firefox_profile_')
    log.debug("created firefox profile")
    # Clean up tempdir at exit
    atexit.register(rmtree, firefox_profile_tmpdir, ignore_errors=True)

    template = data_path.join('firefox_profile.js.template').read()
    profile_json = Template(template).substitute(profile_dir=firefox_profile_tmpdir)
    profile_dict = json.loads(profile_json)

    profile = FirefoxProfile(firefox_profile_tmpdir)
    # Plain loop: the calls are made for their side effect only, so there is
    # no reason to build a throwaway list of None values.
    for pref_name, pref_value in profile_dict.items():
        profile.set_preference(pref_name, pref_value)
    profile.update_preferences()
    return profile
Ejemplo n.º 10
0
def get_driver(device):
    """Return a remote Firefox driver that uses a fresh default profile.

    The profile is shipped to the remote end in its encoded form under the
    ``firefox_profile`` capability; the server address comes from
    ``settings.SELENIUM_COMMAND_EXECUTER``.

    :param device: accepted but not used by this function
    :return: the connected ``webdriver.Remote`` instance
    """
    default_profile = FirefoxProfile()
    default_profile.update_preferences()

    # Start from a private copy of the stock Firefox capabilities.
    capabilities = DesiredCapabilities.FIREFOX.copy()
    capabilities['firefox_profile'] = default_profile.encoded

    return webdriver.Remote(
        command_executor=settings.SELENIUM_COMMAND_EXECUTER,
        desired_capabilities=capabilities,
    )
Ejemplo n.º 11
0
    def setUp(self):
        """Prepare a Firefox driver that poses as an iPhone browser.

        Untrusted certificates are accepted. The driver is stored on
        ``self.driver`` with a 30 second implicit wait; ``self.base_url`` and
        ``self.accept_next_alert`` are initialised as well.
        """
        browser_profile = FirefoxProfile()

        # Preferences applied in order: certificate handling first, then the
        # user-agent override. accept_untrusted_certs is set in between, as
        # an attribute rather than a preference.
        browser_profile.set_preference("webdriver_assume_untrusted_issuer",
                                       False)
        browser_profile.set_preference("webdriver_accept_untrusted_certs",
                                       True)
        browser_profile.accept_untrusted_certs = True

        user_agent = "Mozilla/5.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12A365 Safari/600.1.4"
        browser_profile.set_preference("general.useragent.override",
                                       user_agent)
        browser_profile.update_preferences()

        self.driver = webdriver.Firefox(browser_profile)

        self.base_url = "http://m.finance.yahoo.co.jp/"
        self.driver.implicitly_wait(30)
        self.accept_next_alert = True
Ejemplo n.º 12
0
def _load_firefox_profile():
    """Build a Firefox profile from ``data/firefox_profile.js.template``.

    A temporary profile directory is created on first use, remembered in the
    module-level ``firefox_profile_tmpdir`` and removed at interpreter exit.
    The template is rendered with ``profile_dir`` set to that directory,
    parsed as JSON, and every key/value pair is applied as a preference.

    :return: the configured ``FirefoxProfile``
    """
    global firefox_profile_tmpdir
    if firefox_profile_tmpdir is None:
        firefox_profile_tmpdir = mkdtemp(prefix='firefox_profile_')
        # Clean up tempdir at exit; ignore_errors keeps interpreter shutdown
        # quiet if the directory has already been removed.
        atexit.register(rmtree, firefox_profile_tmpdir, ignore_errors=True)

    template = data_path.join('firefox_profile.js.template').read()
    profile_json = Template(template).substitute(
        profile_dir=firefox_profile_tmpdir)
    profile_dict = json.loads(profile_json)

    profile = FirefoxProfile(firefox_profile_tmpdir)
    # items() works on both Python 2 and 3 (iteritems() is Python 2 only).
    for pref_name, pref_value in profile_dict.items():
        profile.set_preference(pref_name, pref_value)
    profile.update_preferences()
    return profile
Ejemplo n.º 13
0
def _load_firefox_profile():
    """Create a Firefox profile from ``data/firefox_profile.js.template``.

    Each call makes a new temporary profile directory (deleted at interpreter
    exit), renders the template with ``profile_dir`` pointing at it, and
    applies every key/value pair of the resulting JSON as a preference.

    :return: the configured ``FirefoxProfile``
    """
    tmp_profile_dir = mkdtemp(prefix='firefox_profile_')
    log.debug("created firefox profile")
    # Remove the directory when the interpreter shuts down.
    atexit.register(rmtree, tmp_profile_dir, ignore_errors=True)

    raw_template = data_path.join('firefox_profile.js.template').read()
    rendered = Template(raw_template).substitute(profile_dir=tmp_profile_dir)
    preferences = json.loads(rendered)

    firefox_profile = FirefoxProfile(tmp_profile_dir)
    for pref_name, pref_value in preferences.items():
        firefox_profile.set_preference(pref_name, pref_value)
    firefox_profile.update_preferences()
    return firefox_profile
Ejemplo n.º 14
0
    def create_firefox_ninja(self, desired_capabilities, profile, options, ip):
        """
        Create a Firefox driver with a "ninja" profile that disables
        ``dom.webdriver.enabled`` and ``useAutomationExtension``.

        :param desired_capabilities: capabilities mapping, or ``None`` to
            start from the stock Firefox capabilities; it is not modified
        :param profile: ignored - a warning is logged if one is supplied and
            the ninja profile is used instead
        :param options: passed through to ``create_firefox``
        :param ip: passed through to ``create_firefox``
        :return: whatever ``self.create_firefox`` returns
        """
        if profile is not None:
            self.log.warning(
                'Firefox ninja received a `profile` which will be '
                'ignored and replaced by `ninja profile`.')
        ninja_profile = FirefoxProfile()
        ninja_profile.set_preference("dom.webdriver.enabled", False)
        ninja_profile.set_preference('useAutomationExtension', False)
        ninja_profile.update_preferences()

        if desired_capabilities is None:
            desired_capabilities = DesiredCapabilities.FIREFOX
        # Copy before editing: neither the shared DesiredCapabilities.FIREFOX
        # template nor the caller's mapping should gain the marionette flag
        # as a side effect of this call.
        capabilities = dict(desired_capabilities)
        capabilities['marionette'] = True

        return self.create_firefox(capabilities, ninja_profile, options, ip)
Ejemplo n.º 15
0
def _load_firefox_profile():
    """Build a Firefox profile from ``data/firefox_profile.js.template``.

    The temporary profile directory is kept in the module-level
    ``firefox_profile_tmpdir``; it is (re)created when unset or missing on
    disk and removed at interpreter exit. The template is rendered with
    ``profile_dir`` set to that directory, parsed as JSON, and every
    key/value pair is applied as a preference.

    :return: the configured ``FirefoxProfile``
    """
    global firefox_profile_tmpdir

    # Make a new firefox profile dir if it's unset or doesn't exist for some reason
    if firefox_profile_tmpdir is None or not os.path.exists(firefox_profile_tmpdir):
        firefox_profile_tmpdir = mkdtemp(prefix="firefox_profile_")
        # Clean up tempdir at exit
        atexit.register(rmtree, firefox_profile_tmpdir, ignore_errors=True)

    template = data_path.join("firefox_profile.js.template").read()
    profile_json = Template(template).substitute(profile_dir=firefox_profile_tmpdir)
    profile_dict = json.loads(profile_json)

    profile = FirefoxProfile(firefox_profile_tmpdir)
    # items() works on both Python 2 and 3 (iteritems() is Python 2 only).
    for pref_name, pref_value in profile_dict.items():
        profile.set_preference(pref_name, pref_value)
    profile.update_preferences()
    return profile
Ejemplo n.º 16
0
    def connect(pathTor, pathProfile, isHeadless):
        """Launch Tor and return a Firefox driver proxied through it.

        :param pathTor: command used to start Tor (run via ``os.popen``)
        :param pathProfile: Firefox profile directory to load
        :param isHeadless: when truthy, Firefox is started with headless options
        :return: the started ``webdriver.Firefox`` instance
        """
        # Handle kept in a local for the duration of the call, as before.
        tor_handle = os.popen(pathTor)

        tor_profile = FirefoxProfile(pathProfile)
        socks_settings = (
            ('network.proxy.type', 1),
            ('network.proxy.socks', '127.0.0.1'),
            ('network.proxy.socks_port', 9050),
            ("network.proxy.socks_remote_dns", False),
        )
        for pref_name, pref_value in socks_settings:
            tor_profile.set_preference(pref_name, pref_value)
        tor_profile.update_preferences()

        if not isHeadless:
            return webdriver.Firefox(firefox_profile=tor_profile,
                                     executable_path=r'geckodriver.exe')

        headless_options = Options()
        for argument in ('--headless', '--disable-gpu'):
            headless_options.add_argument(argument)
        return webdriver.Firefox(firefox_profile=tor_profile,
                                 executable_path=r'geckodriver.exe',
                                 options=headless_options)
Ejemplo n.º 17
0
    def init(self, config):
        """Connect to the remote WebDriver server and open the start URL.

        Reads from ``config``: ``browser_driver_command_executor``,
        ``browser_profile_dir``, ``proxy_type`` (``'socks'`` or ``'http'``
        enables a manual proxy), ``proxy_host``, ``proxy_port`` and
        ``start_url``. The driver is stored on ``self.driver``.

        Loading the start URL is attempted up to three times with a
        10 second pause between attempts; the last failure is re-raised.

        :param config: configuration object with the attributes listed above
        """
        self.config = config
        command_executor = self.config.browser_driver_command_executor
        # Parenthesised form prints the same value on Python 2 and 3.
        print(command_executor)
        if self.config.browser_profile_dir:
            profile = FirefoxProfile(
                profile_directory=self.config.browser_profile_dir)
            self.dmesg('loading browser profile')
        else:
            profile = None
            self.dmesg('browser profile not configured, use None profile')
        self.dmesg('trying start a browser')

        px = config.proxy_type
        if px in ['socks', 'http']:
            self.dmesg('apply %s proxy, %s:%s' %
                       (px, config.proxy_host, config.proxy_port))
            # Proxy settings need a profile to live in; previously this
            # raised AttributeError when no profile directory was configured.
            if profile is None:
                profile = FirefoxProfile()
            profile.set_preference("network.proxy.type", 1)
            profile.set_preference("network.proxy.%s" % px, config.proxy_host)
            profile.set_preference("network.proxy.%s_port" % px,
                                   int(config.proxy_port))

        # Only touch preferences when there is a profile to write them to.
        if profile is not None:
            profile.set_preference("network.proxy.socks_version", 5)
            profile.update_preferences()

        self.driver = webdriver.Remote(
            command_executor=command_executor,
            desired_capabilities=DesiredCapabilities.FIREFOX,
            browser_profile=profile)

        max_attempts = 3
        for attempt in range(1, max_attempts + 1):
            try:
                self.driver.get(self.config.start_url)
                break
            except Exception:
                # Out of retries: let the caller see the real error.
                if attempt == max_attempts:
                    raise
                # Message reads: "failed to open, retrying in 10s..."
                self.dmesg('打开失败,10s 后重试...')
                sleep(10)
Ejemplo n.º 18
0
    def __create_driver():
        """Create a Firefox driver configured from the module-level settings.

        Settings read: ``FIREFOX_BINARY``, ``HEADLESS``, ``USE_TOR``,
        ``TOR_EXE``, ``TOR_PROFILE``, ``USE_PROXY``, ``PROXY_HOST``,
        ``PROXY_PORT``, ``IMAGES``, ``CSS``, ``FLASH`` and ``NOTIFICATIONS``.

        :return: the started ``webdriver.Firefox`` instance
        """
        binary = FirefoxBinary(FIREFOX_BINARY)

        options = Options()

        if HEADLESS:
            options.add_argument("--headless")

        if USE_TOR:
            # Start Tor and browse with its profile.
            os.popen(TOR_EXE)
            profile = FirefoxProfile(TOR_PROFILE)
        else:
            profile = FirefoxProfile()

        if USE_TOR or USE_PROXY:
            profile.set_preference("network.proxy.type", 1)
            profile.set_preference("network.proxy.socks", PROXY_HOST)
            profile.set_preference("network.proxy.socks_port", PROXY_PORT)
            # NOTE(review): with remote DNS off, host names are resolved
            # outside the SOCKS proxy - confirm this is intended with Tor.
            profile.set_preference("network.proxy.socks_remote_dns", False)

        if not IMAGES:
            profile.set_preference('permissions.default.image', 2)

        if not CSS:
            profile.set_preference('permissions.default.stylesheet', 2)

        if not FLASH:
            # Boolean preference: the string 'false' would be written as a
            # string-typed pref instead of disabling the plugin.
            profile.set_preference('dom.ipc.plugins.enabled.libflashplayer.so',
                                   False)

        if not NOTIFICATIONS:
            profile.set_preference("dom.push.enabled", False)

        profile.update_preferences()

        return webdriver.Firefox(firefox_profile=profile,
                                 firefox_binary=binary,
                                 options=options)
Ejemplo n.º 19
0
class antidriver:
    """Factory for Chrome and Firefox drivers with automation hints reduced.

    The most recently created driver is kept on ``self.driver``.
    """

    # Proxy used by firefox() when the caller does not supply one.
    _DEFAULT_PROXY = {"ip": "221.225.91.175", "port": 43301}

    def __init__(self):
        pass

    def chrome(self):
        """Start Chrome with a fixed user agent and automation switches excluded.

        All cookies are deleted before the driver is returned.

        :return: the started ``webdriver.Chrome`` instance
        """
        option = webdriver.ChromeOptions()
        option.add_argument(
            '--user-agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36"'
        )
        option.add_experimental_option('excludeSwitches', [
            'enable-automation', "ignore-certificate-errors",
            "safebrowsing-disable-download-protection",
            "safebrowsing-disable-auto-update",
            "disable-client-side-phishing-detection"
        ])
        option.add_argument('disable-infobars')
        # Backslash escaped explicitly; the value is unchanged
        # (Google Chrome\chromedriver.exe).
        localpath = "Google Chrome\\chromedriver.exe"
        os.environ["webdriver.chrome.driver"] = localpath
        self.driver = webdriver.Chrome(chrome_options=option,
                                       executable_path=localpath)
        self.driver.delete_all_cookies()
        return self.driver

    def firefox(self, data=None):
        """Start Firefox routed through an HTTP proxy.

        :param data: mapping with ``"ip"`` (str) and ``"port"`` (int) of the
            proxy; defaults to the built-in proxy when omitted
        :return: the started ``webdriver.Firefox`` instance
        """
        # None sentinel instead of a dict literal in the signature, so no
        # mutable object is shared between calls.
        if data is None:
            data = self._DEFAULT_PROXY
        # Raw string keeps the same path without invalid escape sequences.
        self.location = r'C:\Program Files\Mozilla Firefox\firefox.exe'
        self.profile = FirefoxProfile()
        self.profile.set_preference("network.proxy.type", 1)
        self.profile.set_preference('network.proxy.http', data['ip'])
        self.profile.set_preference('network.proxy.http_port',
                                    data['port'])  # int
        self.profile.update_preferences()
        self.driver = webdriver.Firefox(firefox_profile=self.profile,
                                        firefox_binary=self.location)
        return self.driver
Ejemplo n.º 20
0
    def get_firefox_webdriver_args(self):
        """Return the keyword arguments used to start the Firefox webdriver.

        Reads ``self['headless']`` and ``self['selenium_download_dir']``.
        The profile directory comes from the ``GOCEPT_WEBDRIVER_FF_PROFILE``
        environment variable, falling back to ``GOCEPT_SELENIUM_FF_PROFILE``.

        :return: dict with the keys ``'options'`` and ``'firefox_profile'``
        """
        options = selenium.webdriver.FirefoxOptions()

        if self['headless']:
            options.add_argument('-headless')

        profile = FirefoxProfile(
            os.environ.get(
                'GOCEPT_WEBDRIVER_FF_PROFILE',
                os.environ.get('GOCEPT_SELENIUM_FF_PROFILE')))
        profile.native_events_enabled = True

        # Save downloads always to disk into a predefined dir.
        profile.set_preference("browser.download.folderList", 2)
        profile.set_preference(
            "browser.download.manager.showWhenStarting", False)
        profile.set_preference(
            "browser.download.dir", str(self['selenium_download_dir']))
        profile.set_preference(
            "browser.helperApps.neverAsk.saveToDisk", "application/pdf")
        profile.set_preference("pdfjs.disabled", True)

        # Flush only after every preference is set, so the download settings
        # above are part of what gets written out here.
        profile.update_preferences()

        return {'options': options, 'firefox_profile': profile}
Ejemplo n.º 21
0
def firefox_builder():
    """Build and return a Firefox driver from the module-level settings.

    Settings read: ``FIREFOX_BINARY_PATH``, ``HEADLESS`` and
    ``FIREFOX_DRIVER_PATH``. The profile overrides the user agent, turns off
    application updates and diverts new windows into tabs.

    :return: the started ``webdriver.Firefox`` instance
    """
    binary = FirefoxBinary(FIREFOX_BINARY_PATH)
    profile = FirefoxProfile()

    # NOTE(review): the override value starts with the literal text
    # "UserAgent: " - confirm that prefix is meant to be sent.
    profile.set_preference(
        "general.useragent.override",
        "UserAgent: Mozilla/5.0 (X11; Linux x86_64; rv:72.0) Gecko/20100101 Firefox/72.0"
    )
    profile.set_preference("app.update.auto", False)
    profile.set_preference("app.update.enabled", False)

    # apply the setting under (A) to ALL new windows (even script windows with features)
    profile.set_preference("browser.link.open_newwindow.restriction", 0)
    profile.set_preference("browser.link.open_newwindow.override.external",
                           2)  # open external links in a new window
    profile.set_preference("browser.link.open_newwindow",
                           3)  # divert new window to a new tab
    profile.update_preferences()

    # Instantiate the options: assigning the class itself would set
    # `headless` on the class and hand a class object to the driver.
    opts = FirefoxOptions()
    opts.headless = HEADLESS

    browser = webdriver.Firefox(firefox_binary=binary,
                                options=opts,
                                firefox_profile=profile,
                                executable_path=FIREFOX_DRIVER_PATH)
    # The disabled Flash-detection diagnostics that used to follow here were
    # dead commented-out code and have been removed.
    return browser
Ejemplo n.º 22
0
def create_profile(path=None,
                   user_agent=None,
                   accept_language=None,
                   proxy=None,
                   proxy_type=None,
                   no_proxy_hosts=None,
                   download_directory=None,
                   download_content_type=None):
    """
    Create a Firefox profile tuned for automated browsing.

    :param path: existing profile directory to load; a fresh profile is
        created when ``None``
    :param user_agent: value for ``general.useragent.override``
    :param accept_language: value for ``intl.accept_languages``
    :param proxy: proxy address as ``"host:port"``
    :param proxy_type: ``'socks5'`` or ``'http'``; required with ``proxy``
    :param no_proxy_hosts: iterable of host names that bypass the proxy
        (``localhost`` and ``127.0.0.1`` are always included)
    :param download_directory: directory downloads are saved to; only used
        together with ``download_content_type``
    :param download_content_type: CSV string of MIME types that are saved
        without asking
    :return: the configured ``FirefoxProfile``
    :raises ValueError: if ``proxy`` is not ``host:port`` or ``proxy_type``
        is not recognised
    """

    if path is not None:
        profile = FirefoxProfile(path)
    else:
        profile = FirefoxProfile()

    # Memory and cpu optimization. The browser.cache.* switches
    # (memory/offline/disk) are intentionally left at their defaults.
    static_preferences = (
        ('browser.sessionhistory.max_total_viewers', 0),
        ('browser.safebrowsing.enabled', False),
        ('browser.shell.checkDefaultBrowser', False),
        ('browser.startup.page', 0),
        ('dom.ipc.plugins.enabled.timeoutSecs', 15),
        ('dom.max_script_run_time', 10),
        ('extensions.checkCompatibility', False),
        ('extensions.checkUpdateSecurity', False),
        ('extensions.update.autoUpdateEnabled', False),
        ('extensions.update.enabled', False),
        ('network.http.max-connections-per-server', 30),
        ('network.prefetch-next', False),
        ('plugin.default_plugin_disabled', False),
        ('print.postscript.enabled', False),
        ('toolkit.storage.synchronous', 0),
        ('image.animation_mode', 'none'),
        ('images.dither', False),
        ('content.notify.interval', 1000000),
        # Spelling fixed: the misspelled name "treshold" set a preference
        # Firefox never reads.
        ('content.switch.threshold', 100000),
        ('nglayout.initialpaint.delay', 1000000),
        ('network.dnscacheentries', 200),
        ('network.dnscacheexpiration', 600),
    )
    for pref_name, pref_value in static_preferences:
        profile.set_preference(pref_name, pref_value)

    if user_agent is not None:
        profile.set_preference("general.useragent.override", user_agent)

    if accept_language is not None:
        profile.set_preference('intl.accept_languages', accept_language)

    if proxy is not None:
        logging.debug('Setting up proxy %s [%s]', proxy, proxy_type)
        # Split on the last colon only, so hosts that themselves contain
        # colons (e.g. bracketed IPv6 literals) keep working.
        server, port = proxy.rsplit(':', 1)
        if proxy_type == 'socks5':
            profile.set_preference("network.proxy.socks", server)
            profile.set_preference("network.proxy.socks_port", int(port))
        elif proxy_type == 'http':
            profile.set_preference("network.proxy.http", server)
            profile.set_preference("network.proxy.http_port", int(port))
        else:
            # ValueError is still an Exception, so existing handlers match.
            raise ValueError('Unkown proxy type: %s' % proxy_type)
        profile.set_preference("network.proxy.type", 1)

    if no_proxy_hosts is not None:
        # Join everything in one go so an empty host list does not leave a
        # dangling ", " at the end of the preference value.
        bypass_hosts = ['localhost', '127.0.0.1'] + list(no_proxy_hosts)
        profile.set_preference('network.proxy.no_proxies_on',
                               ', '.join(bypass_hosts))

    if download_directory is not None and download_content_type is not None:
        profile.set_preference("browser.download.folderList", 2)
        profile.set_preference("browser.download.manager.showWhenStarting",
                               False)
        profile.set_preference("browser.download.dir", download_directory)
        profile.set_preference("browser.helperApps.neverAsk.saveToDisk",
                               download_content_type)

    profile.update_preferences()
    return profile
Ejemplo n.º 23
0
    # Fragment of a larger body (its header is not visible here): writes the
    # CSV header of the line-moves file for `season`, prepares a Tor-proxied
    # Firefox profile, then calls run_main() once per month index.

    # Append the CSV header row to <cwd>\SBR_MLB_Lines_<season>_line_moves.txt.
    # NOTE(review): the separator is a literal backslash, so this only names a
    # file inside the working directory on Windows - confirm against the code
    # that later writes the data rows.
    f = open(os.getcwd() + '\\SBR_MLB_Lines_' + season + '_line_moves.txt', 'a')
    f.write('Date,Team,Team_SP,Opp,Opp_SP,Bet_Length,Bet_Type,Line_Move_Time,Over_Under,Line,Odds')
    f.write('\n')
    f.close()

    # NOTE(review): `os` is used above but imported below; if this fragment is
    # a function body, that import makes `os` a local name and the earlier use
    # would fail - verify against the enclosing scope.
    from selenium import webdriver
    from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
    import os

    # torexe = os.popen(r'/home/kc/Downloads/tor-browser_en-US/Browser/TorBrowser/Tor/tor.exe')
    # Load the Tor Browser profile and point Firefox at the local SOCKS proxy
    # on 127.0.0.1:9150.
    profile = FirefoxProfile(r'/home/kc/Downloads/tor-browser_en-US/Browser/TorBrowser/Data/Browser/profile.default')
    # profile = FirefoxProfile(r'C:/users/kj/Desktop/torbrowser/Browser/TorBrowser/Data/Browser/profile.default')
    profile.set_preference('network.proxy.type', 1)
    profile.set_preference('network.proxy.socks', '127.0.0.1')
    profile.set_preference('network.proxy.socks_port', 9150)
    profile.set_preference("network.proxy.socks_remote_dns", False)
    profile.update_preferences()
    # driver.get("http://check.torproject.org")

    # Fixed 5 second pause before the scraping loop starts.
    time.sleep(5)
    # To utilize a headless driver, download and install phantomjs and use the line below to open the driver instead
    # download link -- http://phantomjs.org/download.html
    # driver = webdriver.PhantomJS(r"C:\Users\Monstar\Python\phantomjs-2.0.0\bin\phantomjs.exe")

    # Month indices are zero-based: start_month - 1 up to end_month - 1.
    for y in range(int(start_month) - 1, int(end_month)):
        try:
            run_main(profile, season, y)
        except KeyboardInterrupt:
            # Ctrl-C stops the remaining months instead of aborting with a traceback.
            break
Ejemplo n.º 24
0
# Firefox binary used by the driver.
firefox_options.binary_location = '/Applications/Firefox.app/Contents/MacOS/firefox'
# firefox_options.headless = True

# Profile preferences, applied in the order listed.
firefox_profile = FirefoxProfile()
for _pref_name, _pref_value in (
        ('browser.privatebrowsing.autostart', True),
        ('pdfjs.disabled', True),
        ('browser.download.folderList', 2),
        ('browser.download.panel.shown', False),
        ('browser.tabs.warnOnClose', False),
        ('browser.tabs.animate', False),
        ('browser.fullscreen.animateUp', 0),
        ('geo.enabled', False),
        ('browser.urlbar.suggest.searches', False),
        ('browser.tabs.warnOnCloseOtherTabs', False),
):
    firefox_profile.set_preference(_pref_name, _pref_value)
del _pref_name, _pref_value
firefox_profile.update_preferences()

# Capabilities: private copy of the Firefox defaults plus two overrides.
firefox_capabilities = webdriver.DesiredCapabilities.FIREFOX.copy()
firefox_capabilities.update({
    'marionette': True,
    'acceptInsecureCerts': True,
})

# Start the browser.
w = webdriver.Firefox(
    options=firefox_options,
    firefox_profile=firefox_profile,
    desired_capabilities=firefox_capabilities,
)

# Default implicit wait, in seconds.
w.implicitly_wait(15)

w.get('https://www.uol.com.br')
def bidCrawling(request):
    """Crawl Naver book detail pages for a submitted bid range and store them.

    The inclusive bid range is read from ``bidCrawlingForm`` (fields
    ``searchstartbid`` and ``searchendbid``).  It is processed in batches of
    50 bids; every batch appends rows to the ``bid_crawling``,
    ``writer_info`` and ``task`` tables.

    :param request: incoming request; only a POST carrying a valid form
        starts a crawl.
    :return: ``HttpResponseRedirect`` to the ``bidIndex`` URL, returned once
        all batches have run (or immediately when there is no valid form).
    """
    # Without a valid POST there is no bid range; redirect instead of failing
    # later on an unbound local.
    if request.method != 'POST':
        return HttpResponseRedirect(reverse_lazy('bidIndex'))

    form = bidCrawlingForm(request.POST)
    if not form.is_valid():
        return HttpResponseRedirect(reverse_lazy('bidIndex'))

    bidSearchStart = int(form.cleaned_data['searchstartbid'])
    bidSearchEnd = int(form.cleaned_data['searchendbid'])

    engine = create_engine(
        "mysql+pymysql://root:[email protected]:3306/book?charset=utf8mb4",
        encoding='utf8')
    # Launches the Tor Browser executable; the returned pipe is never read,
    # the reference is only kept alive for the duration of the crawl.
    torexe = os.popen(r'C:\Dev_program\Tor Browser\Browser\firefox.exe')
    profile = FirefoxProfile(
        r'C:\Dev_program\Tor Browser\Browser\TorBrowser\Data\Browser\profile.default'
    )
    profile.set_preference('network.proxy.type', 1)
    profile.set_preference('network.proxy.socks', '127.0.0.1')
    profile.set_preference('network.proxy.socks_port', 9050)
    profile.set_preference("network.proxy.socks_remote_dns", False)
    profile.update_preferences()
    driver = webdriver.Firefox(
        firefox_profile=profile,
        executable_path=r'C:\Dev_program\geckodriver.exe')

    driver.implicitly_wait(time_to_wait=5)

    try:
        _crawl_bid_batches(driver, engine, bidSearchStart, bidSearchEnd)
    finally:
        # Release the browser exactly once, also when a page fails to parse.
        driver.quit()

    return HttpResponseRedirect(reverse_lazy('bidIndex'))


def _text_or_blank(soup, tag, **attrs):
    """Return the newline-stripped text of the first match, or '' if absent."""
    node = soup.find(tag, **attrs)
    if node is None:
        return ''
    return node.get_text().replace('\n', '')


def _rows_from_columns(columns):
    """Zip a ``name -> list`` mapping into row dicts (shortest list wins)."""
    return [dict(zip(columns, row)) for row in zip(*columns.values())]


def _task_frame(taskId, taskContent, str_time, complete, errorDetail):
    """Build the one-row ``task`` DataFrame, stamped with the current time."""
    end_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    return pd.DataFrame([{
        "taskId": taskId,
        "taskContent": taskContent,
        "str_time": str_time,
        "end_time": end_time,
        "complete": complete,
        "errorDetail": errorDetail,
        "crawlerNum": 11,
    }])


def _crawl_bid_batches(driver, engine, bidSearchStart, bidSearchEnd):
    """Crawl bids ``bidSearchStart``..``bidSearchEnd`` in batches of 50.

    For every batch the parsed books and their writers are appended to the
    ``bid_crawling`` and ``writer_info`` tables, followed by one ``task`` row
    marked ``complete``.  A bid whose page cannot be fetched is skipped and
    recorded as its own ``task`` row marked ``error``.

    :param driver: running Selenium driver; only its ``current_url`` is read.
    :param engine: SQLAlchemy engine used for all ``to_sql`` writes.
    :param bidSearchStart: first bid to crawl (inclusive).
    :param bidSearchEnd: last bid to crawl (inclusive).
    """
    searchCount = bidSearchEnd - bidSearchStart + 1
    searchMok = int(np.ceil(searchCount / 50))
    taskId = 1

    while searchMok > 0:
        # One list per output column; they are zipped into rows after the
        # batch, so each processed book must append to every book list.
        title_list = []
        intro_list = []
        author_intro_list = []
        category_top_list = []
        category_middle_list = []
        category_bottom_list = []
        ISBN_list = []
        writer_list = []
        translator_list = []
        painter_list = []
        publisher_list = []
        publish_date_list = []
        content_list = []
        bid_list = []
        image_list = []
        grade_list = []
        review_list = []
        writer_name = []
        writer_link = []
        writer_num = []
        writer_book_title = []
        writer_bid = []
        writer_isbn = []

        # Never go past the requested upper bound, even when bids are skipped.
        batchEnd = min(bidSearchStart + 49, bidSearchEnd)
        taskContent = str(bidSearchStart) + '~' + str(batchEnd)
        str_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

        for i in range(bidSearchStart, batchEnd + 1):

            url_det = 'https://book.naver.com/bookdb/book_detail.nhn?bid=' + \
                str(i)
            try:
                html_det = urlopen(url_det)
            except (HTTPError, URLError, IndexError) as e:
                # Record this failure with its own details, then skip the bid.
                _task_frame(taskId, taskContent, str_time, 'error',
                            str(e)).to_sql(name='task',
                                           con=engine,
                                           if_exists='append',
                                           index=False)

                print(e)
                print('에러로 크롤링 종료')

                time.sleep(3)
                continue

            soup_det = BeautifulSoup(html_det, "html.parser")

            # Marker text of a deleted bibliographic record.
            if '책정보,  :' in soup_det.text:
                print(str(i), "번 제외(삭제 서지)")
                continue

            book_info = soup_det.find('div', class_='book_info_inner')

            try:
                book_info_text = book_info.get_text()
            except AttributeError:
                print(str(i), "번 제외(Attr 에러)")
                continue

            if driver.current_url == 'https://nid.naver.com/nidlogin.login?svctype=128&a_version=2&viewtype=2&url=http://book.naver.com&surl=http://book.naver.com':
                continue

            intro_list.append(
                _text_or_blank(soup_det, 'div', id='bookIntroContent'))
            author_intro_list.append(
                _text_or_blank(soup_det, 'div', id='authorIntroContent'))
            category_top_list.append(
                _text_or_blank(soup_det, 'li', class_='select'))
            category_middle_list.append(
                _text_or_blank(soup_det, 'li', class_='select2'))
            category_bottom_list.append(
                _text_or_blank(soup_det, 'li', class_='select3'))

            try:
                grade = soup_det.find(
                    "div", class_="txt_desc").find("strong").text[:-1]
            except AttributeError:
                grade = ''
            # Append on success as well, otherwise the columns go out of step.
            grade_list.append(grade)

            try:
                review = soup_det.find(
                    "a", id="txt_desc_point").find_all("strong")[1].text
            except (AttributeError, IndexError):
                review = ''
            review_list.append(review)

            bookinfo_line1 = book_info.find_all("div")[2]

            # Strip the <em> labels out of the line, then split it on '|'.
            rel_name = bookinfo_line1.text
            rel_list = [rel.text for rel in bookinfo_line1.find_all("em")]
            for rel_cate in rel_list:
                rel_name = rel_name.replace(rel_cate, '')
            rel_name = rel_name.split('|')

            publish_date = rel_name[-1]

            if len(publish_date) == 4:
                publish_date = publish_date + ".01.01"
            elif len(publish_date) == 6:
                publish_date = publish_date[:4] + "." + publish_date[4:] + ".01"
            elif publish_date == '':
                publish_date = '2025.01.01'

            if publish_date[0] not in ('1', '2'):
                publish_date = '2025.01.01'
            publish_date_list.append(publish_date)

            publisher = rel_name[-2]
            publisher_list.append(publisher)

            rel_name = rel_name[1:-2]
            rel_list = rel_list[1:]

            # Same truth table as `(len(rel_list) and len(rel_name)) == N`.
            if rel_list and len(rel_name) == 2:
                painter = rel_name[0].replace('\n', '')
                translator = rel_name[1].replace('\n', '')
            elif rel_list and len(rel_name) == 1:
                if '역자' in rel_list:
                    translator = rel_name[0].replace('\n', '')
                    painter = ''
                else:
                    translator = ''
                    painter = rel_name[0].replace('\n', '')
            else:
                translator = ''
                painter = ''

            translator_list.append(translator)
            painter_list.append(painter)

            ISBN_str = book_info_text.find('ISBN') + 6
            ISBN_end = book_info_text.find('|', ISBN_str)

            if ISBN_end == -1:
                ISBN_end = book_info_text.find('\n', ISBN_str)

            ISBN = book_info_text[ISBN_str:ISBN_end]

            if '\n' in ISBN:
                ISBN = ISBN[0:int(ISBN.find('\n'))]

            ISBN_list.append(ISBN)

            content = get_text_list(soup_det.select("div.book_cnt"))
            if content == []:
                content = [""]
            content_list.append(content)

            # NOTE(review): one entry is appended per <a>, so a page with
            # several thumbnail links shifts the image column for later books.
            for src in soup_det.find("div", class_="thumb_type").find_all("a"):
                bookImage = src.find("img")["src"]
                image_list.append(bookImage)

            writer_a = bookinfo_line1.find_all("a")[:-1]
            title_link = soup_det.find("div", class_="book_info").find("a")
            writer_book = title_link.text
            writer_book_bid = title_link["href"].split("=")[1]

            bid_list.append(writer_book_bid)
            title_list.append(writer_book)

            writer = bookinfo_line1.text.split("|")[0][3:].strip()
            writer_list.append(writer)

            for writer_tag in writer_a:
                writer_href = writer_tag["href"]
                writer_name.append(writer_tag.text)
                writer_link.append(writer_href)
                writer_num.append(writer_href.split("=")[3])
                writer_book_title.append(writer_book)
                writer_bid.append(writer_book_bid)
                writer_isbn.append(ISBN)

            # Random pause between requests.
            time.sleep(round(np.random.uniform(0.5, 1.4), 2))

        if batchEnd == bidSearchEnd:
            print('bid 번호에 대한 도서 ' + str(searchCount) + '권 크롤링이 완료되었습니다.')

        book_DF = pd.DataFrame(_rows_from_columns({
            "title": title_list,
            "writer": writer_list,
            "translator": translator_list,
            "painter": painter_list,
            "publisher": publisher_list,
            "publishDate": publish_date_list,
            "intro": intro_list,
            "content": content_list,
            "authorIntro": author_intro_list,
            "categoryTop": category_top_list,
            "categoryMiddle": category_middle_list,
            "categoryBottom": category_bottom_list,
            "bid": bid_list,
            "ISBN": ISBN_list,
            "grade": grade_list,
            "review": review_list,
            "image": image_list,
        }))

        writer_DF = pd.DataFrame(_rows_from_columns({
            "num": writer_num,
            "name": writer_name,
            "bookTitle": writer_book_title,
            "bid": writer_bid,
            "isbn": writer_isbn,
            "link": writer_link,
        }))

        task_DF = _task_frame(taskId, taskContent, str_time, 'complete', '')

        book_DF.to_sql(name='bid_crawling',
                       con=engine,
                       if_exists='append',
                       index=False)
        writer_DF.to_sql(name='writer_info',
                         con=engine,
                         if_exists='append',
                         index=False)
        task_DF.to_sql(name='task',
                       con=engine,
                       if_exists='append',
                       index=False)

        searchMok -= 1
        bidSearchStart += 50
        taskId += 1

        time.sleep(1)
Ejemplo n.º 26
0
    def _setup_common_firefox_based_fetcher(self, ff_profile: FirefoxProfile) -> None:
        """
        Shared setup for Firefox based fetchers (Firefox itself included).

        Resolves the Selenium executor URL, installs the optional extensions
        into the profile, writes the HAR / update / JSON-view / proxy / cookie
        preferences, and finally builds the Selenium capabilities and options
        that carry the profile.

        :param ff_profile: Firefox Profile created for the webdriver
        :type ff_profile: FirefoxProfile
        """
        # Executor URL is derived from the container host and port
        self._selenium_executor_url = self._get_selenium_executor_url(
            self.container_host, self.container_port
        )

        set_pref = ff_profile.set_preference

        # Optional extension: GDPR removal
        if self.remove_gdpr:
            self._install_xpi_extension(
                self._gdpr_extension_xpi,
                self._gdpr_extension_xpi_id,
                ff_profile.extensionsDir,
            )

        # Optional extension: HAR export, plus the network monitor
        # preferences needed to record HAR
        if self.export_har:
            self._install_xpi_extension(
                self._har_export_extension_xpi,
                self._har_export_extension_xpi_id,
                ff_profile.extensionsDir,
            )

            har_preferences = (
                ("devtools.netmonitor.enabled", True),
                ("devtools.toolbox.selectedTool", "netmonitor"),
                ("devtools.netmonitor.har.compress", False),
                ("devtools.netmonitor.har.includeResponseBodies", False),
                ("devtools.netmonitor.har.jsonp", False),
                ("devtools.netmonitor.har.jsonpCallback", False),
                ("devtools.netmonitor.har.forceExport", False),
                ("devtools.netmonitor.har.enableAutoExportToFile", False),
                ("devtools.netmonitor.har.pageLoadedTimeout", "2500"),
            )
            for pref_name, pref_value in har_preferences:
                set_pref(pref_name, pref_value)

        # No browser updates, no JSON viewer page
        set_pref("app.update.enabled", False)
        set_pref("devtools.jsonview.enabled", False)

        # Route traffic through Tor (SOCKS5) or through an HTTP proxy
        proxy_kind = self.use_proxy_type
        if proxy_kind == "tor":
            set_pref("network.proxy.type", 1)
            set_pref("network.proxy.socks_version", 5)
            set_pref("network.proxy.socks", str(self._proxy_host))
            set_pref("network.proxy.socks_port", int(self._proxy_port))
            set_pref("network.proxy.socks_remote_dns", True)
        elif proxy_kind == "http":
            set_pref("network.proxy.type", 1)
            set_pref("network.proxy.proxy_over_tls", True)
            set_pref("network.proxy.share_proxy_settings", False)
            set_pref("network.proxy.http", str(self._proxy_host))
            set_pref("network.proxy.http_port", int(self._proxy_port))
            set_pref("network.proxy.ssl", str(self._proxy_host))
            set_pref("network.proxy.ssl_port", int(self._proxy_port))
            set_pref("network.proxy.ftp", str(self._proxy_host))
            set_pref("network.proxy.ftp_port", int(self._proxy_port))

        if self.disable_cookies:
            set_pref("network.cookie.cookieBehavior", 2)

        # Write the collected preferences into the profile
        ff_profile.update_preferences()

        # Selenium side: capabilities copy plus options carrying the profile
        self._desired_capabilities = webdriver.DesiredCapabilities.FIREFOX.copy()
        self._selenium_options = webdriver.FirefoxOptions()
        self._selenium_options.profile = ff_profile

        if self.disable_javascript:
            self._selenium_options.preferences.update({"javascript.enabled": False})

        # The HAR exporter needs the devtools open
        if self.export_har:
            self._selenium_options.add_argument("--devtools")
Ejemplo n.º 27
0
class TorBrowser:
    '''
    Selenium Firefox driver configured to run a Tor Browser Bundle install.

    Refactoring for 2020 12 version of Tor Browser Bundle

    The instance can be used as a context manager; leaving the ``with``
    block calls :meth:`close`.
    '''
    def __init__(self,
                 browser_path,
                 binary_path=None,
                 profile_path=None,
                 executable_path=None,
                 socks_port=9050,
                 control_port=9051,
                 extensions=None,
                 capabilities=None,
                 headless=False,
                 capture_screen=False,
                 url=None):
        '''
        Store the configuration, start the Tor controller and the webdriver.

        :param browser_path: root directory of the Tor Browser Bundle
            (must not be None).
        :param binary_path: Firefox binary; defaults to
            ``<browser_path>/Browser/firefox``.
        :param profile_path: Firefox profile directory; defaults to
            ``<browser_path>/Browser/TorBrowser/Data/Browser/profile.default``.
        :param executable_path: passed to ``webdriver.Firefox`` as
            ``executable_path``.
        :param socks_port: value written to ``network.proxy.socks_port``.
        :param control_port: stored on the instance only.
        :param extensions: optional iterable of extensions added to the
            profile.
        :param capabilities: optional capabilities; a copy of
            ``DesiredCapabilities.FIREFOX`` with extra flags is used when None.
        :param headless: when True, Firefox options with ``headless`` set
            are passed to the driver.
        :param capture_screen: stored on the instance only.
        :param url: stored on the instance only.
        '''
        assert browser_path is not None

        self.browser_path = browser_path
        self.binary_path = binary_path
        self.profile_path = profile_path
        self.executable_path = executable_path

        self.socks_port = socks_port
        self.control_port = control_port
        self.extensions = extensions
        self.capabilities = capabilities
        self.headless = headless
        self.capture_screen = capture_screen
        self.url = url

        self.profile = None
        self.binary = None  # firefox
        self.options = None
        self.webdriver = None
        self.tor_controller = TorController()  # Start tor process..

        self._initialize()

    def _initialize(self):
        '''Run every setup step in dependency order and start the driver.'''
        # Paths must be resolved before the profile is loaded from disk.
        self._init_browser()
        self.profile = FirefoxProfile(self.profile_path)
        self._init_profile()
        self._init_extensions()
        self._init_capabilities()
        self._init_binary()
        self._init_options()
        self._init_webdriver()

    def _init_browser(self):
        '''Fill in the default binary and profile paths under browser_path.'''
        if self.binary_path is None:
            # tor-browser_en-US/Browser/firefox
            self.binary_path = os.path.join(self.browser_path, 'Browser',
                                            'firefox')
        if self.profile_path is None:
            # tor-browser_en-US/Browser/TorBrowser/Data/Browser/profile.default
            self.profile_path = os.path.join(self.browser_path, 'Browser',
                                             'TorBrowser', 'Data', 'Browser',
                                             'profile.default')

    def _init_profile(self):
        '''Write the cache, start-page, SOCKS proxy and Tor add-on prefs.'''
        preferences = (
            ('browser.cache.disk.enable', False),
            ('browser.cache.memory.enable', False),
            ('browser.cache.offline.enable', False),
            ('browser.startup.page', '0'),
            ('browser.startup.homepage', 'about:newtab'),
            ('network.http.use-cache', False),
            ('network.proxy.type', 1),
            ('network.proxy.socks', '127.0.0.1'),
            ('network.proxy.socks_port', self.socks_port),
            # The preference is spelled "prompt_at_startup"; the misspelled
            # name used before had no effect.
            ('extensions.torlauncher.prompt_at_startup', 0),
            ('webdriver.load.strategy', 'conservative'),
            ('extensions.torlauncher.start_tor', False),
            ('extensions.torbutton.versioncheck_enabled', False),
            ('permissions.memory_only', False),
        )
        for name, value in preferences:
            self.profile.set_preference(name, value)

        self.profile.update_preferences()

    def _init_extensions(self):
        '''Add every configured extension to the profile (no-op if None).'''
        for extension in self.extensions or ():
            self.profile.add_extension(extension)

    def _init_capabilities(self):
        '''Build default capabilities when the caller supplied none.'''
        if self.capabilities is None:
            # Copy first: DesiredCapabilities.FIREFOX is a shared class-level
            # dict and must not be modified in place.
            self.capabilities = DesiredCapabilities.FIREFOX.copy()
            self.capabilities.update({
                'handlesAlerts': True,
                'databaseEnabled': True,
                'javascriptEnabled': True,
                'browserConnectionEnabled': True
            })

    def _init_binary(self):
        '''Wrap the Firefox binary and tag its window class as Tor Browser.'''
        self.binary = FirefoxBinary(firefox_path=self.binary_path)
        self.binary.add_command_line_options('--class', '"Tor Browser"')

    def _init_options(self):
        '''Create Firefox options only when headless mode was requested.'''
        if self.headless is True:
            self.options = Options()
            self.options.headless = self.headless

    def _init_webdriver(self):
        '''Start the Firefox webdriver from the prepared pieces.'''
        self.webdriver = webdriver.Firefox(
            firefox_profile=self.profile,
            firefox_binary=self.binary,
            timeout=60,
            capabilities=self.capabilities,
            executable_path=self.executable_path,
            options=self.options)

    def connect_url(self, url):
        '''Load ``url`` and wait (up to 30) until a <body> element exists.'''
        self.webdriver.get(url)
        WebDriverWait(self.webdriver, timeout=30).until(
            expected_conditions.presence_of_element_located(
                (By.TAG_NAME, 'body')))

    def close(self):
        '''
        Stop the Tor controller and quit the webdriver.

        Errors are logged instead of raised; the two steps are independent so
        a failing controller stop no longer prevents the browser from being
        quit.
        '''
        try:
            self.tor_controller.stop()
        except Exception as e:
            logger.error('Exception while stopping Tor controller: %s', e)

        try:
            self.webdriver.quit()
        except CannotSendRequest:
            logger.error('CannotSendRequest while quitting TorBrowserDriver')
            # The driver connection is unusable; kill the browser process.
            self.binary.kill()
        except Exception as e:
            logger.error('Exception while quitting TorBrowserDriver: %s', e)

    def init_canvas_permission(self, url):
        '''
        Create a permission DB and add exception for the canvas image extraction.
        Otherwise screenshots taken by Selenium will be just blank images due to
        canvas fingerprinting defense in Tor Browser Bundle.

        :param url: URL whose domain (as returned by ``tld.get_tld``) gets
            the ``canvas/extractData`` permission.
        '''
        import sqlite3
        from tld import get_tld

        permission_db = sqlite3.connect(
            os.path.join(self.profile_path, 'permissions.sqlite'))
        try:
            cursor = permission_db.cursor()

            # http://mxr.mozilla.org/mozilla-esr31/source/build/automation.py.in
            cursor.execute("PRAGMA user_version=3")
            cursor.execute("""CREATE TABLE IF NOT EXISTS moz_hosts (
            id INTEGER PRIMARY KEY,
            host TEXT,
            type TEXT,
            permission INTEGER,
            expireType INTEGER,
            expireTime INTEGER,
            appId INTEGER,
            isInBrowserElement INTEGER)""")

            domain = get_tld(url)
            logger.debug('Adding canvas/extractData permission for %s', domain)
            # Bind the host as a parameter instead of formatting it into SQL.
            cursor.execute(
                "INSERT INTO 'moz_hosts' VALUES "
                "(NULL, ?, 'canvas/extractData', 1, 0, 0, 0, 0);",
                (domain,))
            permission_db.commit()
            cursor.close()
        finally:
            permission_db.close()

    def take_screenshot(self, save_path):
        '''
        Save a screenshot as ``screenshot.png``.

        :param save_path: target directory, or None for the current one.
        '''
        if save_path is not None:
            save_path = os.path.join(save_path, 'screenshot.png')
        else:
            save_path = 'screenshot.png'

        self.webdriver.get_screenshot_as_file(save_path)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
Ejemplo n.º 28
0
class Layer(plonetesting.Layer):
    """Test layer that starts and stops a Selenium webdriver session.

    The browser (Firefox, or Chrome in headless mode) and the headless flag
    are taken from the ``GOCEPT_WEBDRIVER_BROWSER`` and
    ``GOCEPT_SELENIUM_HEADLESS`` environment variables during ``setUp``.
    The running driver is stored under the ``'seleniumrc'`` key.
    """

    profile = None
    headless = False
    _browser = 'firefox'

    def setUp(self):
        """Read the environment, prepare the profile and start the driver."""
        if 'http_address' not in self:
            raise KeyError("No base layer has set self['http_address']")

        env = os.environ
        wanted_browser = env.get('GOCEPT_WEBDRIVER_BROWSER')
        wanted_headless = env.get('GOCEPT_SELENIUM_HEADLESS')

        # Anything but "true"/"false" (including unset) falls back to false.
        headless_ok = (wanted_headless is not None
                       and wanted_headless.lower() in ('true', 'false'))
        if not headless_ok:
            warnings.warn('GOCEPT_SELENIUM_HEADLESS invalid. \
                          Possible values are true and false. Got: %s.\
                          Falling back to default (false).' %
                          env.get('GOCEPT_SELENIUM_HEADLESS'))
            wanted_headless = 'false'

        if wanted_headless.lower() == 'true':
            self.headless = True

        # Anything but "chrome"/"firefox" (including unset) means firefox.
        browser_ok = (wanted_browser is not None
                      and wanted_browser.lower() in ('chrome', 'firefox'))
        if not browser_ok:
            warnings.warn('GOCEPT_WEBDRIVER_BROWSER invalid. \
                          Possible values are firefox and chrome. Got: %s.\
                          Falling back to firefox.' %
                          env.get('GOCEPT_WEBDRIVER_BROWSER'))
            wanted_browser = 'firefox'

        if wanted_browser.lower() == 'chrome':
            self._browser = 'chrome'
        else:
            # The newer variable wins; the older one is only the fallback.
            profile_dir = env.get('GOCEPT_WEBDRIVER_FF_PROFILE',
                                  env.get('GOCEPT_SELENIUM_FF_PROFILE'))
            self.profile = FirefoxProfile(profile_dir)
            self.profile.native_events_enabled = True
            self.profile.update_preferences()

        self._start_selenium()
        atexit.register(self._stop_selenium)

    def tearDown(self):
        """Stop the driver and drop it from the layer."""
        self._stop_selenium()
        # XXX upstream bug, quit should reset session_id
        driver = self['seleniumrc']
        driver.session_id = None
        del self['seleniumrc']

    def _start_selenium(self):
        """Create the webdriver for the selected browser."""
        if self._browser == 'firefox':
            firefox_options = selenium.webdriver.FirefoxOptions()
            if self.headless:
                firefox_options.add_argument('-headless')

            self['seleniumrc'] = selenium.webdriver.Firefox(
                firefox_profile=self.profile, options=firefox_options)

        if self._browser == 'chrome':
            chrome_options = selenium.webdriver.ChromeOptions()
            chrome_options.add_argument('--disable-dev-shm-usage')

            if not self.headless:
                raise NotImplementedError(
                    'Chromedriver currently only works headless.')
            chrome_options.add_argument('--headless')

            self['seleniumrc'] = selenium.webdriver.Chrome(
                options=chrome_options,
                service_args=['--log-path=chromedriver.log'])

    def _stop_selenium(self):
        """Quit the driver, but only while a session is still active."""
        if self.get('seleniumrc') is None:
            return
        if self['seleniumrc'].session_id is None:
            return

        # Quit also removes the tempdir the ff profile is copied in.
        self['seleniumrc'].quit()
Ejemplo n.º 29
0
class ProxiedBrowser(object):
    """Firefox webdriver whose traffic goes through a capturing TCP proxy.

    ``open()`` starts the proxy, an Xvfb virtual display and the webdriver;
    ``close()`` tears them down again.  The instance can be used as a context
    manager, in which case both happen automatically.
    """

    def __init__(self, dir, tor_port=9150):
        """
        :param dir: base directory; the capture directory ``caps`` and the
            Firefox binary/profile paths are resolved relative to it.
        :param tor_port: Tor port handed to the proxy (converted with int()).
        """
        self.dir = dir
        self.tor_port = int(tor_port)

    def open(self):
        """Start proxy, virtual display and webdriver; return ``self``.

        Whatever was already started is released again if a later step
        fails, and the original exception is re-raised.
        """
        cap_dir = join(self.dir, "caps")
        try:
            os.makedirs(cap_dir)
        except OSError:
            # Best effort: typically the directory exists already.
            pass
        self.proxy = TCP(cap_dir=cap_dir, tor_port=self.tor_port)
        self.proxy.start()
        while not self.proxy.running:
            logger.info("Waiting for proxy to start...")
            sleep(1)

        try:
            logger.info("Starting Xvfb virtual display")
            self.vdisplay = Xvfb(width=1280, height=740)
            self.vdisplay.start()
            try:
                self._start_webdriver()
            except Exception:
                self.vdisplay.stop()
                raise
        except Exception:
            self.proxy.close()
            raise
        return self

    def _start_webdriver(self):
        """Create the Firefox binary, profile and driver for this browser."""
        logger.info("Webdriver starting..")
        # Keep the handle so close() can release it.
        self._log_file = open("firefox.log", "w")
        try:
            self.binary = FirefoxBinary(
                firefox_path=join(self.dir, FIREFOX_PATH),
                log_file=self._log_file)
            self.binary.add_command_line_options("--verbose")
            self.profile = FirefoxProfile(
                profile_directory=join(self.dir, FIREFOX_PROFILE))
            self.profile.set_preference("network.proxy.socks_port",
                                        self.proxy.port)
            self.profile.set_preference("extensions.torlauncher.start_tor",
                                        False)  # We started tor manually earlier.
            self.profile.set_preference("browser.startup.homepage", "")
            self.profile.set_preference("browser.startup.page", 0)
            self.profile.update_preferences()

            self.driver = webdriver.Firefox(firefox_binary=self.binary,
                                            firefox_profile=self.profile)
            sleep(2)  # hack: wait until homepage etc have loaded.
        except Exception:
            self._log_file.close()
            raise

    def __enter__(self):
        return self.open()

    def get(self, url):
        """Load ``url`` and return the responses extracted from the captures.

        Capture files produced while loading the page are parsed with
        ``extract_from_capturefile`` and deleted afterwards.
        """
        print("Fetching {url}".format(url=url))
        # clear anything previous, e.g the browsers homepage, whatever update
        # checkers etc.
        self.proxy.consume_results()
        self.driver.get(url)
        capture_files = self.proxy.consume_results()
        responses = []
        for capture_file in capture_files:
            responses += extract_from_capturefile(capture_file)
            os.remove(capture_file)
        return responses

    def close(self):
        """Quit the webdriver, stop the display and shut the proxy down.

        Every step runs even when an earlier one raises, so a failing
        ``quit()`` does not leave the display or the proxy behind; the
        exception still propagates to the caller.
        """
        try:
            logging.info("Closing webdriver")
            self.driver.quit()
        finally:
            try:
                logging.info("Closing virtual display")
                self.vdisplay.stop()
            finally:
                try:
                    logging.info("Closing proxy")
                    self.proxy.close()
                    self.proxy.join()
                finally:
                    log_file = getattr(self, "_log_file", None)
                    if log_file is not None:
                        log_file.close()

    def __exit__(self, type, value, traceback):
        self.close()
Ejemplo n.º 30
0
def create_profile(path=None, user_agent=None, accept_language=None,
                   proxy=None, proxy_type=None, no_proxy_hosts=None,
                   download_directory=None,
                   download_content_type=None):
    """
    Build a FirefoxProfile with resource-saving defaults plus optional
    user-agent, language, proxy and download settings.

    @param path: existing profile directory to start from; a fresh profile
        is created when None.
    @param user_agent: value for "general.useragent.override", if given.
    @param accept_language: value for "intl.accept_languages", if given.
    @param proxy: proxy address as a "host:port" string, if given.
    @param proxy_type: 'socks5' or 'http'; only consulted when proxy is set.
    @param no_proxy_hosts: iterable of host strings appended to
        "localhost, 127.0.0.1" for "network.proxy.no_proxies_on".
    @param download_directory: target directory for downloads; only applied
        together with download_content_type.
    @param download_content_type: CSV string of content types that are saved
        to disk without asking; only applied together with
        download_directory.
    @return: the configured profile, after update_preferences() was called.
    @raise ValueError: if proxy is given with an unsupported proxy_type, or
        if proxy is not of the form "host:port".
    """

    if path is not None:
        profile = FirefoxProfile(path)
    else:
        profile = FirefoxProfile()

    # Memory and cpu optimization
    optimization_prefs = (
        ('browser.sessionhistory.max_total_viewers', 0),
        # Cache preferences were already commented out in the original:
        # ('browser.cache.memory.enable', False),
        # ('browser.cache.offline.enable', False),
        # ('browser.cache.disk.enable', False),
        ('browser.safebrowsing.enabled', False),
        ('browser.shell.checkDefaultBrowser', False),
        ('browser.startup.page', 0),
        ('dom.ipc.plugins.enabled.timeoutSecs', 15),
        ('dom.max_script_run_time', 10),
        ('extensions.checkCompatibility', False),
        ('extensions.checkUpdateSecurity', False),
        ('extensions.update.autoUpdateEnabled', False),
        ('extensions.update.enabled', False),
        ('network.http.max-connections-per-server', 30),
        ('network.prefetch-next', False),
        ('plugin.default_plugin_disabled', False),
        ('print.postscript.enabled', False),
        ('toolkit.storage.synchronous', 0),
        ('image.animation_mode', 'none'),
        ('images.dither', False),
        ('content.notify.interval', 1000000),
        # Was misspelled 'content.switch.treshold', which Firefox ignores.
        ('content.switch.threshold', 100000),
        ('nglayout.initialpaint.delay', 1000000),
        # NOTE(review): Firefox documents these as network.dnsCacheEntries /
        # network.dnsCacheExpiration (camelCase) -- confirm whether the
        # lowercase names below have any effect.
        ('network.dnscacheentries', 200),
        ('network.dnscacheexpiration', 600),
    )
    for pref_name, pref_value in optimization_prefs:
        profile.set_preference(pref_name, pref_value)

    if user_agent is not None:
        profile.set_preference("general.useragent.override", user_agent)

    if accept_language is not None:
        profile.set_preference('intl.accept_languages', accept_language)

    if proxy is not None:
        logging.debug('Setting up proxy %s [%s]', proxy, proxy_type)
        server, port = proxy.split(':')
        if proxy_type == 'socks5':
            profile.set_preference("network.proxy.socks", server)
            profile.set_preference("network.proxy.socks_port", int(port))
        elif proxy_type == 'http':
            profile.set_preference("network.proxy.http", server)
            profile.set_preference("network.proxy.http_port", int(port))
        else:
            # ValueError is still an Exception, so existing handlers keep
            # working.
            raise ValueError('Unknown proxy type: %s' % proxy_type)
        # 1 selects manual proxy configuration.
        profile.set_preference("network.proxy.type", 1)

    if no_proxy_hosts is not None:
        # Join all entries in one go so an empty iterable does not leave a
        # dangling ", " at the end of the preference value.
        bypass_hosts = ['localhost', '127.0.0.1'] + list(no_proxy_hosts)
        profile.set_preference('network.proxy.no_proxies_on',
                               ', '.join(bypass_hosts))

    if download_directory is not None and download_content_type is not None:
        # folderList=2 makes Firefox use the custom browser.download.dir.
        profile.set_preference("browser.download.folderList", 2)
        profile.set_preference("browser.download.manager.showWhenStarting", False)
        profile.set_preference("browser.download.dir", download_directory)
        profile.set_preference("browser.helperApps.neverAsk.saveToDisk",
                               download_content_type)

    profile.update_preferences()
    return profile