Ejemplo n.º 1
0
def generator_header():
    """Build the HTTP request headers as a dict.

    The User-Agent value is the first entry of what
    ``read_random_lines(USER_AGENT_DIR, 5)`` returns; every other header
    is a fixed string.
    """
    candidates = read_random_lines(USER_AGENT_DIR, 5)

    headers = {}
    headers['User-Agent'] = candidates[0]
    headers['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
    headers['Accept-Language'] = 'en-us,en;q=0.7,zh-cn;q=0.3'
    headers['Accept-Charset'] = 'ISO-8859-1,utf-8;q=0.7,*;q=0.7'
    headers['Connection'] = 'keep-alive'
    return headers
Ejemplo n.º 2
0
def generator_header():
    """Build the HTTP request headers as a list of ``(name, value)`` tuples.

    The User-Agent value is the first entry of what
    ``read_random_lines(USER_AGENT_DIR, 5)`` returns; the remaining
    headers are fixed strings, emitted in a fixed order after it.
    """
    picked = read_random_lines(USER_AGENT_DIR, 5)

    # Headers that never vary between calls.
    fixed_headers = [
        ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
        ('Accept-Language', 'en-us,en;q=0.7,zh-cn;q=0.3'),
        ('Accept-Encoding', 'gzip,deflate'),
        ('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.7'),
        ('Connection', 'keep-alive'),
    ]
    return [('User-Agent', picked[0])] + fixed_headers
Ejemplo n.º 3
0
    def _init_instance(self, firefox_profile=None, firefox_binary=None, string_proxy=None, timeout=180, capabilities=None, proxy=None, profile_preferences=None, **kwargs):
        """Configure a Firefox profile and initialise the underlying webdriver.

        Reads ``self.timeout``, ``self.wait_timeout`` and
        ``self.script_timeout``; these must already be set on the instance
        before this method is called.

        Args:
            firefox_profile: Profile to configure. When ``None`` a new
                ``LFirefoxProfile`` is created, using the optional
                ``profile_directory`` keyword argument.
            firefox_binary: Forwarded unchanged to ``webdriver.Firefox``.
            string_proxy: Optional proxy URL. Scheme ``ssh`` starts a
                ``Bitvise`` helper and uses the ``(ip, port)`` pair it
                returns as the SOCKS endpoint; scheme ``socks5`` uses the
                URL's host and port directly. Any other scheme leaves the
                profile without a proxy.
            timeout: Forwarded unchanged to ``webdriver.Firefox``.
            capabilities: Forwarded unchanged to ``webdriver.Firefox``.
            proxy: Accepted for signature compatibility but not forwarded;
                ``webdriver.Firefox`` is always called with ``proxy=None``
                and proxying is done through profile preferences instead.
            profile_preferences: Optional mapping of extra preferences,
                applied last so they override everything set here.
            **kwargs: ``profile_directory`` (see above) and ``random_ua``
                (truthy: override the user agent with the first line
                returned by ``read_random_lines(USER_AGENT_DIR, 5)``).
        """
        if firefox_profile is None:
            firefox_profile = LFirefoxProfile(profile_directory=kwargs.get('profile_directory', None))

        # Cache, extension and data-reporting preferences.
        firefox_profile.set_preference('browser.cache.disk.capacity', 131072)
        firefox_profile.set_preference('browser.cache.disk.smart_size.enabled', False)
        firefox_profile.set_preference('extensions.killspinners.timeout', self.timeout - 2)
        firefox_profile.set_preference('extensions.killspinners.disablenotify', True)
        firefox_profile.set_preference('extensions.firebug.showFirstRunPage', False)
        firefox_profile.set_preference('datareporting.healthreport.uploadEnabled', False)
        firefox_profile.set_preference('datareporting.healthreport.service.firstRun', False)

        # NOTE(review): hard-coded, machine-specific path - confirm this is intended.
        firefox_profile.set_preference('webdriver.firefox.profile', 'D:\\profiles\\xx')

        # Start with no proxy; switch to manual SOCKS below if one is requested.
        firefox_profile.set_preference('network.proxy.type', 0)
        socks_endpoint = None
        if string_proxy:
            urlinfo = urlparse.urlparse(string_proxy)

            if urlinfo.scheme == 'ssh':
                self.bitvise = Bitvise(urlinfo.hostname, urlinfo.port, username=urlinfo.username, password=urlinfo.password)
                socks_endpoint = self.bitvise.start()
                # Presumably gives the tunnel time to come up before Firefox uses it.
                time.sleep(2)
            elif urlinfo.scheme == 'socks5':
                socks_endpoint = (urlinfo.hostname, urlinfo.port)

        if socks_endpoint is not None:
            socks_host, socks_port = socks_endpoint
            firefox_profile.set_preference('network.proxy.type', 1)
            firefox_profile.set_preference('network.proxy.socks', socks_host)
            firefox_profile.set_preference('network.proxy.socks_port', socks_port)
            firefox_profile.set_preference('network.proxy.socks_remote_dns', True)

        if kwargs.get('random_ua', False):
            user_agent = read_random_lines(USER_AGENT_DIR, 5)[0]
            firefox_profile.set_preference('general.useragent.override', user_agent)

        # Caller-supplied preferences go last so they win over the defaults above.
        for k, v in (profile_preferences or {}).items():
            firefox_profile.set_preference(k, v)

        webdriver.Firefox.__init__(self, firefox_profile=firefox_profile, firefox_binary=firefox_binary, timeout=timeout, capabilities=capabilities, proxy=None)

        self.set_page_load_timeout(self.timeout)
        self.implicitly_wait(self.wait_timeout)
        self.set_script_timeout(self.script_timeout)
Ejemplo n.º 4
0
    def __init__(self,
                 executable_path=PHANTOMJS_PATH,
                 port=0,
                 desired_capabilities=DesiredCapabilities.PHANTOMJS,
                 service_args=None,
                 service_log_path=None,
                 string_proxy=None,
                 timeout=180,
                 **kwargs):
        """Start a PhantomJS driver with image loading off and a custom user agent.

        Args:
            executable_path: Forwarded to the parent constructor.
            port: Forwarded to the parent constructor.
            desired_capabilities: Base capabilities. They are copied before
                the settings below are merged in, so the caller's dict
                (including the shared default) is never modified.
            service_args: Optional list of extra command-line arguments.
                When a ``socks5`` proxy is configured the proxy arguments
                are appended to this list.
            service_log_path: Forwarded to the parent constructor.
            string_proxy: Optional proxy URL; only the ``socks5`` scheme
                is acted upon.
            timeout: Timeout in seconds. Stored as ``self.timeout``, used
                for the page-load timeout and, multiplied by 1000, for the
                ``resourceTimeout`` capability.
            **kwargs: ``user_agent`` overrides the user agent otherwise
                taken from ``read_random_lines(USER_AGENT_DIR, 5)``;
                ``wait_timeout`` and ``script_timeout`` default to
                ``timeout`` and are stored on the instance.
        """
        self.timeout = timeout
        self.wait_time = 0.5

        user_agent = kwargs.get('user_agent',
                                read_random_lines(USER_AGENT_DIR, 5)[0])

        # Parenthesised on purpose: `'%s' % t * 1000` would format first and
        # then repeat the resulting string 1000 times.
        resource_timeout = '%s' % (self.timeout * 1000)

        # Work on a copy so the shared DesiredCapabilities.PHANTOMJS dict
        # (the default argument) is not polluted across instances.
        capabilities = dict(desired_capabilities or {})
        capabilities.update({
            'phantomjs.page.settings.loadImages': False,
            'phantomjs.page.settings.resourceTimeout': resource_timeout,
            'phantomjs.page.settings.userAgent': user_agent,
            'page.settings.loadImages': False,
            'page.settings.resourceTimeout': resource_timeout,
            'page.settings.userAgent': user_agent,
        })

        # Fresh list per call; never mutate the caller's list.
        service_args = list(service_args) if service_args else []

        if string_proxy:
            urlinfo = urlparse(string_proxy)
            if urlinfo.scheme == 'socks5':
                service_args.extend([
                    '--proxy=%s:%s' % (urlinfo.hostname, urlinfo.port),
                    '--proxy-type=socks5',
                ])

        super(BrowserPhantomJS,
              self).__init__(executable_path=executable_path,
                             port=port,
                             desired_capabilities=capabilities,
                             service_args=service_args,
                             service_log_path=service_log_path)

        self.wait_timeout = kwargs.get('wait_timeout', self.timeout)
        self.script_timeout = kwargs.get('script_timeout', self.timeout)
        self.set_page_load_timeout(self.timeout)
        self.set_window_size(1680, 1050)