Ejemplo n.º 1
0
    def __init__(self, document_body=None, transport=None, **kwargs):
        """
        Create Grab instance.

        :param document_body: if not None, it is passed to
            ``self.setup_document`` as the last step of construction.
        :param transport: stored unchanged in ``self.transport_param``;
            ``self.transport`` itself is initialised to None here.
        :param kwargs: if non-empty, forwarded to ``self.setup``.
        """

        self.meta = {}
        self._doc = None
        self.config = default_config()
        self.config['common_headers'] = self.common_headers()
        self.cookies = CookieManager()
        self.proxylist = ProxyList()

        # makes pylint happy
        # (the attributes are declared here with placeholder values;
        # presumably `reset()` below gives them their working values --
        # TODO confirm)
        self.request_counter = None
        self.request_head = None
        self.request_body = None
        self.request_method = None
        self.transport_param = transport
        self.transport = None

        # `reset()` runs before caller-supplied options are applied, and the
        # document body (if any) is loaded after those options.
        self.reset()
        if kwargs:
            self.setup(**kwargs)
        if document_body is not None:
            self.setup_document(document_body)
Ejemplo n.º 2
0
    def load_proxylist(self, source, source_type=None, proxy_type='http',
                       auto_init=True, auto_change=True,
                       **kwargs):
        """
        Replace the current proxy list with one loaded from `source`.

        :param source: a ``BaseProxySource`` instance, or a string that is
            interpreted according to `source_type`.
        :param source_type: 'text_file' or 'url'; only consulted when
            `source` is a string.
        :param proxy_type: forwarded to the file/url loader of the
            proxy list.
        :param auto_init: when true and `auto_change` is false, a proxy is
            taken from the list right away and stored in ``self.proxy``.
        :param auto_change: stored in ``self.proxy_auto_change``.
        :param kwargs: accepted but not used by this method.
        :raises SpiderMisuseError: if `source` is neither a
            ``BaseProxySource`` nor a string, or if a string source comes
            with an unsupported `source_type`.
        """
        self.proxylist = ProxyList()
        if isinstance(source, BaseProxySource):
            self.proxylist.set_source(source)
        elif isinstance(source, six.string_types):
            if source_type == 'text_file':
                self.proxylist.load_file(source, proxy_type=proxy_type)
            elif source_type == 'url':
                self.proxylist.load_url(source, proxy_type=proxy_type)
            else:
                # The value is wrapped in a 1-tuple so that a tuple argument
                # is formatted as a whole instead of being unpacked by `%`
                # (which would raise TypeError and hide the real problem).
                raise SpiderMisuseError('Method `load_proxylist` received '
                                        'invalid `source_type` argument: %s'
                                        % (source_type,))
        else:
            # Same 1-tuple wrapping as above: `source` is arbitrary here and
            # may well be a tuple.
            raise SpiderMisuseError('Method `load_proxylist` received '
                                    'invalid `source` argument: %s'
                                    % (source,))

        self.proxylist_enabled = True
        self.proxy = None
        if not auto_change and auto_init:
            self.proxy = self.proxylist.get_random_proxy()
        self.proxy_auto_change = auto_change
Ejemplo n.º 3
0
 def test_get_next_proxy(self):
     """
     Check the order in which `get_next_proxy` hands out proxies.

     With a two-entry list the hosts must come back as foo, bar and then
     foo again; after the same file is loaded a second time the next
     proxy must be foo.
     """
     proxy_list = ProxyList()
     plist_path = self.generate_plist_file('foo:1\nbar:1')
     proxy_list.load_file(plist_path)
     for expected_host in ('foo', 'bar', 'foo'):
         self.assertEqual(proxy_list.get_next_proxy().host, expected_host)
     # Load the same file again and look at the first proxy handed out.
     proxy_list.load_file(plist_path)
     self.assertEqual(proxy_list.get_next_proxy().host, 'foo')
Ejemplo n.º 4
0
    def load_proxylist(self, source, source_type, proxy_type='http',
                       auto_init=True, auto_change=True,
                       **kwargs):
        """
        Build a new ``ProxyList`` from the arguments and enable it.

        `source`, `source_type`, `proxy_type` and any extra keyword
        arguments are handed to the ``ProxyList`` constructor. When
        `auto_init` is true and `auto_change` is false, the result of
        ``self.proxylist.get_random()`` is stored in ``self.proxy``;
        otherwise ``self.proxy`` stays None. `auto_change` itself is
        stored in ``self.proxy_auto_change``.
        """
        self.proxylist = ProxyList(
            source, source_type, proxy_type=proxy_type, **kwargs
        )
        self.proxylist_enabled = True

        # Start without a proxy; one is picked right away only when
        # `auto_init` is set and `auto_change` is not.
        self.proxy = None
        pick_now = auto_init and not auto_change
        if pick_now:
            self.proxy = self.proxylist.get_random()
        self.proxy_auto_change = auto_change
Ejemplo n.º 5
0
 def test_get_next_proxy(self):
     """
     Check the order in which `get_next_proxy` hands out proxies.

     A temporary file is filled with two entries; the hosts must come
     back as foo, bar and then foo again, and after the file is loaded a
     second time the next proxy must be foo.
     """
     with temp_file() as path:
         proxy_list = ProxyList()
         self.generate_plist_file(path, 'foo:1\nbar:1')
         proxy_list.load_file(path)
         for expected_host in ('foo', 'bar', 'foo'):
             self.assertEqual(proxy_list.get_next_proxy().host,
                              expected_host)
         # Load the same file again and look at the first proxy handed out.
         proxy_list.load_file(path)
         self.assertEqual(proxy_list.get_next_proxy().host, 'foo')
Ejemplo n.º 6
0
    def __init__(self, document_body=None, transport='pycurl', **kwargs):
        """
        Create Grab instance.

        :param document_body: if not None, it is passed to
            ``self.setup_document`` as the last step of construction.
        :param transport: passed to ``self.setup_transport``; defaults to
            'pycurl'.
        :param kwargs: if non-empty, forwarded to ``self.setup``.
        """

        self.meta = {}
        self._doc = None
        self.config = default_config()
        self.config['common_headers'] = self.common_headers()
        self.cookies = CookieManager()
        self.proxylist = ProxyList()
        # The transport is set up before `reset()`; caller-supplied options
        # are applied after it, and the document body (if any) comes last.
        self.setup_transport(transport)
        self.reset()
        if kwargs:
            self.setup(**kwargs)
        if document_body is not None:
            self.setup_document(document_body)
Ejemplo n.º 7
0
    def load_proxylist(self,
                       source,
                       source_type=None,
                       proxy_type='http',
                       auto_init=True,
                       auto_change=True):
        """
        Load proxy list.

        :param source: Proxy source.
            Accepts string (file path, url) or ``BaseProxySource`` instance.
        :param source_type: The type of the specified source.
            Should be one of the following: 'text_file' or 'url'.
            Only consulted when `source` is a string.
        :param proxy_type:
            Should be one of the following: 'socks4', 'socks5' or 'http'.
        :param auto_init:
            If set to `True` while `auto_change` is `False`, a proxy is
            taken from the list right away and stored in ``self.proxy``.
        :param auto_change:
            If set to `True` then automatic random proxy rotation
            will be used.
        :raises SpiderMisuseError: if `source` is neither a string nor a
            ``BaseProxySource``, or if a string source comes with an
            unsupported `source_type`.

        Proxy source format should be one of the following (for each line):
            - ip:port
            - ip:port:login:password

        """
        self.proxylist = ProxyList()
        if isinstance(source, BaseProxySource):
            self.proxylist.set_source(source)
        elif isinstance(source, six.string_types):
            if source_type == 'text_file':
                self.proxylist.load_file(source, proxy_type=proxy_type)
            elif source_type == 'url':
                self.proxylist.load_url(source, proxy_type=proxy_type)
            else:
                # The value is wrapped in a 1-tuple so that a tuple argument
                # is formatted as a whole instead of being unpacked by `%`
                # (which would raise TypeError and hide the real problem).
                raise SpiderMisuseError('Method `load_proxylist` received '
                                        'invalid `source_type` argument: %s' %
                                        (source_type,))
        else:
            # Same 1-tuple wrapping as above: `source` is arbitrary here and
            # may well be a tuple.
            raise SpiderMisuseError('Method `load_proxylist` received '
                                    'invalid `source` argument: %s' %
                                    (source,))

        self.proxylist_enabled = True
        self.proxy = None
        if not auto_change and auto_init:
            self.proxy = self.proxylist.get_random_proxy()
        self.proxy_auto_change = auto_change
Ejemplo n.º 8
0
 def test_web_proxy_source(self):
     """
     Load a proxy list over HTTP from the test server.

     The server is told to answer with ``DEFAULT_PLIST_DATA``; the list
     loaded from its URL is expected to contain two proxies.
     """
     proxy_list = ProxyList()
     self.server.response['data'] = DEFAULT_PLIST_DATA
     source_url = self.server.get_url()
     proxy_list.load_url(source_url)
     self.assertEqual(2, proxy_list.size())
Ejemplo n.º 9
0
 def test_file_proxy_source(self):
     """
     Load a proxy list from a generated file.

     The file produced by ``generate_plist_file()`` with its default
     content is expected to yield two proxies.
     """
     proxy_list = ProxyList()
     plist_path = self.generate_plist_file()
     proxy_list.load_file(plist_path)
     self.assertEqual(2, proxy_list.size())
Ejemplo n.º 10
0
 def test_basic(self):
     """A freshly created ``ProxyList`` must report a size of zero."""
     empty_list = ProxyList()
     reported_size = empty_list.size()
     self.assertEqual(0, reported_size)
Ejemplo n.º 11
0
 def test_file_proxy_source(self):
     """
     Load a proxy list from a temporary file.

     ``generate_plist_file`` fills the temporary path with its default
     content, which is expected to yield two proxies.
     """
     with temp_file() as plist_path:
         proxy_list = ProxyList()
         self.generate_plist_file(plist_path)
         proxy_list.load_file(plist_path)
         loaded_count = proxy_list.size()
         self.assertEqual(2, loaded_count)