Example #1
0
 def setup_transport(self, transport_param, reset=False):
     """
     Create the network transport and store it in `self.transport`.

     :param transport_param: a string (an alias from `TRANSPORT_ALIAS` or
         a dotted "module.path.ClassName"), a class or other callable that
         returns a transport instance, or None to use `DEFAULT_TRANSPORT`
     :param reset: set to True to replace a transport that already exists
     :raises GrabMisuseError: if a transport is already set up and `reset`
         is false, if the string is neither an alias nor a dotted path,
         or if `transport_param` is neither a string nor a callable
     """
     if self.transport is not None and not reset:
         raise error.GrabMisuseError(
             'Transport is already set up. Use'
             ' setup_transport(..., reset=True) to explicitly setup'
             ' new transport')
     if transport_param is None:
         transport_param = DEFAULT_TRANSPORT
     if isinstance(transport_param, six.string_types):
         if transport_param in TRANSPORT_ALIAS:
             transport_param = TRANSPORT_ALIAS[transport_param]
         if '.' not in transport_param:
             raise error.GrabMisuseError('Unknown transport: %s' %
                                         transport_param)
         mod_path, cls_name = transport_param.rsplit('.', 1)
         try:
             cls = TRANSPORT_CACHE[(mod_path, cls_name)]
         except KeyError:
             # A non-empty `fromlist` makes __import__ return the module
             # named by the full dotted path, not the top-level package
             mod = __import__(mod_path, globals(), locals(), ['foo'])
             cls = getattr(mod, cls_name)
             TRANSPORT_CACHE[(mod_path, cls_name)] = cls
         self.transport = cls()
     elif callable(transport_param):
         # The builtin `callable` is used because the `collections.Callable`
         # alias no longer exists in Python 3.10+
         self.transport = transport_param()
     else:
         raise error.GrabMisuseError('Option `transport` should be string '
                                     'or class or callable. Got %s' %
                                     type(transport_param))
Example #2
0
    def load_proxylist(self,
                       source,
                       source_type,
                       proxy_type='http',
                       auto_init=True,
                       auto_change=True,
                       **kwargs):
        """
        Point `self.proxylist` at a new proxy source.

        :param source: file path (for "text_file") or URL (for "url")
        :param source_type: either "text_file" or "url"
        :param proxy_type: forwarded to `proxylist.set_source`
        :param auto_init: when true and `auto_change` is false, a proxy is
            selected immediately via `change_proxy`
        :param auto_change: stored as the `proxy_auto_change` option
        :param kwargs: extra keyword arguments for `proxylist.set_source`
        :raises GrabMisuseError: if `source_type` is not recognized
        """
        from_file = source_type == 'text_file'
        if not (from_file or source_type == 'url'):
            raise error.GrabMisuseError('Unknown proxy source type: %s' %
                                        source_type)

        if from_file:
            self.proxylist.set_source('file',
                                      location=source,
                                      proxy_type=proxy_type,
                                      **kwargs)
        else:
            self.proxylist.set_source('url',
                                      url=source,
                                      proxy_type=proxy_type,
                                      **kwargs)

        self.setup(proxy_auto_change=auto_change)

        # With automatic rotation enabled (or auto_init disabled) no proxy
        # is picked here.
        if auto_change or not auto_init:
            return
        self.change_proxy()
Example #3
0
    def process_cookie_options(self, grab, req):
        """
        Apply the `cookiefile` and `cookies` options to the request.

        Cookies are loaded from the cookie file (a missing/unreadable file
        is only logged), then the plain `cookies` dict is stored in
        `grab.cookies` for the requested host, and finally the resulting
        `Cookie` header, if any, is written into `req.headers`.

        :param grab: object providing `config` and `cookies`
        :param req: request object providing `url` and `headers`
        :raises GrabMisuseError: if the `cookies` option is not a dict
        """
        # The cookie file goes first: loading it may change what ends up
        # in the cookie storage before the `cookies` option is applied.
        cookie_file = grab.config['cookiefile']
        if cookie_file:
            try:
                grab.cookies.load_from_file(cookie_file)
            except IOError as ex:
                # A cookie file that can not be read is not fatal
                logging.error(ex)

        host = urlsplit(req.url).hostname
        if host and grab.config['cookies']:
            simple_cookies = grab.config['cookies']
            if not isinstance(simple_cookies, dict):
                raise error.GrabMisuseError('cookies option should'
                                            ' be a dict')
            # The `cookies` option carries only names and values, so each
            # cookie is bound to the requested host with a leading "www."
            # removed.
            domain = host[4:] if host.startswith('www.') else host
            for name, value in simple_cookies.items():
                grab.cookies.set(name=name, value=value, domain=domain)

        header_value = grab.cookies.get_cookie_header(req)
        if header_value:
            req.headers['Cookie'] = header_value
Example #4
0
    def setup_document(self, content, **kwargs):
        """
        Build `self.doc` from raw content without any network request.

        Handy for testing and debugging.

        :param content: document body as a byte string
        :param kwargs: every item is set as an attribute on the new
            `Document`; `document_charset`, when given, is also passed to
            `Document.parse` as the charset
        :raises GrabMisuseError: if `content` is a text (unicode) string
        """

        self.reset()
        if isinstance(content, six.text_type):
            raise error.GrabMisuseError('Method `setup_document` accepts only '
                                        'byte string in `content` argument.')

        document = Document(grab=self)
        document.body = content
        document.status = ''
        document.head = b'HTTP/1.1 200 OK\r\n\r\n'
        document.parse(charset=kwargs.get('document_charset'))

        # Metadata of a fake successful response, assigned after parsing
        fake_metadata = (
            ('code', 200),
            ('total_time', 0),
            ('connect_time', 0),
            ('name_lookup_time', 0),
            ('url', ''),
        )
        for attr_name, attr_value in fake_metadata:
            setattr(document, attr_name, attr_value)

        # Caller-supplied attributes come last and override the defaults
        for attr_name, attr_value in kwargs.items():
            setattr(document, attr_name, attr_value)

        self.doc = document
Example #5
0
File: base.py Project: subeax/grab
    def setup(self, **kwargs):
        """
        Setting up Grab instance configuration.
        """

        if 'hammer_mode' in kwargs:
            logging.error(
                'Option hammer_mode is deprecated. Grab does not support hammer mode anymore.'
            )
            del kwargs['hammer_mode']

        if 'hammer_timeouts' in kwargs:
            logging.error(
                'Option hammer_timeouts is deprecated. Grab does not support hammer mode anymore.'
            )
            del kwargs['hammer_timeouts']

        for key in kwargs:
            if not key in self.config.keys():
                raise error.GrabMisuseError('Unknown option: %s' % key)

        if 'url' in kwargs:
            if self.config.get('url'):
                kwargs['url'] = self.make_url_absolute(kwargs['url'])
        self.config.update(kwargs)
Example #6
0
    def setup(self, **kwargs):
        """
        Setting up Grab instance configuration.
        """

        json_kwarg = kwargs.pop('json', None)
        if json_kwarg is not None:
            if 'headers' in kwargs:
                headers = kwargs['headers'] = kwargs['headers'].copy()
            else:
                headers = kwargs['headers'] = {}
            headers.update({'Content-Type': 'application/json; charset=utf-8'})
            kwargs['post'] = json.dumps(json_kwarg)

        for key in kwargs:
            if key not in self.config.keys():
                raise error.GrabMisuseError('Unknown option: %s' % key)

        if 'url' in kwargs:
            if self.config.get('url'):
                kwargs['url'] = self.make_url_absolute(kwargs['url'])

        for header_field in ['headers', 'common_headers']:
            if header_field in kwargs and isinstance(kwargs[header_field],
                                                     str):
                headers = {[y.strip() for y in x.strip('> ').split(':', 1)]
                           for x in kwargs[header_field].split('\n')
                           if x.strip(' >')}
                kwargs[header_field] = headers

        self.config.update(kwargs)
Example #7
0
    def process_cookie_options(self, grab, request_url):
        """
        Load cookies from the Grab options and hand them to pycurl.

        The `cookiefile` option is loaded into `grab.cookies`, then the
        plain `cookies` dict is added to it, and finally cookies from the
        cookie jar that match the requested host are passed to the curl
        handle as `Set-Cookie:` strings.

        :param grab: object providing `config` and `cookies`
        :param request_url: URL of the request being prepared
        :raises GrabMisuseError: if the `cookies` option is not a dict
        """
        # Host name without port and without a leading "www."
        host_nowww = urlsplit(request_url).netloc.split(':')[0]
        if host_nowww.startswith('www.'):
            host_nowww = host_nowww[4:]
        host_has_dot = '.' in host_nowww

        # The cookie file is processed before the `cookies` option
        cookie_file = grab.config['cookiefile']
        if cookie_file:
            grab.cookies.load_from_file(cookie_file)

        simple_cookies = grab.config['cookies']
        if simple_cookies:
            if not isinstance(simple_cookies, dict):
                raise error.GrabMisuseError('cookies option should be a dict')
            # Dot-less hosts get cookies with an empty domain
            simple_domain = host_nowww if host_has_dot else ''
            for name, value in simple_cookies.items():
                grab.cookies.set(name=name, value=value, domain=simple_domain)

        # 'ALL' erases cookies known to the pycurl handle, the empty
        # string then enables its cookie processing mode
        for command in ('ALL', ''):
            self.curl.setopt(pycurl.COOKIELIST, command)

        # TODO: At this point we should use cookielib magic
        # to pick up cookies for the current requests
        for cookie in grab.cookies.cookiejar:
            domain = cookie.domain
            is_http_only = domain.startswith('#httponly_')
            if is_http_only:
                domain = domain.replace('#httponly_', '')

            # Skip cookies that have a domain not containing the host
            if domain and host_nowww not in domain:
                continue

            pieces = [b'Set-Cookie: ' + encode_cookies(
                {cookie.name: cookie.value},
                join=True,
                charset=grab.config['charset'])]
            if len(cookie.path) > 0:
                pieces.append(b'; path=' + cookie.path.encode('ascii'))
            if host_has_dot:
                pieces.append(b'; domain=' + domain.encode('ascii'))
            if is_http_only:
                pieces.append(b'; HttpOnly')
            self.curl.setopt(pycurl.COOKIELIST, b''.join(pieces))
Example #8
0
    def setup(self, **kwargs):
        """
        Setting up Grab instance configuration.
        """

        for key in kwargs:
            if key not in self.config.keys():
                raise error.GrabMisuseError('Unknown option: %s' % key)

        if 'url' in kwargs:
            if self.config.get('url'):
                kwargs['url'] = self.make_url_absolute(kwargs['url'])
        self.config.update(kwargs)
Example #9
0
 def setup_transport(self, transport_param):
     """
     Create the network transport and store it in `self.transport`.

     The original argument is remembered in `self.transport_param`.

     :param transport_param: a string (an alias from `TRANSPORT_ALIAS` or
         a dotted "module.path.ClassName"), or a class / other callable
         that returns a transport instance
     :raises GrabMisuseError: if the string is neither an alias nor a
         dotted path, or if `transport_param` is neither a string nor a
         callable
     """
     self.transport_param = transport_param
     if isinstance(transport_param, six.string_types):
         if transport_param in TRANSPORT_ALIAS:
             transport_param = TRANSPORT_ALIAS[transport_param]
         if '.' not in transport_param:
             raise error.GrabMisuseError('Unknown transport: %s' %
                                         transport_param)
         mod_path, cls_name = transport_param.rsplit('.', 1)
         try:
             cls = TRANSPORT_CACHE[(mod_path, cls_name)]
         except KeyError:
             # A non-empty `fromlist` makes __import__ return the module
             # named by the full dotted path, not the top-level package
             mod = __import__(mod_path, globals(), locals(), ['foo'])
             cls = getattr(mod, cls_name)
             TRANSPORT_CACHE[(mod_path, cls_name)] = cls
         self.transport = cls()
     elif callable(transport_param):
         # The builtin `callable` is used because the `collections.Callable`
         # alias no longer exists in Python 3.10+
         self.transport = transport_param()
     else:
         raise error.GrabMisuseError('Option `transport` should be string '
                                     'or callable. Got %s' %
                                     type(transport_param))
Example #10
0
    def process_cookie_options(self, grab, request_url):
        """
        Load cookies from the Grab options and hand them to pycurl.

        The `cookiefile` option is loaded into `grab.cookies` (a file that
        can not be read is only logged), the plain `cookies` dict is added
        for the requested host, and then every cookie from the cookie jar
        is passed to the curl handle.

        :param grab: object providing `config` and `cookies`
        :param request_url: URL of the request being prepared
        :raises GrabMisuseError: if the `cookies` option is not a dict
        """
        # Host name without port; a leading "www." is dropped to get the
        # domain used for cookies from the `cookies` option
        request_host = urlsplit(request_url).netloc.split(':')[0]
        simple_domain = (request_host[4:]
                         if request_host.startswith('www.')
                         else request_host)

        # The cookie file is processed before the `cookies` option
        cookie_file = grab.config['cookiefile']
        if cookie_file:
            try:
                grab.cookies.load_from_file(cookie_file)
            except IOError as ex:
                # A cookie file that can not be read is not fatal
                logging.error(ex)

        # The `cookies` option is a plain name -> value mapping without
        # domain, path or expiration, so each cookie is bound to the
        # domain of the current request.
        simple_cookies = grab.config['cookies']
        if simple_cookies:
            if not isinstance(simple_cookies, dict):
                raise error.GrabMisuseError('cookies option should be a dict')
            for name, value in simple_cookies.items():
                grab.cookies.set(name=name, value=value, domain=simple_domain)

        # 'ALL' erases cookies known to the pycurl handle, the empty
        # string then enables its cookie processing mode
        for command in ('ALL', ''):
            self.curl.setopt(pycurl.COOKIELIST, command)

        # Every cookie of every host is handed over; the pycurl cookie
        # engine selects the ones matching the host of the request.
        for cookie in grab.cookies.cookiejar:
            cookie_spec = self.get_netscape_cookie_spec(cookie, request_host)
            self.curl.setopt(pycurl.COOKIELIST, cookie_spec)
Example #11
0
File: base.py Project: subeax/grab
 def setup_transport(self, transport_param):
     """
     Create the network transport and store it in `self.transport`.

     The original argument is remembered in `self.transport_param`.

     :param transport_param: a dotted "module.path.ClassName" string, or
         a class / other callable that returns a transport instance
     :raises GrabMisuseError: if the string is not a dotted path, or if
         `transport_param` is neither a string nor a callable
     """
     self.transport_param = transport_param
     if isinstance(transport_param, basestring):
         # Without a dot there is no module part to import; report it as
         # a misuse instead of failing on tuple unpacking below
         if '.' not in transport_param:
             raise error.GrabMisuseError('Unknown transport: %s' %
                                         transport_param)
         mod_path, cls_name = transport_param.rsplit('.', 1)
         try:
             cls = TRANSPORT_CACHE[(mod_path, cls_name)]
         except KeyError:
             # A non-empty `fromlist` makes __import__ return the module
             # named by the full dotted path, not the top-level package
             mod = __import__(mod_path, globals(), locals(), ['foo'])
             cls = getattr(mod, cls_name)
             TRANSPORT_CACHE[(mod_path, cls_name)] = cls
         self.transport = cls()
     elif callable(transport_param):
         # The builtin `callable` is used because the `collections.Callable`
         # alias no longer exists in Python 3.10+
         self.transport = transport_param()
     else:
         raise error.GrabMisuseError(
             'Option `transport` should be string or callable. '
             'Got %s' % type(transport_param))
Example #12
0
    def load_proxylist(self,
                       source,
                       source_type,
                       proxy_type='http',
                       auto_init=True,
                       auto_change=True):
        """
        Load proxies into `self.proxylist` from a file or a URL.

        :param source: file path (for "text_file") or URL (for "url")
        :param source_type: either "text_file" or "url"
        :param proxy_type: forwarded to the proxy list loader
        :param auto_init: when true and `auto_change` is false, a proxy is
            selected immediately via `change_proxy`
        :param auto_change: stored as the `proxy_auto_change` option
        :raises GrabMisuseError: if `source_type` is not recognized
        """
        # pylint: disable=no-member
        if source_type == 'text_file':
            loader = self.proxylist.load_file
        elif source_type == 'url':
            loader = self.proxylist.load_url
        else:
            raise error.GrabMisuseError('Unknown proxy source type: %s' %
                                        source_type)
        loader(source, proxy_type=proxy_type)

        self.setup(proxy_auto_change=auto_change)

        # With automatic rotation enabled (or auto_init disabled) no proxy
        # is picked here.
        if auto_change or not auto_init:
            return
        self.change_proxy()
        # pylint: enable=no-member
Example #13
0
File: curl.py Project: lyicy/grab
    def process_config(self, grab):
        """
        Configure the pycurl handle `self.curl` from ``grab.config``.

        Covers the URL, timeouts, header/body callbacks, user agent,
        request method and payload, HTTP headers, cookies, referer, proxy,
        authentication, content encoding and optional limits.

        Side effect: when ``grab.config['user_agent']`` is None or empty
        it is overwritten with the value that is actually sent.

        :param grab: Grab instance providing `config` and `request_method`
        :raises GrabInvalidUrl: if the `url` option can not be normalized
        :raises GrabMisuseError: if the options are inconsistent with the
            request method, the body storage settings or the encoding
            support of pycurl
        """

        # Copy some config for future usage
        self.config_nobody = grab.config['nobody']
        self.config_body_maxsize = grab.config['body_maxsize']

        try:
            request_url = normalize_url(grab.config['url'])
        except Exception as ex:
            raise error.GrabInvalidUrl(u'%s: %s' %
                                       (six.text_type(ex), grab.config['url']))

        # On Python 2 the URL is converted with `make_str` before it is
        # given to pycurl
        if not six.PY3:
            request_url = make_str(request_url)

        self.curl.setopt(pycurl.URL, request_url)

        # 30* redirects are handled by Grab
        self.curl.setopt(pycurl.FOLLOWLOCATION, 0)
        self.curl.setopt(pycurl.MAXREDIRS, grab.config['redirect_limit'])
        self.curl.setopt(pycurl.CONNECTTIMEOUT, grab.config['connect_timeout'])
        self.curl.setopt(pycurl.TIMEOUT, grab.config['timeout'])
        if not grab.config['connection_reuse']:
            self.curl.setopt(pycurl.FRESH_CONNECT, 1)
            self.curl.setopt(pycurl.FORBID_REUSE, 1)

        self.curl.setopt(pycurl.NOSIGNAL, 1)
        self.curl.setopt(pycurl.HEADERFUNCTION, self.header_processor)

        # When the body is not kept in memory the storage file has to be
        # prepared before the body callback is installed
        if not grab.config['body_inmemory']:
            if not grab.config['body_storage_dir']:
                raise error.GrabMisuseError(
                    'Option body_storage_dir is not defined')
            self.setup_body_file(
                grab.config['body_storage_dir'],
                grab.config['body_storage_filename'],
                create_dir=grab.config['body_storage_create_dir'])
        self.curl.setopt(pycurl.WRITEFUNCTION, self.body_processor)

        if grab.config['verbose_logging']:
            self.verbose_logging = True

        # User-Agent
        if grab.config['user_agent'] is None:
            if grab.config['user_agent_file'] is not None:
                with open(grab.config['user_agent_file']) as inf:
                    lines = inf.read().splitlines()
                grab.config['user_agent'] = random.choice(lines)
            else:
                grab.config['user_agent'] = generate_user_agent()

        # If value is None then set empty string
        # None is not acceptable because in such case
        # pycurl will set its default user agent "PycURL/x.xx.x"
        if not grab.config['user_agent']:
            grab.config['user_agent'] = ''

        self.curl.setopt(pycurl.USERAGENT, grab.config['user_agent'])

        if grab.config['debug']:
            self.curl.setopt(pycurl.VERBOSE, 1)
            self.curl.setopt(pycurl.DEBUGFUNCTION, self.debug_processor)

        # Ignore SSL errors
        # NOTE(review): certificate and host verification are switched off
        # for every request - confirm this is intended.
        self.curl.setopt(pycurl.SSL_VERIFYPEER, 0)
        self.curl.setopt(pycurl.SSL_VERIFYHOST, 0)

        # SSLVERSION is deliberately not forced to SSLv3: that caused
        # "SSL3_READ_BYTES:sslv3 alert handshake failure" errors.

        if grab.request_method in ('POST', 'PUT'):
            if (grab.config['post'] is None
                    and grab.config['multipart_post'] is None):
                raise error.GrabMisuseError(
                    'Neither `post` or `multipart_post`'
                    ' options was specified for the %s'
                    ' request' % grab.request_method)

        if grab.request_method == 'POST':
            self.curl.setopt(pycurl.POST, 1)
            if grab.config['multipart_post']:
                if isinstance(grab.config['multipart_post'], six.string_types):
                    raise error.GrabMisuseError(
                        'multipart_post option could not be a string')
                post_items = normalize_http_values(
                    grab.config['multipart_post'],
                    charset=grab.config['charset'],
                    ignore_classes=(UploadFile, UploadContent),
                )
                self.curl.setopt(pycurl.HTTPPOST,
                                 process_upload_items(post_items))
            elif grab.config['post']:
                post_data = normalize_post_data(grab.config['post'],
                                                grab.config['charset'])
                self.curl.setopt(pycurl.POSTFIELDS, post_data)
            else:
                self.curl.setopt(pycurl.POSTFIELDS, '')
        elif grab.request_method == 'PUT':
            data = grab.config['post']
            if isinstance(data, six.text_type):
                raise error.GrabMisuseError(
                    'Value of post option could be only '
                    'byte string if PUT method is used')
            self.curl.setopt(pycurl.UPLOAD, 1)
            self.curl.setopt(pycurl.CUSTOMREQUEST, 'PUT')
            self.curl.setopt(pycurl.READFUNCTION, StringIO(data).read)
            self.curl.setopt(pycurl.INFILESIZE, len(data))
        elif grab.request_method == 'PATCH':
            data = grab.config['post']
            if isinstance(data, six.text_type):
                raise error.GrabMisuseError(
                    'Value of post option could be only byte '
                    'string if PATCH method is used')
            self.curl.setopt(pycurl.UPLOAD, 1)
            self.curl.setopt(pycurl.CUSTOMREQUEST, 'PATCH')
            self.curl.setopt(pycurl.READFUNCTION, StringIO(data).read)
            self.curl.setopt(pycurl.INFILESIZE, len(data))
        elif grab.request_method == 'DELETE':
            self.curl.setopt(pycurl.CUSTOMREQUEST, 'DELETE')
        elif grab.request_method == 'HEAD':
            self.curl.setopt(pycurl.NOBODY, 1)
        elif grab.request_method == 'UPLOAD':
            self.curl.setopt(pycurl.UPLOAD, 1)
        elif grab.request_method == 'GET':
            self.curl.setopt(pycurl.HTTPGET, 1)
        elif grab.request_method == 'OPTIONS':
            # The request body is optional for OPTIONS
            data = grab.config['post']
            if data is not None:
                if isinstance(data, six.text_type):
                    raise error.GrabMisuseError(
                        'Value of post option could be only byte '
                        'string if OPTIONS method is used')
                self.curl.setopt(pycurl.UPLOAD, 1)
                self.curl.setopt(pycurl.READFUNCTION, StringIO(data).read)
                self.curl.setopt(pycurl.INFILESIZE, len(data))
            self.curl.setopt(pycurl.CUSTOMREQUEST, 'OPTIONS')
        else:
            raise error.GrabMisuseError('Invalid method: %s' %
                                        grab.request_method)

        # Merge into a copy: updating the `common_headers` dict in place
        # would leak the headers of this request into the configuration
        headers = grab.config['common_headers'].copy()
        if grab.config['headers']:
            headers.update(grab.config['headers'])
        # This is required to avoid some problems
        headers.update({'Expect': ''})
        header_tuples = [str('%s: %s' % x) for x in headers.items()]
        self.curl.setopt(pycurl.HTTPHEADER, header_tuples)

        self.process_cookie_options(grab, request_url)

        if grab.config['referer']:
            self.curl.setopt(pycurl.REFERER, str(grab.config['referer']))

        # An empty proxy value is always set explicitly when no proxy is
        # configured
        if grab.config['proxy']:
            self.curl.setopt(pycurl.PROXY, str(grab.config['proxy']))
        else:
            self.curl.setopt(pycurl.PROXY, '')

        if grab.config['proxy_userpwd']:
            self.curl.setopt(pycurl.PROXYUSERPWD,
                             str(grab.config['proxy_userpwd']))

        if grab.config['proxy_type']:
            # e.g. "socks5" -> pycurl.PROXYTYPE_SOCKS5
            key = 'PROXYTYPE_%s' % grab.config['proxy_type'].upper()
            self.curl.setopt(pycurl.PROXYTYPE, getattr(pycurl, key))

        if grab.config['encoding']:
            if ('gzip' in grab.config['encoding']
                    and 'zlib' not in pycurl.version):
                raise error.GrabMisuseError(
                    'You can not use gzip encoding because '
                    'pycurl was built without zlib support')
            self.curl.setopt(pycurl.ENCODING, grab.config['encoding'])

        if grab.config['userpwd']:
            self.curl.setopt(pycurl.USERPWD, str(grab.config['userpwd']))

        if grab.config.get('interface') is not None:
            self.curl.setopt(pycurl.INTERFACE, grab.config['interface'])

        if grab.config.get('reject_file_size') is not None:
            self.curl.setopt(pycurl.MAXFILESIZE,
                             grab.config['reject_file_size'])
Example #14
0
File: curl.py Project: subeax/grab
    def process_config(self, grab):
        """
        Configure the pycurl handle `self.curl` from ``grab.config``.

        Covers the URL, redirects, timeouts, header/body callbacks, user
        agent, request method and payload, HTTP headers, cookies, referer,
        proxy, authentication, content encoding and optional limits.

        Side effect: when ``grab.config['user_agent']`` is None or empty
        it is overwritten with the value that is actually sent.

        :param grab: Grab instance providing `config` and `request_method`
        :raises GrabInvalidUrl: if the `url` option can not be normalized
        :raises GrabMisuseError: if the options are inconsistent with the
            request method, the body storage settings or the encoding
            support of pycurl
        """

        # Copy some config for future usage
        self.config_nobody = grab.config['nobody']
        self.config_body_maxsize = grab.config['body_maxsize']

        try:
            request_url = normalize_url(grab.config['url'])
        except Exception as ex:
            raise error.GrabInvalidUrl(u'%s: %s' %
                                       (unicode(ex), grab.config['url']))

        # On Python 2 the URL is converted with `smart_str` before it is
        # given to pycurl
        if not PY3K:
            request_url = smart_str(request_url)

        self.curl.setopt(pycurl.URL, request_url)

        self.curl.setopt(pycurl.FOLLOWLOCATION,
                         1 if grab.config['follow_location'] else 0)
        self.curl.setopt(pycurl.MAXREDIRS, grab.config['redirect_limit'])
        self.curl.setopt(pycurl.CONNECTTIMEOUT, grab.config['connect_timeout'])
        self.curl.setopt(pycurl.TIMEOUT, grab.config['timeout'])
        self.curl.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V4)
        if not grab.config['connection_reuse']:
            self.curl.setopt(pycurl.FRESH_CONNECT, 1)
            self.curl.setopt(pycurl.FORBID_REUSE, 1)

        self.curl.setopt(pycurl.NOSIGNAL, 1)
        self.curl.setopt(pycurl.HEADERFUNCTION, self.head_processor)

        # When the body is not kept in memory the storage file has to be
        # prepared before the body callback is installed
        if not grab.config['body_inmemory']:
            if not grab.config['body_storage_dir']:
                raise error.GrabMisuseError(
                    'Option body_storage_dir is not defined')
            self.setup_body_file(grab.config['body_storage_dir'],
                                 grab.config['body_storage_filename'])
        self.curl.setopt(pycurl.WRITEFUNCTION, self.body_processor)

        if grab.config['verbose_logging']:
            self.verbose_logging = True

        # User-Agent
        if grab.config['user_agent'] is None:
            if grab.config['user_agent_file'] is not None:
                with open(grab.config['user_agent_file']) as inf:
                    lines = inf.read().splitlines()
                grab.config['user_agent'] = random.choice(lines)
            else:
                grab.config['user_agent'] = random_user_agent()

        # If value is None then set empty string
        # None is not acceptable because in such case
        # pycurl will set its default user agent "PycURL/x.xx.x"
        if not grab.config['user_agent']:
            grab.config['user_agent'] = ''

        self.curl.setopt(pycurl.USERAGENT, grab.config['user_agent'])

        if grab.config['debug']:
            self.curl.setopt(pycurl.VERBOSE, 1)
            self.curl.setopt(pycurl.DEBUGFUNCTION, self.debug_processor)

        # Ignore SSL errors
        # NOTE(review): certificate and host verification are switched off
        # for every request - confirm this is intended.
        self.curl.setopt(pycurl.SSL_VERIFYPEER, 0)
        self.curl.setopt(pycurl.SSL_VERIFYHOST, 0)
        # The protocol version is no longer forced to SSLv3: servers that
        # do not accept SSLv3 answer with a handshake failure, so the
        # version is left for libcurl to negotiate.

        if grab.request_method == 'POST':
            self.curl.setopt(pycurl.POST, 1)
            if grab.config['multipart_post']:
                if isinstance(grab.config['multipart_post'], basestring):
                    raise error.GrabMisuseError(
                        'multipart_post option could not be a string')
                post_items = normalize_http_values(
                    grab.config['multipart_post'],
                    charset=grab.config['charset'])
                # py3 hack
                if PY3K:
                    post_items = decode_pairs(post_items,
                                              grab.config['charset'])
                self.curl.setopt(pycurl.HTTPPOST, post_items)
            elif grab.config['post']:
                post_data = normalize_post_data(grab.config['post'],
                                                grab.config['charset'])
                self.curl.setopt(pycurl.COPYPOSTFIELDS, post_data)
            else:
                self.curl.setopt(pycurl.POSTFIELDS, '')
        elif grab.request_method == 'PUT':
            data = grab.config['post']
            if isinstance(data, unicode) or (
                    not PY3K and not isinstance(data, basestring)):
                raise error.GrabMisuseError(
                    'Value of post option could be only '
                    'byte string if PUT method is used')
            self.curl.setopt(pycurl.UPLOAD, 1)
            self.curl.setopt(pycurl.READFUNCTION, StringIO(data).read)
            self.curl.setopt(pycurl.INFILESIZE, len(data))
        elif grab.request_method == 'PATCH':
            data = grab.config['post']
            if isinstance(data, unicode) or not isinstance(data, basestring):
                # py3 hack
                if PY3K:
                    data = data.encode('utf-8')
                else:
                    raise error.GrabMisuseError(
                        'Value of post option could be only byte '
                        'string if PATCH method is used')
            self.curl.setopt(pycurl.UPLOAD, 1)
            self.curl.setopt(pycurl.CUSTOMREQUEST, 'PATCH')
            self.curl.setopt(pycurl.READFUNCTION, StringIO(data).read)
            self.curl.setopt(pycurl.INFILESIZE, len(data))
        elif grab.request_method == 'DELETE':
            # HTTP method names are case-sensitive, so the custom request
            # string has to be upper-case
            self.curl.setopt(pycurl.CUSTOMREQUEST, 'DELETE')
        elif grab.request_method == 'HEAD':
            self.curl.setopt(pycurl.NOBODY, 1)
        elif grab.request_method == 'UPLOAD':
            self.curl.setopt(pycurl.UPLOAD, 1)
        elif grab.request_method == 'GET':
            self.curl.setopt(pycurl.HTTPGET, 1)
        else:
            raise error.GrabMisuseError('Invalid method: %s' %
                                        grab.request_method)

        # Merge into a copy: updating the `common_headers` dict in place
        # would leak the headers of this request into the configuration
        headers = grab.config['common_headers'].copy()
        if grab.config['headers']:
            headers.update(grab.config['headers'])
        header_tuples = [str('%s: %s' % x) for x in headers.items()]
        self.curl.setopt(pycurl.HTTPHEADER, header_tuples)

        self.process_cookie_options(grab, request_url)

        if grab.config['referer']:
            self.curl.setopt(pycurl.REFERER, str(grab.config['referer']))

        # An empty proxy value is always set explicitly when no proxy is
        # configured
        if grab.config['proxy']:
            self.curl.setopt(pycurl.PROXY, str(grab.config['proxy']))
        else:
            self.curl.setopt(pycurl.PROXY, '')

        if grab.config['proxy_userpwd']:
            self.curl.setopt(pycurl.PROXYUSERPWD,
                             str(grab.config['proxy_userpwd']))

        if grab.config['proxy_type']:
            # e.g. "socks5" -> pycurl.PROXYTYPE_SOCKS5
            ptype = getattr(pycurl,
                            'PROXYTYPE_%s' % grab.config['proxy_type'].upper())
            self.curl.setopt(pycurl.PROXYTYPE, ptype)

        if grab.config['encoding']:
            if ('gzip' in grab.config['encoding']
                    and 'zlib' not in pycurl.version):
                raise error.GrabMisuseError(
                    'You can not use gzip encoding because '
                    'pycurl was built without zlib support')
            self.curl.setopt(pycurl.ENCODING, grab.config['encoding'])

        if grab.config['userpwd']:
            self.curl.setopt(pycurl.USERPWD, str(grab.config['userpwd']))

        if grab.config.get('interface') is not None:
            self.curl.setopt(pycurl.INTERFACE, grab.config['interface'])

        if grab.config.get('reject_file_size') is not None:
            self.curl.setopt(pycurl.MAXFILESIZE,
                             grab.config['reject_file_size'])