def make_request(self, reqtype='regular', **kwargs):
    """Build a scrapy Request of the given type.

    reqtype is one of 'dologin', 'loginpage' or 'regular'.  Optional
    kwargs: 'url', 'response', 'relativeurl', 'dont_filter',
    'req_once_logged', 'shared'.  The proxy/slot/reqtype meta keys are
    attached to every request.
    """
    if 'url' in kwargs:
        kwargs['url'] = self.make_url(kwargs['url'])
    # Handle the requests.
    # If you need to bypass DDoS protection, put it in here.
    # BUG FIX: the original compared strings with `is`, which tests object
    # identity and only worked by accident through CPython string
    # interning; `==` is the correct comparison.
    if reqtype == 'dologin':
        req = self.craft_login_request_from_form(kwargs['response'])
        req.meta['shared'] = False
        req.priority = 10
    elif reqtype == 'loginpage':
        req = Request(self.make_url('loginpage'), dont_filter=True)
        req.meta['shared'] = False
        req.priority = 15
        req.dont_filter = True
    elif reqtype == 'regular':
        req = Request(kwargs['url'], headers=self.user_agent)
        req.meta['shared'] = True
    # Some meta-keys that are shipped with the request.
    if 'relativeurl' in kwargs:
        req.meta['relativeurl'] = kwargs['relativeurl']
    if 'dont_filter' in kwargs:
        req.dont_filter = kwargs['dont_filter']
    if 'req_once_logged' in kwargs:
        req.meta['req_once_logged'] = kwargs['req_once_logged']
    if 'shared' in kwargs:
        req.meta['shared'] = kwargs['shared']
    req.meta['proxy'] = self.proxy
    req.meta['slot'] = self.proxy
    # We tell the type so that we can redo it if login is required.
    req.meta['reqtype'] = reqtype
    return req
def make_request(self, reqtype='regular', **kwargs):
    """Build a scrapy Request of the given type.

    The 'shared' meta flag is decided once, at the end: an explicit
    kwargs['shared'] wins, otherwise only 'regular' requests are shared.
    Returns the request after priority assignment via ``self.set_priority``.
    """
    if 'url' in kwargs:
        kwargs['url'] = self.make_url(kwargs['url'])
    if reqtype == 'index':
        req = Request(self.make_url('index'), headers=self.tor_browser)
        req.dont_filter = True
    elif reqtype == 'loginpage':
        req = Request(self.make_url('login'), headers=self.tor_browser)
        req.dont_filter = True
    elif reqtype == 'dologin':
        req = self.request_from_login_page(kwargs['response'])
        req.dont_filter = True
    elif reqtype == 'captcha_img':
        req = Request(kwargs['url'], headers=self.tor_browser)
        req.dont_filter = True
    elif reqtype == 'image':
        req = Request(self.make_url(kwargs['url']), headers=self.tor_browser)
    elif reqtype == 'regular':
        req = Request(self.make_url(kwargs['url']), headers=self.tor_browser)
    # Set sharing.
    # FIX: the per-branch meta['shared'] assignments were dead code — this
    # block always overwrote them (e.g. the 'image' branch set shared=True
    # only for it to be clobbered to False here).  The effective behaviour
    # is preserved: kwargs wins, otherwise only 'regular' is shared.
    if 'shared' in kwargs:
        # Using kwargs you can set a regular request to not being shared.
        req.meta['shared'] = kwargs['shared']
    else:
        req.meta['shared'] = (reqtype == 'regular')
    if 'dont_filter' in kwargs:
        req.dont_filter = kwargs['dont_filter']
    if 'priority' in kwargs:
        req.priority = kwargs['priority']
    if 'req_once_logged' in kwargs:
        req.meta['req_once_logged'] = kwargs['req_once_logged']
    # Some default'ish options.
    req.meta['reqtype'] = reqtype
    req.meta['proxy'] = self.proxy
    req.meta['slot'] = self.proxy
    return self.set_priority(req)
def make_request(self, reqtype, **kwargs):
    """Create a scrapy Request for this forum.

    Supported reqtypes: 'index', 'dologin' (XenForo login form POST),
    'threadlisting', 'userprofile' and 'threadpage'; anything else raises.
    """
    if 'url' in kwargs:
        kwargs['url'] = self.make_url(kwargs['url'])
    if reqtype == 'index':
        req = Request(self.make_url('index'))
        req.dont_filter = True
    elif reqtype == 'dologin':
        formdata = {
            'login': self.login['username'],
            'register': '0',
            'password': self.login['password'],
            'cookie_check': '1',
            '_xfToken': "",
            'redirect': self.resource('index'),
        }
        # Forward the captcha fields only when the caller supplied them.
        for field in ('captcha_question_hash', 'captcha_question_answer'):
            if field in kwargs:
                formdata[field] = kwargs[field]
        req = FormRequest(self.make_url('login-postform'),
                          formdata=formdata,
                          callback=self.handle_login_response,
                          dont_filter=True)
        req.meta['req_once_logged'] = kwargs['req_once_logged']
        req.dont_filter = True
    elif reqtype in ('threadlisting', 'userprofile'):
        req = Request(kwargs['url'])
        req.meta['shared'] = True
    elif reqtype == 'threadpage':
        req = Request(kwargs['url'])
        req.meta['threadid'] = kwargs['threadid']
        req.meta['shared'] = True
    else:
        raise Exception('Unsuported request type ' + reqtype)
    # We tell the type so that we can redo it if login is required.
    req.meta['reqtype'] = reqtype
    req.meta['proxy'] = self.proxy  # meta[proxy] is handled by scrapy.
    return req
def make_request(self, reqtype, **kwargs):
    """Build a scrapy Request for *reqtype*.

    Known types: 'index', 'captcha_img', 'dologin', and the content types
    'ads_list', 'ads', 'ads_ratings', 'user', 'image', 'user_ratings',
    'ads_images'.  Any other value raises Exception.
    """
    if 'url' in kwargs:
        kwargs['url'] = self.make_url(kwargs['url'])
    if reqtype == 'index':
        req = Request(self.make_url('index'))
        if 'donotparse' in kwargs:
            req.meta['donotparse'] = True
        req.dont_filter = True
    elif reqtype == 'captcha_img':
        req = Request(kwargs['url'])
        req.dont_filter = True
    elif reqtype == 'dologin':
        req = self.create_request_from_login_page(kwargs['response'])
        req.meta['req_once_logged'] = kwargs['req_once_logged']
        req.dont_filter = True
    elif reqtype in ['ads_list', 'ads', 'ads_ratings', 'user', 'image',
                     'user_ratings', 'ads_images']:
        req = Request(self.make_url(kwargs['url']))
        req.meta['shared'] = True
        if reqtype == 'ads':
            req.meta['product_rating_for'] = kwargs['ads_id']
        if reqtype == 'user_ratings':
            req.meta['user_rating_for'] = kwargs['username']
            req.meta['username'] = kwargs['username']
        if reqtype == 'ads_ratings':
            req.meta['ads_rating_for'] = kwargs['ads_id']
            req.meta['ads_id'] = kwargs['ads_id']
    else:
        # FIX: an unknown reqtype previously fell through and crashed with
        # UnboundLocalError on `req`; fail loudly like the sibling makers.
        raise Exception('Unsupported request type %s' % reqtype)
    req.meta['reqtype'] = reqtype  # We tell the type so that we can redo it if login is required
    req.meta['proxy'] = self.proxy  # meta[proxy] is handled by scrapy.
    req.meta['slot'] = self.proxy
    if 'priority' in kwargs:
        req.priority = kwargs['priority']
    if 'accepted_currencies' in kwargs:
        req.meta['accepted_currencies'] = kwargs['accepted_currencies']
    if 'sublisting_quantity' in kwargs:
        req.meta['sublisting_quantity'] = kwargs['sublisting_quantity']
    return req
def make_request(self, reqtype, **kwargs):
    """Build a scrapy Request for *reqtype*; unknown types raise.

    Selected kwargs ('category', 'escrow', 'username') are forwarded into
    request meta when present.
    """
    forwarded_keys = ('category', 'escrow', 'username')
    if 'url' in kwargs:
        kwargs['url'] = self.make_url(kwargs['url'])
    if reqtype == 'index':
        req = Request(self.make_url('index'))
        req.dont_filter = True
    elif reqtype == 'dologin':
        req = self.craft_login_request_from_form(kwargs['response'])
        req.dont_filter = True
    elif reqtype == 'captcha':
        req = Request(self.make_url(kwargs['url']))
        req.dont_filter = True
    elif reqtype == 'image':
        req = Request(self.make_url(kwargs['url']))
    elif reqtype in ('category', 'product', 'userprofile', 'userproduct',
                     'userpgp', 'userfeedback'):
        req = Request(self.make_url(kwargs['url']))
        req.meta['shared'] = True
    else:
        raise Exception('Unsuported request type %s ' % reqtype)
    # Tag the request so it can be redone if login turns out to be required.
    req.meta['reqtype'] = reqtype
    req.meta['proxy'] = self.proxy  # meta[proxy] is handled by scrapy.
    if 'priority' in kwargs:
        req.priority = kwargs['priority']
    if 'dont_filter' in kwargs:
        req.dont_filter = kwargs['dont_filter']
    if reqtype == 'userfeedback':
        req.meta['user_rating_for'] = kwargs['username']
    for key in forwarded_keys:
        if key in kwargs:
            req.meta[key] = kwargs[key]
    if 'req_once_logged' in kwargs:
        req.meta['req_once_logged'] = kwargs['req_once_logged']
    return req
def make_request(self, reqtype='regular', **kwargs):
    """Build a scrapy Request of the given type.

    reqtype is one of 'dologin', 'loginpage' or 'regular'.  Returns the
    request after priority assignment via ``self.set_priority``.
    """
    if 'url' in kwargs:
        kwargs['url'] = self.make_url(kwargs['url'])
    # Handle the requests.
    # If you need to bypass DDoS protection, put it in here.
    # BUG FIX: strings were compared with `is` (object identity), which
    # only worked by accident through CPython string interning; use `==`.
    if reqtype == 'dologin':
        req = self.craft_login_request_from_form(kwargs['response'])
        req.dont_filter = True
    elif reqtype == 'loginpage':
        req = Request(self.make_url('loginpage'), dont_filter=True,
                      headers=self.tor_browser)
    elif reqtype == 'regular':
        req = Request(kwargs['url'], headers=self.tor_browser)
        # Ensures that requests are shared among spiders.
        req.meta['shared'] = True
    # Some meta-keys that are shipped with the request.
    if 'relativeurl' in kwargs:
        req.meta['relativeurl'] = kwargs['relativeurl']
    if 'dont_filter' in kwargs:
        req.dont_filter = kwargs['dont_filter']
    if 'req_once_logged' in kwargs:
        req.meta['req_once_logged'] = kwargs['req_once_logged']
    req.meta['proxy'] = self.proxy
    req.meta['slot'] = self.proxy
    return self.set_priority(req)
def make_request(self, reqtype='regular', **kwargs):
    """Build a scrapy Request of the given type ('dologin', 'loginpage'
    or 'regular'), attaching the proxy/slot/reqtype meta keys.

    Returns the request after priority assignment via ``self.set_priority``.
    """
    if 'url' in kwargs:
        kwargs['url'] = self.make_url(kwargs['url'])
    # Handle the requests.
    # BUG FIX: strings were compared with `is` (object identity), which
    # only worked by accident through CPython string interning; use `==`.
    if reqtype == 'dologin':
        req = self.craft_login_request_from_form(kwargs['response'])
        req.dont_filter = True
    elif reqtype == 'loginpage':
        req = Request(self.make_url('loginpage'), dont_filter=True,
                      headers=self.tor_browser)
    elif reqtype == 'regular':
        req = Request(kwargs['url'], headers=self.tor_browser)
        req.meta['shared'] = True
    if 'relativeurl' in kwargs:
        req.meta['relativeurl'] = kwargs['relativeurl']
    if 'dont_filter' in kwargs:
        req.dont_filter = kwargs['dont_filter']
    if 'req_once_logged' in kwargs:
        req.meta['req_once_logged'] = kwargs['req_once_logged']
    req.meta['proxy'] = self.proxy
    req.meta['slot'] = self.proxy
    req.meta['reqtype'] = reqtype
    return self.set_priority(req)
def _fetch_in_reactor(url, spider_cls=DefaultSpider, **kwargs):
    """Fetch *url* inside the reactor and return the eventual response.

    Parameters
    ----------
    url : str or Request
        The URL (or a prebuilt request) to fetch.
    spider_cls : scrapy.Spider (default: DefaultSpider)
        Spider class used by the crawler.
    kwargs : dict, optional
        Forwarded to ``_run_spider_in_reactor``.

    Returns
    -------
    crochet.EventualResult
    """
    def parse(self, response):
        self.response = response
    if isinstance(url, six.string_types):
        req = Request(url)
    else:
        req = url
    req.dont_filter = True
    req.meta['handle_httpstatus_all'] = True
    patched_cls = override_start_requests(spider_cls, [req], parse=parse)
    return _run_spider_in_reactor(patched_cls, **kwargs)
def make_request(self, reqtype='regular', **kwargs):
    """Build a scrapy Request of the given type.

    reqtype is one of 'dologin', 'regular', 'captcha', 'loginpage' or
    'forum_home'.  Captcha URLs are used verbatim (not passed through
    ``make_url``) and prefixed with the endpoint1 setting.
    """
    if 'url' in kwargs and reqtype != 'captcha':
        kwargs['url'] = self.make_url(kwargs['url'])
    # BUG FIX: strings were compared with `is` (object identity), which
    # only worked by accident through CPython string interning; use `==`.
    if reqtype == 'dologin':
        req = self.do_login(kwargs['response'])
    elif reqtype == 'regular':
        req = Request(kwargs['url'])
        req.meta["shared"] = True
    elif reqtype == 'captcha':
        captcha_full_url = self.spider_settings["endpoint1"] + kwargs['url']
        req = Request(captcha_full_url)
    elif reqtype == 'loginpage':
        login_url = self.spider_settings["endpoint1"] + "login"
        req = Request(login_url, dont_filter=True)
    elif reqtype == 'forum_home':
        req = Request(self.spider_settings["endpoint"])
    # Some meta-keys that are shipped with the request.
    if 'dont_filter' in kwargs:
        req.dont_filter = kwargs['dont_filter']
    if 'shared' in kwargs:
        req.meta['shared'] = kwargs['shared']
    if 'req_once_logged' in kwargs:
        req.meta['req_once_logged'] = kwargs['req_once_logged']
    req.meta['proxy'] = self.proxy
    req.meta['slot'] = self.proxy
    # We tell the type so that we can redo it if login is required.
    req.meta['reqtype'] = reqtype
    return req
def make_request(self, reqtype, **kwargs):
    """Create a scrapy Request for *reqtype*; unknown types raise."""
    if 'url' in kwargs:
        kwargs['url'] = self.make_url(kwargs['url'])
    if reqtype == 'index':
        req = Request(self.make_url('index'), dont_filter=True)
    elif reqtype == 'loginpage':
        req = Request(self.make_url('loginpage'), dont_filter=True)
    elif reqtype == 'dologin':
        req = self.craft_login_request_from_form(kwargs['response'])
        req.dont_filter = True
    elif reqtype == 'captcha_img':
        req = Request(self.make_url(kwargs['url']), dont_filter=True)
    elif reqtype in ('threadlisting', 'thread', 'userprofile'):
        req = Request(self.make_url(kwargs['url']))
        req.meta['shared'] = True
        if 'relativeurl' in kwargs:
            req.meta['relativeurl'] = kwargs['relativeurl']
    else:
        raise Exception('Unsuported request type ' + reqtype)
    # Tag the request so it can be redone if login turns out to be required.
    req.meta['reqtype'] = reqtype
    req.meta['proxy'] = self.proxy  # meta[proxy] is handled by scrapy.
    if 'req_once_logged' in kwargs:
        req.meta['req_once_logged'] = kwargs['req_once_logged']
    return req
def make_request(self, reqtype, **kwargs):
    """Build a scrapy Request for *reqtype*.

    Supports a two-step login ('dologin_username' then 'dologin_password'),
    plus 'index', 'loginpage', 'image' and the shared content types
    'category', 'listing', 'userprofile'.  Unknown types raise.
    """
    passthru_kwargs = ['category', 'relativeurl']
    if 'url' in kwargs:
        kwargs['url'] = self.make_url(kwargs['url'])
    if reqtype == 'index':
        req = Request(self.make_url('index'))
        req.dont_filter = True
    elif reqtype == 'loginpage':
        # BUG FIX: was `self.make_ur('login')` — a typo that raised
        # AttributeError whenever the login page was requested.
        req = Request(self.make_url('login'))
        req.dont_filter = True
    elif reqtype == 'dologin_username':
        # (also dropped the duplicated `req = req =` assignments)
        req = self.craft_login_username_request_from_form(kwargs['response'])
        req.dont_filter = True
    elif reqtype == 'dologin_password':
        req = self.craft_login_password_request_from_form(kwargs['response'])
        req.dont_filter = True
    elif reqtype == 'image':
        req = Request(url=kwargs['url'])
        if 'referer' in kwargs:
            req.headers['Referer'] = kwargs['referer']
    elif reqtype in ['category', 'listing', 'userprofile']:
        req = Request(url=kwargs['url'])
        req.meta['shared'] = True
    else:
        raise Exception('Unsuported request type %s ' % reqtype)
    for arg in passthru_kwargs:
        if arg in kwargs:
            req.meta[arg] = kwargs[arg]
    if reqtype == 'listing':
        req.meta['product_rating_for'] = kwargs['ads_id']
    req.meta['reqtype'] = reqtype  # We tell the type so that we can redo it if login is required
    req.meta['proxy'] = self.proxy  # meta[proxy] is handled by scrapy.
    if 'req_once_logged' in kwargs:
        req.meta['req_once_logged'] = kwargs['req_once_logged']
    return req
def process_response(self, request: Request, response, spider):
    """Downloader-middleware hook: pass through good responses, retry
    responses that look like an error page or a login wall.

    Returning the request object (instead of the response) reschedules it.
    """
    # Usage examples (kept from the original, translated):
    # print('current request ip:', request.meta.get('proxy'))
    # spider.logger.info('output url {}'.format(response.url))
    # response.text
    if response.status == 200:
        # "快捷登录" / "密码登录" are login-prompt markers ("quick login" /
        # "password login"): their presence means we were served a login
        # page instead of content.
        # NOTE(review): with `or`, the retry branch runs only when BOTH
        # markers are present — confirm `and` was not intended here.
        if "快捷登录" not in response.text or "密码登录" not in response.text:
            # print(response.text)
            return response
        else:
            # Page content looks abnormal — retry the request.
            logger.debug("页面数据异常,再次尝试请求......")
            request.dont_filter = True
            time.sleep(1)
            return request
    else:
        # Non-200 status — retry the request.
        logger.debug("请求出错,再次尝试请求......")
        request.dont_filter = True
        return request
def make_request(self, reqtype, **kwargs):
    """Create a scrapy Request for *reqtype*; unknown types raise.

    'user_feedback' requests are fully built but deliberately return None
    (disabled — redundant with ads feedback).
    """
    if 'url' in kwargs:
        kwargs['url'] = self.make_url(kwargs['url'])
    unfiltered = ('index', 'loginpage', 'dologin', 'captcha', 'ddos_protection')
    if reqtype == 'index':
        req = Request(self.make_url('index'))
    elif reqtype == 'loginpage':
        req = Request(self.make_url('loginpage'))
    elif reqtype == 'dologin':
        req = self.craft_login_request_from_form(kwargs['response'])
    elif reqtype == 'captcha':
        req = Request(self.make_url(kwargs['url']))
    elif reqtype == 'ddos_protection':
        req = self.create_request_from_ddos_protection(kwargs['response'])
    elif reqtype in ('category', 'listing', 'userprofile',
                     'listing_feedback', 'user_feedback', 'image'):
        req = Request(self.make_url(kwargs['url']))
        req.meta['shared'] = True
    else:
        raise Exception('Unsuported request type %s ' % reqtype)
    if reqtype in unfiltered:
        req.dont_filter = True
    if reqtype == 'listing_feedback':
        req.meta['product_rating_for'] = kwargs['listing_id']
    elif reqtype == 'user_feedback':
        req.meta['user_rating_for'] = kwargs['username']
    # Tag the request so it can be redone if login turns out to be required.
    req.meta['reqtype'] = reqtype
    req.meta['proxy'] = self.proxy  # meta[proxy] is handled by scrapy.
    if 'req_once_logged' in kwargs:
        req.meta['req_once_logged'] = kwargs['req_once_logged']
    if reqtype == 'user_feedback':
        # Disabled user feedback because it is redundant with ads_feedback
        return None
    return req
def make_request(self, **kwargs):
    """Build a plain scrapy Request from kwargs['url'], attaching the
    spider's proxy as both proxy and slot meta keys."""
    if 'url' in kwargs:
        kwargs['url'] = self.make_url(kwargs['url'])
    request = Request(kwargs['url'])
    if 'dont_filter' in kwargs:
        request.dont_filter = kwargs['dont_filter']
    request.meta['proxy'] = self.proxy  # meta[proxy] is handled by scrapy.
    request.meta['slot'] = self.proxy
    return request
def create_spider_request(self, kwargs):
    """Build an unfiltered Request from *kwargs* (must contain 'url').

    Raises Error('400', ...) when the remaining kwargs are not valid
    scrapy Request arguments.
    """
    url = kwargs.pop('url')
    try:
        req = Request(url, **kwargs)
    except (TypeError, ValueError) as e:
        message = "Error while creating Scrapy Request, {}".format(str(e))
        raise Error('400', message=message)
    req.dont_filter = True
    log_line = u"Created request for spider {} with url {} and kwargs {}".format(
        self.spider_name, url, repr(kwargs))
    log.msg(log_line)
    return req
def create_spider_request(self, kwargs):
    """Build an unfiltered Request from *kwargs* (must contain 'url').

    Raises
    ------
    Error
        A '400'-style error when the remaining kwargs are not valid
        scrapy Request arguments.
    """
    url = kwargs.pop('url')
    try:
        req = Request(url, **kwargs)
    except (TypeError, ValueError) as e:
        # Bad arguments for scrapy Request
        # we don't want to schedule spider if someone
        # passes meaingless arguments to Request.
        # We must raise this here so that this will be returned to client,
        # Otherwise if this is raised in spider_opened it goes to
        # spider logs where it does not really belong.
        # It is needed because in POST handler we can pass
        # all possible requests kwargs, so it is easy to make mistakes.
        # BUG FIX: `e.message` does not exist on Python 3 exceptions and
        # raised AttributeError instead of the intended Error; use str(e),
        # matching the sibling create_spider_request in this file.
        message = "Error while creating Request, {}".format(str(e))
        raise Error('400', message=message)
    req.dont_filter = True
    msg = u"Created request for spider {} with url {} and kwargs {}"
    msg = msg.format(self.spider_name, url, repr(kwargs))
    log.msg(msg)
    return req
def create_spider_request(self, kwargs):
    """Build an unfiltered Request from *kwargs* (must contain 'url').

    Raises
    ------
    Error
        A '400'-style error when the remaining kwargs are not valid
        scrapy Request arguments.
    """
    url = kwargs.pop('url')
    try:
        req = Request(url, **kwargs)
    except (TypeError, ValueError) as e:
        # Bad arguments for scrapy Request
        # we don't want to schedule spider if someone
        # passes meaingless arguments to Request.
        # We must raise this here so that this will be returned to client,
        # Otherwise if this is raised in spider_idle it goes to
        # spider logs where it does not really belong.
        # It is needed because in POST handler we can pass
        # all possible requests kwargs, so it is easy to make mistakes.
        # BUG FIX: `e.message` does not exist on Python 3 exceptions and
        # raised AttributeError instead of the intended Error; use str(e),
        # matching the sibling create_spider_request in this file.
        message = "Error while creating Request, {}".format(str(e))
        raise Error('400', message=message)
    req.dont_filter = True
    msg = u"Created request for spider {} with url {} and kwargs {}"
    msg = msg.format(self.spider_name, url, repr(kwargs))
    log.msg(msg)
    return req
def start_requests(self):
    """Yield an item (or request) per stored video document.

    Documents without a matching parse method or without a URL are
    skipped; each built item carries its own follow-up request.
    """
    for doc in self.db_adapter.get_videos(self._item):
        parse_method = self._get_parse_method(doc.get('site_url', ''))
        if not parse_method:
            continue
        url = doc.get('url', '')
        if not url:
            continue
        item = VideoZjcmItem(doc=doc,
                             next_request=None,
                             list_url='',
                             query=doc.get('query', ''),
                             attachments=[],
                             attachment_urls=[])
        request = Request(url, callback=parse_method, meta={'item': item})
        # noinspection PyUnresolvedReferences
        request.dont_filter = True
        item['next_request'] = request
        yield self.item_or_request(item)
def make_request(self, reqtype, **kwargs):
    """Build a scrapy Request for *reqtype*.

    A 'redirect_from' kwarg takes precedence over reqtype and produces a
    plain request tagged with the redirect origin.  The final 'shared'
    flag comes from kwargs when given, otherwise defaults to False.
    """
    if 'url' in kwargs:
        kwargs['url'] = self.make_url(kwargs['url'])
    if 'redirect_from' in kwargs:
        req = Request(kwargs['url'], headers=self.user_agent)
        req.meta['redirect_from'] = kwargs['redirect_from']
        req.dont_filter = True
    elif reqtype == 'index':
        req = Request(self.make_url('index'), headers=self.user_agent)
        req.dont_filter = True
    elif reqtype == 'ddos_protection':
        req = self.create_request_from_ddos_protection(kwargs['response'])
        req.meta['ddos_protection'] = True
        req.dont_filter = True
    elif reqtype == 'captcha':
        req = Request(kwargs['url'], headers=self.user_agent)
        req.dont_filter = True
    elif reqtype == 'dologin':
        req = self.create_request_from_login_page(kwargs['response'])
        req.dont_filter = True
    elif reqtype in ['threadlisting', 'thread']:
        req = Request(kwargs['url'], headers=self.user_agent)
        req.dont_filter = False
        req.meta['shared'] = True
        if reqtype == 'threadlisting':
            req.priority = 10
    req.meta['reqtype'] = reqtype  # We tell the type so that we can redo it if login is required
    req.meta['proxy'] = self.proxy  # meta[proxy] is handled by scrapy.
    if 'priority' in kwargs:
        req.priority = kwargs['priority']
    if 'req_once_logged' in kwargs:
        req.meta['req_once_logged'] = kwargs['req_once_logged']
    if 'shared' in kwargs:
        req.meta['shared'] = kwargs['shared']
    else:
        # FIX: was `elif 'shared' not in kwargs:` — a redundant re-test of
        # the negated condition; a plain else is equivalent.
        req.meta['shared'] = False
    return req
def retry_request_with_get(self, request: Request) -> Iterator[Request]:
    """Re-issue *request* as a GET, bypassing the duplicate filter."""
    request.dont_filter = True
    request.method = 'GET'
    yield request
def make_request(self, reqtype, **kwargs):
    """Build a request for *reqtype*.

    Handles login/captcha/DDoS flows, POST-form driven category paging
    ('category-page', deduplicated per (url, page number)), and the shared
    content types.  Unknown types raise; a duplicate category page
    returns None.
    """
    passthru = ['category']
    if 'url' in kwargs:
        kwargs['url'] = self.make_url(kwargs['url'])
    if reqtype == 'index':
        req = Request(self.make_url('index'))
        req.dont_filter = True
    elif reqtype == 'loginpage':
        req = Request(self.make_url('loginpage'))
        req.dont_filter = True
    elif reqtype == 'dologin':
        req = self.craft_login_request_from_form(kwargs['response'])
        req.dont_filter = True
    elif reqtype == 'captcha':
        req = Request(self.make_url(kwargs['url']))
        req.dont_filter = True
    elif reqtype == 'image':
        req = Request(self.make_url(kwargs['url']))
    elif reqtype == 'ddos_protection':
        req = self.create_request_from_ddos_protection(kwargs['response'])
        req.dont_filter = True
    elif reqtype == 'security_check':
        req = self.create_request_from_security_check(kwargs['response'])
        req.dont_filter = True
    elif reqtype == 'category':
        req = FormRequest.from_response(kwargs['response'],
                                        formcss=kwargs['formcss'],
                                        clickdata=kwargs['clickdata'])
    elif reqtype == 'category-page':
        # Changing page is done with a POST form.
        btn = kwargs['btn']
        name = btn.xpath('@name').extract_first()  # "page"
        val = btn.xpath('@value').extract_first()  # page number
        # Careful, if dofilter is set to 1 (default value), page will be empty.
        data = {name: val, 'dofilter': '0'}
        req = FormRequest.from_response(
            kwargs['response'],
            formdata=data,
            formxpath='//*[contains(@class, "pagination")]/ancestor::form')
        # BUG FIX: the first page seen for a URL used to be stored as an
        # empty list — the page number itself was never recorded, so the
        # same page could be yielded twice.  Record every yielded page.
        seen_pages = self.yielded_category_page.setdefault(req.url, [])
        if val in seen_pages:
            return None
        seen_pages.append(val)
    elif reqtype in ['userprofile', 'offer', 'offer-refund']:
        # NOTE: 'category' was removed from this list; it was unreachable,
        # shadowed by the FormRequest branch above.
        req = Request(self.make_url(kwargs['url']))
        req.meta['shared'] = True
    else:
        raise Exception('Unsuported request type %s ' % reqtype)
    req.meta['reqtype'] = reqtype  # We tell the type so that we can redo it if login is required
    req.meta['proxy'] = self.proxy  # meta[proxy] is handled by scrapy.
    if 'priority' in kwargs:
        req.priority = kwargs['priority']
    if reqtype == 'offer':
        offer_id = self.get_offer_id_from_url(req.url)
        req.meta['product_rating_for'] = offer_id
    for k in passthru:
        if k in kwargs:
            req.meta[k] = kwargs[k]
    if 'req_once_logged' in kwargs:
        req.meta['req_once_logged'] = kwargs['req_once_logged']
    return req