def process_response(self, request, response, spider):
    """If we were logged out, log in again and retry the request."""
    # Note: runs under twisted's @inlineCallbacks (hence yield + returnValue).
    if request.meta.get('_autologin') and self.is_logout(response):
        autologin_meta = request.meta['_autologin']
        if isinstance(autologin_meta['request'], dict):
            retryreq = request_from_dict(autologin_meta['request'], spider)
        else:
            retryreq = autologin_meta['request'].copy()
        retryreq.dont_filter = True
        logger.debug('Logout at %s: %s',
                     retryreq.url, _response_cookies(response))
        if self.logged_in:
            # We could have already done relogin after initial logout
            if any(autologin_meta['cookie_dict'].get(c['name']) != c['value']
                   for c in self.auth_cookies):
                logger.debug('Request was stale, will retry %s', retryreq)
            else:
                self.logged_in = False
                # It's better to re-login straight away
                yield self._ensure_login(retryreq, spider)
                logout_count = retryreq.meta['autologin_logout_count'] = (
                    retryreq.meta.get('autologin_logout_count', 0) + 1)
                if logout_count >= self.max_logout_count:
                    logger.debug('Max logouts exceeded, will not retry %s',
                                 retryreq)
                    raise IgnoreRequest
                else:
                    logger.debug(
                        'Request caused log out (%d), still retrying %s',
                        logout_count, retryreq)
        returnValue(retryreq)
    returnValue(response)
def update(self):
    to_update = []
    if self.fixture:
        to_update.append(self.fixture_path)
    else:
        target = os.path.join(self.callback_dir, "*.bin")
        to_update = glob(target)

    for path in to_update:
        data, _, spider, _ = prepare_callback_replay(path)

        request = request_from_dict(data['request'], spider)

        response_cls = auto_import(
            data['response'].pop('cls', 'scrapy.http.HtmlResponse')
        )
        # pass the rebuilt Request object, not the raw dict
        response = response_cls(request=request, **data['response'])

        data["result"], _ = parse_callback_result(
            request.callback(response), spider
        )

        fixture_dir, filename = os.path.split(path)
        fixture_index = re.search(r"\d+", filename).group()
        add_sample(fixture_index, fixture_dir, filename, data)

        print("Fixture '{}' successfully updated.".format(
            os.path.relpath(path)))
def next_request(self): entry = self.collection.find_and_modify(sort={"$natural": self.queue_order}, remove=True) if entry: request = request_from_dict(entry["data"], self.spider) return request return None
def test(self):
    fx_result = data['result']
    fx_version = data.get('python_version')

    request = request_from_dict(data['request'], spider)
    response = HtmlResponse(request=request, **data['response'])

    middlewares = []
    middleware_paths = data['middlewares']
    for mw_path in middleware_paths:
        try:
            mw_cls = load_object(mw_path)
            mw = create_instance(mw_cls, settings, crawler)
            middlewares.append(mw)
        except NotConfigured:
            continue

    crawler.signals.send_catch_log(signal=signals.spider_opened, spider=spider)

    for mw in middlewares:
        if hasattr(mw, 'process_spider_input'):
            mw.process_spider_input(response, spider)

    result = arg_to_iter(request.callback(response))
    middlewares.reverse()

    for mw in middlewares:
        if hasattr(mw, 'process_spider_output'):
            result = mw.process_spider_output(response, result, spider)

    for index, (cb_obj, fx_item) in enumerate(
            six.moves.zip_longest(result, fx_result, fillvalue=NO_ITEM_MARKER)):
        if any(item == NO_ITEM_MARKER for item in (cb_obj, fx_item)):
            raise AssertionError(
                "The fixture's data length doesn't match with "
                "the current callback's output length.")

        cb_obj = parse_object(cb_obj, spider)

        fx_obj = fx_item['data']
        if fx_item['type'] == 'request':
            clean_request(fx_obj, settings)
            clean_request(cb_obj, settings)
        else:
            clean_item(fx_obj, settings)
            clean_item(cb_obj, settings)

        if fx_version == 2 and six.PY3:
            fx_obj = binary_check(fx_obj, cb_obj, encoding)

        try:
            datadiff.tools.assert_equal(fx_obj, cb_obj)
        except AssertionError as e:
            six.raise_from(
                AssertionError(
                    "Callback output #{} doesn't match recorded "
                    "output:{}".format(index, e)),
                None)
def open(self, spider):
    self.spider = spider

    try:
        self.queue = load_object(self.queue_cls)(
            server=self.server,
            spider=spider,
            key=self.queue_key % {'spider': spider.name},
            serializer=self.serializer,
        )
    except TypeError as e:
        raise ValueError("Failed to instantiate queue class '%s': %s"
                         % (self.queue_cls, e))

    try:
        # for req_dict in self.col.find({"meta.category": {"$in": list(JdSpider.included_cat_list)}},
        #                               {'_id': 0}):
        count = 0
        for req_dict in self.col.find(
                {"meta.item.category": {"$in": list(JdSpider.included_cat_list)}},
                {'_id': 0}):
            # for req_dict in self.col.find({}, {'_id': 0}):
            # if 'item' in req_dict['meta'] and len(req_dict['meta']['item'][GIF.SKUID]) >= 10:
            #     continue
            print('-------------------add failure request to queue-------------------')
            count += 1
            req = request_from_dict(req_dict, spider)
            req.dont_filter = True
            req.meta['dont_redirect'] = False
            req.priority = 2
            self.enqueue_request(req)
        print(count)
    finally:
        self.client.close()

    try:
        self.df = load_object(self.dupefilter_cls)(
            server=self.server,
            key=self.dupefilter_key % {'spider': spider.name},
            debug=spider.settings.getbool('DUPEFILTER_DEBUG'),
        )
    except TypeError as e:
        raise ValueError("Failed to instantiate dupefilter class '%s': %s"
                         % (self.dupefilter_cls, e))

    if self.flush_on_start:
        self.flush()
    # notice if there are requests already in the queue to resume the crawl
    if len(self.queue):
        spider.log("Resuming crawl (%d requests scheduled)" % len(self.queue))
def _make_request(self, mframe, hframe, body):
    try:
        request = request_from_dict(pickle.loads(body), self)
    except Exception:
        # fall back to treating the message body as a plain URL string
        body = body.decode()
        request = scrapy.Request(body, callback=self.parse)
    return request
def next_request(self): entry = self.collection.find_and_modify(sort={"$natural":self.queue_order}, remove=True) if entry: request = request_from_dict(entry['data'], self.spider) return request return None
def get_config_requests(test_dir, spider, max_fixtures):
    curr_fixture_count = get_num_fixtures(test_dir)
    config = get_cb_settings(test_dir)
    try:
        requests_to_add = config.REQUESTS_TO_ADD
    except AttributeError:
        return []
    defaults = {
        'method': 'GET',
        'headers': None,
        'body': None,
        'cookies': None,
        'meta': None,
        '_encoding': 'utf-8',
        'priority': 0,
        'dont_filter': False,
        'errback': None,
        'flags': None,
        'cb_kwargs': None,
    }
    complete_requests = []
    for req in requests_to_add:
        if curr_fixture_count < max_fixtures:
            for key, val in defaults.items():
                req[key] = req.get(key, val)
            req['callback'] = _get_method(spider, test_dir.split('/')[-1])
            req['meta']['_update'] = 1
            req['meta']['_fixture'] = curr_fixture_count + 1
            complete_requests.append(req)
            curr_fixture_count += 1
        else:
            break
    complete_requests = [request_from_dict(req) for req in complete_requests]
    return complete_requests
def _decode_request(self, encoded_request): """ decode request :param encoded_request: :return: """ return request_from_dict(pickle.loads(encoded_request), self.spider)
def recurse_from_dict(self, node, spider=None):
    newnode = None
    if isinstance(node, dict):
        if '__response__' in node:
            if node['__response__'] == 'Response':
                cls = Response
            elif node['__response__'] == 'TextResponse':
                cls = TextResponse
            else:
                cls = HtmlResponse
            newnode = self.response_from_dict(node, cls)
        elif '__request__' in node:
            newnode = request_from_dict(node, spider)
            for k in newnode.meta:
                newnode.meta[k] = self.recurse_from_dict(newnode.meta[k],
                                                         spider=spider)
        else:
            newnode = {}  # plain dict: rebuild it key by key
            for k in node:
                newnode[k] = self.recurse_from_dict(node[k], spider=spider)
    elif isinstance(node, tuple):
        newnode = tuple(map(functools.partial(self.recurse_from_dict,
                                              spider=spider), node))
    elif isinstance(node, list):
        newnode = map(functools.partial(self.recurse_from_dict,
                                        spider=spider), node)
    else:
        if isinstance(node, unicode):  # Python 2 code
            node = node.encode('utf8')
        newnode = node
    return newnode
def next_request(self):
    data = self.client.pop()
    if data is None or len(data) == 0:
        return None
    request = request_from_dict(marshal.loads(data), self.spider)
    return request
def pop(self):
    # use atomic range/remove using multi/exec
    pipe = self.redis.pipeline()
    pipe.multi()
    pipe.zrange(self.key, 0, 0).zremrangebyrank(self.key, 0, 0)
    results, count = pipe.execute()
    if results:
        return request_from_dict(marshal.loads(results[0]), self.spider)
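# The push side implied by this pop: store the marshalled request dict in
# the same sorted set, scored so that zrange(key, 0, 0) returns the
# highest-priority entry first. A hypothetical sketch; the method name,
# the request_to_dict serialization, and the redis-py 3.x zadd mapping
# signature are assumptions.
import marshal

from scrapy.utils.reqser import request_to_dict


def push(self, request):
    """Store a serialized request, ordered by priority."""
    data = marshal.dumps(request_to_dict(request, self.spider))
    # negate the priority: lower scores sort first in a sorted set
    self.redis.zadd(self.key, {data: -request.priority})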
def _dqpop(self):
    if self.queue:
        d = self.queue.get()
        if d:
            return request_from_dict(
                json.loads(d, object_hook=scrapy_request_decoder),
                self.spider
            )
def next_request(self):
    request = self.collection.find_and_modify(
        {'last_downloaded': {'$exists': False}},
        sort=[('priority', pymongo.DESCENDING)],
        update={'$set': {'last_downloaded': datetime.today()}})
    if request:
        request = request_from_dict(request, spider=self.spider)
        self.stats.inc_value('scheduler/dequeued', spider=self.spider)
        return request
def dequeue_start_request(self):
    if self.requestqueue is None:
        return
    d = self.requestqueue.pop()
    if d is None:
        return
    self.stats.inc_value('startrequests/dequeued', spider=self)
    return request_from_dict(d, self)
def pop(self):
    request = super().pop()
    if not request:
        return None
    request = request_from_dict(request, self.spider)
    return request
def next_request(self):
    '''
    Logic to handle getting a new url request, from a bunch of
    different queues
    '''
    t = time.time()
    # update the redis queues every so often
    if t - self.update_time > self.update_interval:
        self.update_time = t
        self.create_queues()

    # update the ip address every so often
    if t - self.update_ip_time > self.ip_update_interval:
        self.update_ip_time = t
        self.update_ipaddress()
        self.report_self()

    item = self.find_item()
    if item:
        self.logger.debug("Found url to crawl {url}"
                          .format(url=item['url']))
        try:
            if 'request' in item:
                req = request_from_dict(pickle.loads(item['request']),
                                        self.spider)
            else:
                req = Request(item['url'], meta=make_splash_meta({}))
        except ValueError:
            # need absolute url
            # need better url validation here
            req = Request('http://' + item['url'], meta=make_splash_meta({}))

        if 'meta' in item:
            item = item['meta']

        # defaults not in schema
        if 'curdepth' not in item:
            item['curdepth'] = 0
        if "retry_times" not in item:
            item['retry_times'] = 0

        for key in item.keys():
            if key != 'request':
                req.meta[key] = item[key]

        # extra check to add items to request
        if 'useragent' in item and item['useragent'] is not None:
            req.headers['User-Agent'] = item['useragent']
        if 'cookie' in item and item['cookie'] is not None:
            if isinstance(item['cookie'], dict):
                req.cookies = item['cookie']
            elif isinstance(item['cookie'], basestring):
                req.cookies = self.parse_cookie(item['cookie'])

        return req
    return None
def _decode_request(self, encoded_request): """Decode an request previously encoded""" try: red_dict = pickle.loads(encoded_request) org_dict = RequestDeCompress.restore_request_dict(red_dict) return request_from_dict(org_dict, self.spider) except Exception, e: self.spider.log('Failed decode request:%s' % (e.message)) return None
def _decode_request(self, encoded_request): """Decode an request previously encoded""" obj = self.serializer.loads(encoded_request) spider = self.spider if obj['meta'].get('parser_request'): spider = self.spider.parse_spider self.__decode_parser_request__(obj, spider) return request_from_dict(obj, spider)
def next_request(self):
    item = self._retrieve_from_queue()
    if item:
        try:
            request = request_from_dict(item, self.spider)
        except KeyError:
            request = self._request_from_dict(item)
        return self._populate_request(request, item)
    else:
        return None
def next_request(self): entry = self.collection.find_and_modify(sort={"$natural": self.queue_order}, remove=True) if entry: request = request_from_dict(entry['data'], self.spider) if request and self.stats: self.stats.inc_value('scheduler/dequeued/mongodb', spider=self.spider) return request return None
def _make_request(self, mframe, hframe, body):
    try:
        request = request_from_dict(json.loads(body), self)
    except Exception:
        # body = body.decode()
        data = json.loads(body, encoding="utf-8")
        request = scrapy.Request(data['url'],
                                 callback=self.parse,
                                 dont_filter=True,
                                 meta=data['params'])
    return request
def peek(self): """Returns the next object to be returned by :meth:`pop`, but without removing it from the queue. Raises :exc:`NotImplementedError` if the underlying queue class does not implement a ``peek`` method, which is optional for queues. """ request = super().peek() if not request: return None return request_from_dict(request, self.spider)
def _decode_request(self, encoded_request): """Decode an request previously encoded""" try: red_dict = pickle.loads(encoded_request) org_dict = RequestDeCompress.restore_request_dict(red_dict) return request_from_dict(org_dict, self.spider) except Exception, e: #import traceback #print 'Failed decode reqeust' #print traceback.format_exc() self.spider.log('Failed decode request:%s, %s' % (e.message, encoded_request)) return None
def _make_request(self, mframe, hframe, body):
    """
    Choose the strategy for issuing the request: simple or splash.
    Example message body:
    {"url":"https://www.xuexi.cn/lgpage/detail/index.html?id=10522373568484213565&item_id=10522373568484213565","fields":{"content":"/html/body/div[@id='root']/div[@class='main-view']/section[@class='_3GhgGH8Y4Zh8H0uBP5aUMD _3mVsbsHWKWuZwBS5zIrFO9']/div[@class='oSnRgpdW2BnrDruxKh9We _3mVsbsHWKWuZwBS5zIrFO9']/div/div/div[@class='Iuu474S1L6y5p7yalKQbW grid-gr']/div[@class='grid-cell'][2]/section[@class='_3GhgGH8Y4Zh8H0uBP5aUMD _3mVsbsHWKWuZwBS5zIrFO9']/div[@class='oSnRgpdW2BnrDruxKh9We _3mVsbsHWKWuZwBS5zIrFO9']/div/div/div[@class='Iuu474S1L6y5p7yalKQbW grid-gr']/div[@class='grid-cell']/div[@class='render-detail-article']/div[@class='render-detail-article-content']/div[@class='render-detail-content cke-mode']"},"type":1}
    """
    try:
        item = json.loads(str(body, "utf-8"))
    except Exception:
        # request = request_from_dict(pickle.loads(body), self)
        logger.error("Request message is malformed, ignoring it")
        return request_from_dict(pickle.loads(body), self)
    return self.create_request(item)
def _decode_request(self, crawl_doc):
    """Decode a request previously encoded"""
    try:
        if not crawl_doc or not crawl_doc.request or not crawl_doc.request.meta:
            self.logger_.info('recalled request: %s', crawl_doc.url)
            return self.spider_._create_request(
                url=crawl_doc.url,
                page_type=crawl_doc.page_type,
                doc_type=crawl_doc.doc_type,
                schedule_doc_type=ScheduleDocType.RECRAWL_PLAY,
                dont_filter=True)
        red_dict = pickle.loads(crawl_doc.request.meta)
        request = request_from_dict(red_dict, self.spider_)
        request.meta['crawl_doc'] = crawl_doc
        return request
    except Exception:
        self.logger_.exception('failed decode request: %s', crawl_doc)
        return None
def response_from_dict(responsed):
    respcls = load_object(responsed['cls'])
    request = request_from_dict(responsed['request'])
    response = respcls(
        encoding=responsed['encoding'],
        request=request,
        url=responsed['url'],
        status=responsed['status'],
        headers=responsed['headers'],
        body=responsed['body'],
    )
    response.meta.update(responsed['meta'])
    return response
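# response_from_dict above has no inverse on this page. A hypothetical
# response_to_dict mirroring the exact keys it reads; note that `encoding`
# exists only on TextResponse subclasses, so plain Response objects would
# need special-casing.
from scrapy.utils.reqser import request_to_dict


def response_to_dict(response, spider=None):
    """Serialize a response into the dict layout read by response_from_dict."""
    cls = type(response)
    return {
        'cls': '{}.{}'.format(cls.__module__, cls.__name__),
        'request': request_to_dict(response.request, spider),
        'encoding': response.encoding,  # TextResponse and subclasses only
        'url': response.url,
        'status': response.status,
        'headers': dict(response.headers),
        'body': response.body,
        'meta': dict(response.meta),
    }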
def test(self):
    fixture_objects = data['result']

    request = request_from_dict(data['request'], spider)
    response = HtmlResponse(request=request, **data['response'])

    middlewares = []
    middleware_paths = data['middlewares']
    for mw_path in middleware_paths:
        try:
            mw_cls = load_object(mw_path)
            mw = create_instance(mw_cls, settings, crawler)
        except NotConfigured:
            continue
        # append once, after instantiation succeeds
        middlewares.append(mw)

    crawler.signals.send_catch_log(signal=signals.spider_opened, spider=spider)

    for mw in middlewares:
        if hasattr(mw, 'process_spider_input'):
            mw.process_spider_input(response, spider)

    result = request.callback(response) or []
    middlewares.reverse()

    for mw in middlewares:
        if hasattr(mw, 'process_spider_output'):
            result = mw.process_spider_output(response, result, spider)

    if isinstance(result, (Item, Request, dict)):
        result = [result]

    for index, _object in enumerate(result):
        fixture_data = fixture_objects[index]['data']
        if fixture_objects[index].get('type') == 'request':
            clean_request(fixture_data, settings)
        else:
            clean_item(fixture_data, settings)

        _object = parse_object(_object, spider)
        self.assertEqual(fixture_data, _object, 'Not equal!')
def next_request(self):
    '''
    Logic to handle getting a new url request, from a bunch of
    different queues
    '''
    t = time.time()
    # update the redis queues every so often
    if t - self.update_time > self.update_interval:
        self.update_time = t
        self.create_queues()
        self.expire_queues()

    # update the ip address every so often
    if t - self.update_ip_time > self.ip_update_interval:
        self.update_ip_time = t
        self.update_ipaddress()
        self.report_self()

    item = self.find_item()
    if item:
        self.logger.debug(u"Found url to crawl {url}"
                          .format(url=item['url']))
        if 'meta' in item:
            # item is a serialized request
            req = request_from_dict(item, self.spider)
        else:
            # item is a feed from outside, parse it manually
            req = self.request_from_feed(item)

        # extra check to add items to request
        if 'useragent' in req.meta and req.meta['useragent'] is not None:
            req.headers['User-Agent'] = req.meta['useragent']
        if 'cookie' in req.meta and req.meta['cookie'] is not None:
            if isinstance(req.meta['cookie'], dict):
                req.cookies = req.meta['cookie']
            elif isinstance(req.meta['cookie'], string_types):
                req.cookies = self.parse_cookie(req.meta['cookie'])

        return req
    return None
def _decode_request(self, encoded_request):
    return request_from_dict(pickle.loads(encoded_request), self.spider)
def _dqpop(self):
    if self.dqs:
        d = self.dqs.pop()
        if d:
            return request_from_dict(d, self.spider)
def _assert_serializes_ok(self, request, spider=None):
    d = request_to_dict(request, spider=spider)
    request2 = request_from_dict(d, spider=spider)
    self._assert_same_request(request, request2)
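# The round trip exercised by this test helper is the contract every
# snippet on this page relies on. A standalone demonstration, assuming the
# pre-Scrapy-2.6 scrapy.utils.reqser helpers (newer releases expose
# Request.to_dict() and scrapy.utils.request.request_from_dict instead):
import scrapy
from scrapy.utils.reqser import request_from_dict, request_to_dict


class DemoSpider(scrapy.Spider):
    name = 'demo'

    def parse(self, response):
        pass


spider = DemoSpider()
original = scrapy.Request('http://example.com', callback=spider.parse,
                          meta={'depth': 2}, priority=5)

# Serialize: the callback is stored as the string 'parse'; url, method,
# headers, meta, priority, etc. are stored as plain values.
d = request_to_dict(original, spider=spider)

# Deserialize: the spider is needed to resolve 'parse' back to a method.
restored = request_from_dict(d, spider=spider)
assert restored.url == original.url
assert restored.callback == spider.parse
assert restored.meta == original.meta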
def _decode_request(self, encoded_request): """Decode an request previously encoded""" red_dict = pickle.loads(encoded_request) org_dict = RequestDeCompress.restore_request_dict(red_dict) return request_from_dict(org_dict, self.spider)
def _decode_request(self, encoded_request): """Decode an request previously encoded""" request = request_from_dict(pickle.loads(encoded_request), self.spider) 'the decoded request is {}'.format(request) return request_from_dict(pickle.loads(encoded_request), self.spider)
def pop(self):
    request = super(ScrapyPriorityQueue, self).pop()
    if request and self.serialize:
        request = request_from_dict(request, self.spider)
    return request
def _decode_request(self, encoded_request): """Decode an request previously encoded""" if encoded_request.get("body") and encoded_request.get("body") is not None: encoded_request["body"]=zlib.decompress(base64.urlsafe_b64decode(encoded_request["body"].encode("utf-8"))) return request_from_dict(encoded_request, self.spider)
def _decode_request(self, encoded_request): """Decode an request previously encoded""" return request_from_dict(pickle.loads(encoded_request), self.spider)
def decode_request(data):
    """Decode a request previously encoded"""
    return request_from_dict(pickle.loads(data))
def _decode_request(self, encoded_request): """Decode an request previously encoded""" obj = self.serializer.loads(encoded_request) return request_from_dict(obj, self.spider)
def process_response(self, request: Request, response: Response, spider: Spider) -> Response: try: crawlera_meta = request.meta[META_KEY] except KeyError: crawlera_meta = {} if crawlera_meta.get( "skip") or not crawlera_meta.get("original_request"): return response original_request = request_from_dict(crawlera_meta["original_request"]) self.stats.inc_value("crawlera_fetch/response_count") self._calculate_latency(request) self.stats.inc_value("crawlera_fetch/api_status_count/{}".format( response.status)) if response.headers.get("X-Crawlera-Error"): message = response.headers["X-Crawlera-Error"].decode("utf8") self.stats.inc_value("crawlera_fetch/response_error") self.stats.inc_value( "crawlera_fetch/response_error/{}".format(message)) log_msg = "Error downloading <{} {}> (status: {}, X-Crawlera-Error header: {})" log_msg = log_msg.format( original_request.method, original_request.url, response.status, message, ) if self.raise_on_error: raise CrawleraFetchException(log_msg) else: logger.error(log_msg) return response try: json_response = json.loads(response.text) except json.JSONDecodeError as exc: self.stats.inc_value("crawlera_fetch/response_error") self.stats.inc_value( "crawlera_fetch/response_error/JSONDecodeError") log_msg = "Error decoding <{} {}> (status: {}, message: {}, lineno: {}, colno: {})" log_msg = log_msg.format( original_request.method, original_request.url, response.status, exc.msg, exc.lineno, exc.colno, ) if self.raise_on_error: raise CrawleraFetchException(log_msg) from exc else: logger.error(log_msg) return response if json_response.get("crawlera_error"): error = json_response["crawlera_error"] message = json_response["body"] self.stats.inc_value("crawlera_fetch/response_error") self.stats.inc_value( "crawlera_fetch/response_error/{}".format(error)) log_msg = ( "Error downloading <{} {}> (Original status: {}, Fetch API error message: {})" ) log_msg = log_msg.format( original_request.method, original_request.url, json_response["original_status"], message, ) if self.raise_on_error: raise CrawleraFetchException(log_msg) else: logger.error(log_msg) return response self.stats.inc_value("crawlera_fetch/response_status_count/{}".format( json_response["original_status"])) crawlera_meta["upstream_response"] = { "status": response.status, "headers": response.headers, "body": json_response, } respcls = responsetypes.from_args( headers=json_response["headers"], url=json_response["url"], body=json_response["body"], ) return response.replace( cls=respcls, request=original_request, headers=json_response["headers"], url=json_response["url"], body=json_response["body"], status=json_response["original_status"], )
def process_response(self, request: Request, response: Response, spider: Spider) -> Response: try: crawlera_meta = request.meta[META_KEY] except KeyError: crawlera_meta = {} if crawlera_meta.get("skip") or not crawlera_meta.get("original_request"): return response original_request = request_from_dict(crawlera_meta["original_request"], spider=spider) self.stats.inc_value("crawlera_fetch/response_count") self._calculate_latency(request) self.stats.inc_value("crawlera_fetch/api_status_count/{}".format(response.status)) if response.headers.get("X-Crawlera-Error"): message = response.headers["X-Crawlera-Error"].decode("utf8") self.stats.inc_value("crawlera_fetch/response_error") self.stats.inc_value("crawlera_fetch/response_error/{}".format(message)) log_msg = "Error downloading <{} {}> (status: {}, X-Crawlera-Error header: {})" log_msg = log_msg.format( original_request.method, original_request.url, response.status, message, ) if self.raise_on_error: raise CrawleraFetchException(log_msg) else: logger.warning(log_msg) return response try: json_response = json.loads(response.text) except json.JSONDecodeError as exc: self.stats.inc_value("crawlera_fetch/response_error") self.stats.inc_value("crawlera_fetch/response_error/JSONDecodeError") log_msg = "Error decoding <{} {}> (status: {}, message: {}, lineno: {}, colno: {})" log_msg = log_msg.format( original_request.method, original_request.url, response.status, exc.msg, exc.lineno, exc.colno, ) if self.raise_on_error: raise CrawleraFetchException(log_msg) from exc else: logger.warning(log_msg) return response server_error = json_response.get("crawlera_error") or json_response.get("error_code") original_status = json_response.get("original_status") request_id = json_response.get("id") or json_response.get("uncork_id") if server_error: message = json_response.get("body") or json_response.get("message") self.stats.inc_value("crawlera_fetch/response_error") self.stats.inc_value("crawlera_fetch/response_error/{}".format(server_error)) log_msg = ( "Error downloading <{} {}> (Original status: {}, " "Fetch API error message: {}, Request ID: {})" ) log_msg = log_msg.format( original_request.method, original_request.url, original_status or "unknown", message, request_id or "unknown", ) if self.raise_on_error: raise CrawleraFetchException(log_msg) else: logger.warning(log_msg) return response self.stats.inc_value("crawlera_fetch/response_status_count/{}".format(original_status)) crawlera_meta["upstream_response"] = { "status": response.status, "headers": response.headers, "body": json_response, } try: resp_body = base64.b64decode(json_response["body"], validate=True) except (binascii.Error, ValueError): resp_body = json_response["body"] respcls = responsetypes.from_args( headers=json_response["headers"], url=json_response["url"], body=resp_body, ) return response.replace( cls=respcls, request=original_request, headers=json_response["headers"], url=json_response["url"], body=resp_body, status=original_status or 200, )
def _dqpop(self):
    if self.domainmodel:
        d = self.domainmodel.q_pop()
        if d:
            return request_from_dict(json.loads(d), self.spider)
def pop(self):
    # LPOP is atomic on the server side, no multi/exec needed
    result = self.server.lpop(self.key)
    if result:
        return request_from_dict(marshal.loads(result), self.spider)
def __decode_parser_request__(self, obj, spider):
    if obj['meta'].get('garment'):
        serialized_requests = obj['meta']['garment']['meta']['requests_queue']
        requests_queue = [request_from_dict(req, spider)
                          for req in serialized_requests]
        obj['meta']['garment']['meta']['requests_queue'] = requests_queue