def get_url(self):
    """Return the 'link' field of the most recently fetched item.

    Pulls raw JSON via ``self.get_raw_json()``, parses it with
    ``ju.json2py``, caches the parsed dict in ``self.__last_pulled_item``,
    and returns its ``'link'`` value.  Returns ``None`` when no raw JSON
    is available.
    """
    raw = self.get_raw_json()  # renamed: previous local shadowed the json module
    if raw is None:  # 'is None', not '== None' (PEP 8)
        return None
    item = ju.json2py(raw)
    # Cache the parsed item so later accessors can reuse it without refetching.
    self.__last_pulled_item = item
    return item['link']
def parse(self, response):
    """Parse a JSON news listing and yield one news dict per article.

    Non-200 responses are ignored.  Each yielded dict carries the
    article title, its original-source link, and this spider's name.
    """
    if response.status != 200:
        return
    payload = response.body.decode('utf-8')
    for entry in ju.json2py(payload):
        yield {
            'title': entry['title'],
            'link': entry['originalSource'],
            'source': self.name,
        }
def parse(self, response):
    """Parse the quasi-JSON feed (bare keys) and yield news dicts.

    Non-200 responses are ignored.  The body is JS-like rather than
    strict JSON, so the bare 'category' and 'item' keys are quoted
    before parsing.
    """
    if response.status != 200:
        return
    # Drop the 16-character prefix, then quote the unquoted keys so the
    # remainder parses as JSON.
    raw = response.body.decode('utf-8')[16:]
    fixed = raw.replace('category', '"category"').replace('item', '"item"')
    for record in ju.json2py(fixed)['item']:
        yield {
            'title': record[1],
            'link': record[2],
            'source': self.name,
        }
def parse(self, response):
    """Parse the GBK-encoded JSONP-style feed and yield news dicts.

    Non-200 responses are ignored.  If the body cannot be decoded as
    GBK, the error is reported and nothing is yielded (same best-effort
    behaviour as before, without the boolean-flag plumbing).
    """
    if response.status != 200:
        return
    try:
        # Strip the 14-char JS callback prefix and the trailing character.
        payload = response.body.decode('gbk')[14:-1]
    except UnicodeDecodeError as e:  # narrowed from a blanket 'except Exception'
        print(e)
        return
    for item in ju.json2py(payload):
        yield {
            'title': item['title'],
            'link': item['docurl'],
            'source': self.name,
        }
def parse(self, response):
    """Parse one page of the GBK feed, yield its articles, then request
    the next page.

    Non-200 responses are ignored.  Page numbers below 10 appear
    zero-padded in the URL ('_01' .. '_09'); 10 and above are used as-is
    ('_10', ...).  ``self.__page`` is advanced after the next-page
    request is yielded.
    """
    if response.status != 200:
        return
    # Strip the 14-char JS callback prefix and the trailing character.
    payload = response.body.decode('gbk')[14:-1]
    for item in ju.json2py(payload):
        yield {
            'title': item['title'],
            'link': item['docurl'],
            'source': self.name,
        }
    # zfill(2) reproduces the old two-branch '_0N' / '_NN' formatting for
    # non-negative page counters, removing the duplicated Request code.
    page_token = '_' + str(self.__page).zfill(2)
    yield scrapy.Request(
        url=self.yaowen_url_pre + page_token + self.yaowen_url_aft,
        callback=self.parse,
        meta={'dont_merge_cookies': True},
    )
    self.__page += 1