Example no. 1
 def get_url(self):
     # Fetch the raw payload; bail out early if nothing came back.
     raw_json = self.get_raw_json()
     if raw_json is None:
         return None
     # Deserialize, cache the parsed item, and return its link field.
     dic = ju.json2py(raw_json)
     self.__last_pulled_item = dic
     return dic['link']
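
All five examples feed a decoded string to ju.json2py and get back native Python objects, but the module behind the ju alias is not shown on this page. A minimal sketch of what such a helper could look like, assuming it is a thin wrapper over the standard library's json.loads (the real helper may add validation or logging):

import json

def json2py(json_str):
    # Hypothetical stand-in for ju.json2py: deserialize a JSON string
    # into the corresponding Python dict/list structure.
    return json.loads(json_str)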
Example no. 2
 def parse(self, response):
     # Only parse successful responses.
     if response.status == 200:
         raw_json = response.body.decode('utf-8')
         data = ju.json2py(raw_json)
         # Emit one item per news entry.
         for item in data:
             news_dic = {
                 'title': item['title'],
                 'link': item['originalSource'],
                 'source': self.name
             }
             yield news_dic
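
Examples 2 through 5 are parse callbacks from Scrapy spiders: response is the downloaded page, self.name is the spider's name, and each yielded dict is collected as a scraped item. A minimal, hypothetical spider showing the context such a callback runs in (the class name, spider name, and start URL are placeholders, not values from the original projects):

import json
import scrapy

class NewsSpider(scrapy.Spider):
    # Placeholder identifiers; only the shape of the class matters here.
    name = 'news'
    start_urls = ['https://example.com/api/news.json']

    def parse(self, response):
        if response.status == 200:
            # The originals use ju.json2py; json.loads is the
            # standard-library equivalent assumed here.
            for item in json.loads(response.text):
                yield {
                    'title': item['title'],
                    'link': item['link'],
                    'source': self.name,
                }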
Example no. 3
    def parse(self, response):
        if response.status == 200:
            # The body is not valid JSON: drop the 16-character JavaScript
            # prefix, then quote the bare 'category' and 'item' keys.
            raw_json = response.body.decode('utf-8')[16:].replace('category', '"category"').replace('item', '"item"')

            newses = ju.json2py(raw_json)['item']
            for n in newses:
                news_dic = {
                    'title': n[1],   # title is the second field of each row
                    'link': n[2],    # link is the third
                    'source': self.name
                }
                yield news_dic
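
The slicing and replace calls in Example no. 3 repair a response that is not valid JSON. A hypothetical payload illustrating the transformation (the actual prefix and structure of the real feed are not shown in the example):

import json

# Hypothetical response body; 'var news_data = ' is exactly 16 characters,
# matching the [16:] slice in Example no. 3.
raw = 'var news_data = {category:"tech",item:[["001","Some title","http://example.com/a"]]}'

cleaned = raw[16:].replace('category', '"category"').replace('item', '"item"')
data = json.loads(cleaned)       # now valid JSON
print(data['item'][0][1])        # -> 'Some title'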
Example no. 4
 def parse(self, response):
     if response.status == 200:
         decode_flag = True
         raw_json = ''
         try:
             # GBK-encoded body: strip the 14-character JavaScript prefix
             # and the trailing character.
             raw_json = response.body.decode('gbk')[14:-1]
         except Exception as e:
             print(e)
             decode_flag = False
         if decode_flag:
             data = ju.json2py(raw_json)
             for item in data:
                 news_dic = {
                     'title': item['title'],
                     'link': item['docurl'],
                     'source': self.name
                 }
                 yield news_dic
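
Example no. 4 tracks decode success with a boolean flag. A sketch of the same control flow using try/except/else, which drops the flag and narrows the handler to decoding errors; this is a hypothetical restructuring under the same spider context, not the original author's code:

    def parse(self, response):
        if response.status == 200:
            try:
                # Same 14-character prefix strip as Example no. 4.
                raw_json = response.body.decode('gbk')[14:-1]
            except UnicodeDecodeError as e:
                print(e)
            else:
                for item in ju.json2py(raw_json):
                    yield {
                        'title': item['title'],
                        'link': item['docurl'],
                        'source': self.name,
                    }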
Example no. 5
    def parse(self, response):
        if response.status == 200:
            raw_json = response.body.decode('gbk')[14:-1]
            data = ju.json2py(raw_json)
            for item in data:
                news_dic = {
                    'title': item['title'],
                    'link': item['docurl'],
                    'source': self.name
                }
                yield news_dic

            # Queue the next page; single-digit page numbers are
            # zero-padded in the URL.
            if self.__page < 10:
                yield scrapy.Request(url=self.yaowen_url_pre + '_0' +
                                     str(self.__page) + self.yaowen_url_aft,
                                     callback=self.parse,
                                     meta={'dont_merge_cookies': True})
            else:
                yield scrapy.Request(url=self.yaowen_url_pre + '_' +
                                     str(self.__page) + self.yaowen_url_aft,
                                     callback=self.parse,
                                     meta={'dont_merge_cookies': True})
            self.__page += 1
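
The if/else around the two scrapy.Request calls in Example no. 5 exists only to zero-pad single-digit page numbers into the URL. A hypothetical equivalent that collapses both branches with string formatting ('{:02d}' pads numbers below 10 and leaves larger ones unchanged):

            url = '{}_{:02d}{}'.format(self.yaowen_url_pre,
                                       self.__page,
                                       self.yaowen_url_aft)
            yield scrapy.Request(url=url,
                                 callback=self.parse,
                                 meta={'dont_merge_cookies': True})
            self.__page += 1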