Beispiel #1
0
 def parse(self, response):
     """Collect article URLs from the JSON embedded in the page's script.

     Prints the response URL, then takes the text of the first ``<script>``
     whose content contains ``var data = ``, parses the span from its first
     ``{`` to its last ``}`` as JSON, and appends the ``url`` of every item
     under the ``result`` key to ``root/csdn.<self.tag>.content``, one URL
     per line.

     Yields:
         A ``Request`` for the same URL when the script or its JSON object
         cannot be located; nothing otherwise.

     Raises:
         ValueError: if the located span is not valid JSON.
         KeyError: if the JSON lacks ``result`` or an item lacks ``url``.
     """
     print(response.url)
     scripts = Selector(response).xpath('//script[contains(., "var data = ")]/text()').extract()
     # A page without the expected script is treated as a failed fetch
     # instead of crashing with IndexError on an empty result list.
     links_data = scripts[0] if scripts else ''
     start, end = links_data.find('{'), links_data.rfind('}')
     # `end < start` also covers end == -1 and a '}' that precedes the '{'.
     if start == -1 or end < start:
         # NOTE(review): Scrapy's duplicate filter may drop a request for an
         # already-seen URL; confirm whether dont_filter=True (with a retry
         # cap) is intended here.
         yield Request(response.url)  # failed, and retry!
     else:
         # json.loads accepts text directly; no intermediate encode needed.
         payload = json.loads(links_data[start:end + 1])
         article_links = [item['url'] for item in payload['result']]
         with open('root/csdn.%s.content' % (self.tag), 'ab') as fp:
             # The file is opened in binary mode, so encode explicitly; the
             # trailing '' makes the written chunk end with a newline.
             fp.write('\n'.join(article_links + ['']).encode('utf-8'))