Beispiel #1
0
 def parse(self, response):
     """Yield one ``jwcsysuItem`` per anchor matched by ``a[class=j_th_tit]``.

     Every item gets the placeholder title ``"wow"``, the anchor's ``title``
     attribute as ``newContent`` and its ``href`` attribute as ``newHref``.

     Args:
         response: the page to scrape; only its ``css`` method is used here.

     Raises:
         IndexError: while iterating, if a matched anchor has no ``title``
             or ``href`` attribute (the first extracted value is taken
             unconditionally).
     """
     # Parenthesised single-argument print is valid both as the Python 2
     # print statement and as the Python 3 print function.
     print('response')
     for anchor in response.css("a[class=j_th_tit] "):
         item = jwcsysuItem()
         item["newTitle"] = "wow"
         item["newContent"] = anchor.xpath('@title').extract()[0]
         item["newHref"] = anchor.xpath('@href').extract()[0]
         print(item)
         yield item
Beispiel #2
0
 def parse(self, response):
   """Yield one ``jwcsysuItem`` per anchor matched by ``a[class=j_th_tit]``.

   Every item gets the placeholder title ``"wow"``, the anchor's ``title``
   attribute as ``newContent`` and its ``href`` attribute as ``newHref``.

   Args:
     response: the page to scrape; only its ``css`` method is used here.

   Raises:
     IndexError: while iterating, if a matched anchor has no ``title`` or
       ``href`` attribute (the first extracted value is taken
       unconditionally).
   """
   # Parenthesised single-argument print is valid both as the Python 2
   # print statement and as the Python 3 print function.
   print('response')
   for anchor in response.css("a[class=j_th_tit] "):
     item = jwcsysuItem()
     item["newTitle"] = "wow"
     item["newContent"] = anchor.xpath('@title').extract()[0]
     item["newHref"] = anchor.xpath('@href').extract()[0]
     print(item)
     yield item
Beispiel #3
0
 def parse(self, response):
     """Yield one ``jwcsysuItem`` per ``div[class=threadlist_lz]`` element.

     Every item gets the placeholder title ``"wow"``. ``newContent`` and
     ``newHref`` are lists: the ``title`` and ``href`` attribute values of
     the ``<a>`` elements found inside the div (empty when there are none).

     Args:
         response: the page to scrape; only its ``css`` method is used here.
     """
     # Parenthesised single-argument print is valid both as the Python 2
     # print statement and as the Python 3 print function.
     print('response')
     for thread in response.css("div[class=threadlist_lz]"):
         item = jwcsysuItem()
         item["newTitle"] = "wow"
         print('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
         # Attribute values are selected with the ``::attr(name)``
         # pseudo-element; the previous ``a::(attr=...)`` spelling is not
         # valid selector syntax.
         item["newContent"] = thread.css("a::attr(title)").extract()
         item["newHref"] = thread.css("a::attr(href)").extract()
         print(item)
         yield item
Beispiel #4
0
 def parse(self, response):
   """Yield one ``jwcsysuItem`` per ``div[class=threadlist_lz]`` element.

   Every item gets the placeholder title ``"wow"``. ``newContent`` and
   ``newHref`` are lists: the ``title`` and ``href`` attribute values of
   the ``<a>`` elements found inside the div (empty when there are none).

   Args:
     response: the page to scrape; only its ``css`` method is used here.
   """
   # Parenthesised single-argument print is valid both as the Python 2
   # print statement and as the Python 3 print function.
   print('response')
   for thread in response.css("div[class=threadlist_lz]"):
     item = jwcsysuItem()
     item["newTitle"] = "wow"
     print('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
     # Attribute values are selected with the ``::attr(name)``
     # pseudo-element; the previous ``a::(attr=...)`` spelling is not
     # valid selector syntax.
     item["newContent"] = thread.css("a::attr(title)").extract()
     item["newHref"] = thread.css("a::attr(href)").extract()
     print(item)
     yield item
 def parse_item(self, response):
   """Build a ``jwcsysuItem`` from an article page, or return ``None``.

   Fills ``newCatalog``, ``newTitle``, ``newContent``, ``newHref`` and
   ``newTime`` from the response. Any exception raised while extracting
   these fields is treated as "this page cannot be parsed": it is logged
   at debug level and ``None`` is returned instead of an item.

   Args:
     response: the page to scrape; its ``css`` method and ``url``
       attribute are used.

   Returns:
     The populated item, or ``None`` when extraction failed.
   """
   import logging  # function-scope import keeps this block self-contained

   item = jwcsysuItem()
   try:
     # Serialized markup of the third <a> inside div.sec_art_list; the
     # split chain keeps the text between the last ">" and the closing
     # tag (presumably the catalog name -- confirm against the page).
     third_link = response.css("div[class=sec_art_list]").css("a").extract()[2]
     item["newCatalog"] = third_link.split("<")[-2].split(">")[-1]
     item["newTitle"] = response.css("div[class=art_content]").css("h1::text").extract()[0]
     item["newContent"] = response.css("div[class=content]").extract()[0]
     item["newHref"] = response.url
     # Fixed 10-character window counted from the end of the serialized
     # div -- NOTE(review): presumably a YYYY-MM-DD date; depends on the
     # exact page layout, confirm.
     item["newTime"] = response.css("div[class=art_property]").extract()[0][-62:-52]
     return item
   except Exception:
     # Best-effort: unparsable pages are skipped, but leave a trace so the
     # failure is not completely silent.
     logging.getLogger(__name__).debug(
       "parse_item: could not extract item from %s", response.url,
       exc_info=True)
     return None
Beispiel #6
0
 def parse_item(self, response):
     """Build a ``jwcsysuItem`` from an article page, or return ``None``.

     Fills ``newCatalog``, ``newTitle``, ``newContent``, ``newHref`` and
     ``newTime`` from the response. Any exception raised while extracting
     these fields is treated as "this page cannot be parsed": it is logged
     at debug level and ``None`` is returned instead of an item.

     Args:
         response: the page to scrape; its ``css`` method and ``url``
             attribute are used.

     Returns:
         The populated item, or ``None`` when extraction failed.
     """
     import logging  # function-scope import keeps this block self-contained

     item = jwcsysuItem()
     try:
         # Serialized markup of the third <a> inside div.sec_art_list; the
         # split chain keeps the text between the last ">" and the closing
         # tag (presumably the catalog name -- confirm against the page).
         third_link = response.css("div[class=sec_art_list]").css(
             "a").extract()[2]
         item["newCatalog"] = third_link.split("<")[-2].split(">")[-1]
         item["newTitle"] = response.css("div[class=art_content]").css(
             "h1::text").extract()[0]
         item["newContent"] = response.css(
             "div[class=content]").extract()[0]
         item["newHref"] = response.url
         # Fixed 10-character window counted from the end of the serialized
         # div -- NOTE(review): presumably a YYYY-MM-DD date; depends on
         # the exact page layout, confirm.
         item["newTime"] = response.css(
             "div[class=art_property]").extract()[0][-62:-52]
         return item
     except Exception:
         # Best-effort: unparsable pages are skipped, but leave a trace so
         # the failure is not completely silent.
         logging.getLogger(__name__).debug(
             "parse_item: could not extract item from %s", response.url,
             exc_info=True)
         return None