예제 #1
0
 def parse_yjsy_item(self, response):
     """Build an ``NcuItem`` tagged ``'graduate'`` from *response*.

     The item records the response URL and the markup of the first
     ``<table class="border">`` element, passed through ``encode_content``
     together with the response encoding and raw body.

     Raises:
         IndexError: if the XPath query matches no element.
     """
     item = NcuItem()
     item['art_url'] = response.url
     item['art_type'] = 'graduate'
     matches = response.xpath('//table[@class="border"]').extract()
     # Only the first matching table is kept.
     markup = matches[0]
     item['content'] = encode_content(response.encoding, markup, response.body)
     return item
예제 #2
0
 def parse_item(self, response):
     """Build an ``NcuItem`` tagged ``'eduaffiche'`` from *response*.

     The item records the response URL and the markup of the first
     ``<div class="aside">`` element, passed through ``encode_content``
     together with the response encoding and raw body. The resulting
     content is also echoed to stdout.

     Raises:
         IndexError: if the XPath query matches no element.
     """
     item = NcuItem()
     item['art_url'] = response.url
     item['art_type'] = 'eduaffiche'
     matches = response.xpath('//div[@class="aside"]').extract()
     # Only the first matching element is kept.
     markup = matches[0]
     item['content'] = encode_content(response.encoding, markup, response.body)
     # Call form of print: same output on Python 2 for a single argument,
     # and valid syntax on Python 3 (the statement form is not).
     print(item['content'])
     return item
예제 #3
0
 def parse_item(self, response):
     """Build an ``NcuItem`` tagged ``"news"`` from *response*.

     The item records the response URL and the markup of the first
     ``<div class="article_show clearfix">`` element, passed through
     ``encode_content`` together with the response encoding and raw body.
     The resulting content is also echoed to stdout.

     Raises:
         IndexError: if the XPath query matches no element.
     """
     item = NcuItem()
     item["art_url"] = response.url
     item["art_type"] = "news"
     matches = response.xpath('//div[@class="article_show clearfix"]').extract()
     # Only the first matching element is kept.
     markup = matches[0]
     item["content"] = encode_content(response.encoding, markup, response.body)
     # Call form of print: same output on Python 2 for a single argument,
     # and valid syntax on Python 3 (the statement form is not).
     print(item["content"])
     return item
예제 #4
0
 def parse_cdgh_item(self, response):
     """Build an ``NcuItem`` tagged ``'labor_union'`` from *response*.

     The item records the response URL and the markup of the first ``<td>``
     whose ``style`` attribute is exactly ``BACKGROUND-COLOR: #ffffff``,
     passed through ``encode_content`` together with the response encoding
     and raw body. The resulting content is also echoed to stdout.

     Raises:
         IndexError: if the XPath query matches no element.
     """
     item = NcuItem()
     item['art_url'] = response.url
     item['art_type'] = 'labor_union'
     matches = response.xpath('//td[@style="BACKGROUND-COLOR: #ffffff"]').extract()
     # Only the first matching cell is kept.
     markup = matches[0]
     item['content'] = encode_content(response.encoding, markup, response.body)
     # Call form of print: same output on Python 2 for a single argument,
     # and valid syntax on Python 3 (the statement form is not).
     print(item['content'])
     return item
예제 #5
0
 def parse_jwc_item(self, response):
     """Build an ``NcuItem`` tagged ``'purchasing'`` from *response*.

     The item records the response URL and the markup of the first
     ``<div class="box3">`` element, passed through ``encode_content``
     together with the response encoding and raw body. The resulting
     content is also echoed to stdout.

     Raises:
         IndexError: if the XPath query matches no element.
     """
     item = NcuItem()
     item['art_url'] = response.url
     item['art_type'] = 'purchasing'
     matches = response.xpath('//div[@class="box3"]').extract()
     # Only the first matching element is kept.
     markup = matches[0]
     item['content'] = encode_content(response.encoding, markup, response.body)
     # Call form of print: same output on Python 2 for a single argument,
     # and valid syntax on Python 3 (the statement form is not).
     print(item['content'])
     return item
예제 #6
0
 def parse_item(self, response):
     """Build an ``NcuItem`` tagged ``'news'`` from *response*.

     The item records the response URL and the markup of the fourth
     ``<table width="770">`` element (index 3), passed through
     ``encode_content`` together with the response encoding and raw body.
     The resulting content is also echoed to stdout.

     Raises:
         IndexError: if the XPath query matches fewer than four elements.
     """
     item = NcuItem()
     item['art_url'] = response.url
     item['art_type'] = 'news'
     matches = response.xpath('//table[@width="770"]').extract()
     # The fourth matching table is selected by position.
     # NOTE(review): presumably the article body on this page layout - confirm.
     markup = matches[3]
     item['content'] = encode_content(response.encoding, markup, response.body)
     # Call form of print: same output on Python 2 for a single argument,
     # and valid syntax on Python 3 (the statement form is not).
     print(item['content'])
     return item
예제 #7
0
 def parse_cgzx_item(self, response):
     """Build an ``NcuItem`` tagged ``'invitation'`` from *response*.

     The item records the response URL and the markup of the first
     ``<div id="article_right">`` element, passed through ``encode_content``
     together with the response encoding and raw body. The resulting
     content is also echoed to stdout.

     Raises:
         IndexError: if the XPath query matches no element.
     """
     # NOTE(review): an override forcing response.encoding to 'gbk' was left
     # disabled here; confirm whether it is still needed.
     item = NcuItem()
     item['art_url'] = response.url
     item['art_type'] = 'invitation'
     matches = response.xpath('//div[@id="article_right"]').extract()
     # Only the first matching element is kept.
     markup = matches[0]
     item['content'] = encode_content(response.encoding, markup, response.body)
     # Call form of print: same output on Python 2 for a single argument,
     # and valid syntax on Python 3 (the statement form is not).
     print(item['content'])
     return item