def parse_yjsy_item(self, response):
    """Build an ``NcuItem`` tagged ``'graduate'`` from ``response``.

    The item records the response URL and, as ``content``, the first node
    matching ``//table[@class="border"]`` after it has been passed through
    ``encode_content`` together with the response encoding and raw body.

    Args:
        response: the page response; ``url``, ``encoding``, ``body`` and
            ``xpath()`` are used.

    Returns:
        The populated ``NcuItem``.

    Raises:
        IndexError: presumably, when no node matches (the ``[0]`` lookup
            on an empty result list).
    """
    item = NcuItem()
    item['art_url'] = response.url
    item['art_type'] = 'graduate'

    matches = response.xpath('//table[@class="border"]').extract()
    fragment = matches[0]  # only the first matching table is kept

    item['content'] = encode_content(response.encoding, fragment, response.body)
    return item
def parse_item(self, response):
    """Build an ``NcuItem`` tagged ``'eduaffiche'`` from ``response``.

    The item records the response URL and, as ``content``, the first node
    matching ``//div[@class="aside"]`` after it has been passed through
    ``encode_content`` together with the response encoding and raw body.
    The encoded content is also printed as debug output.

    Args:
        response: the page response; ``url``, ``encoding``, ``body`` and
            ``xpath()`` are used.

    Returns:
        The populated ``NcuItem``.

    Raises:
        IndexError: presumably, when no node matches (the ``[0]`` lookup
            on an empty result list).
    """
    item = NcuItem()
    item['art_url'] = response.url
    item['art_type'] = 'eduaffiche'

    matches = response.xpath('//div[@class="aside"]').extract()
    fragment = matches[0]  # only the first matching div is kept

    item['content'] = encode_content(response.encoding, fragment, response.body)
    # Single-argument call form: prints the same on Python 2 and is valid
    # syntax on Python 3, unlike the bare ``print`` statement.
    print(item['content'])
    return item
def parse_item(self, response):
    """Build an ``NcuItem`` tagged ``"news"`` from ``response``.

    The item records the response URL and, as ``content``, the first node
    matching ``//div[@class="article_show clearfix"]`` after it has been
    passed through ``encode_content`` together with the response encoding
    and raw body. The encoded content is also printed as debug output.

    Args:
        response: the page response; ``url``, ``encoding``, ``body`` and
            ``xpath()`` are used.

    Returns:
        The populated ``NcuItem``.

    Raises:
        IndexError: presumably, when no node matches (the ``[0]`` lookup
            on an empty result list).
    """
    item = NcuItem()
    item["art_url"] = response.url
    item["art_type"] = "news"

    matches = response.xpath('//div[@class="article_show clearfix"]').extract()
    fragment = matches[0]  # only the first matching div is kept

    item["content"] = encode_content(response.encoding, fragment, response.body)
    # Single-argument call form: prints the same on Python 2 and is valid
    # syntax on Python 3, unlike the bare ``print`` statement.
    print(item["content"])
    return item
def parse_cdgh_item(self, response):
    """Build an ``NcuItem`` tagged ``'labor_union'`` from ``response``.

    The item records the response URL and, as ``content``, the first node
    matching ``//td[@style="BACKGROUND-COLOR: #ffffff"]`` after it has been
    passed through ``encode_content`` together with the response encoding
    and raw body. The encoded content is also printed as debug output.

    Args:
        response: the page response; ``url``, ``encoding``, ``body`` and
            ``xpath()`` are used.

    Returns:
        The populated ``NcuItem``.

    Raises:
        IndexError: presumably, when no node matches (the ``[0]`` lookup
            on an empty result list).
    """
    item = NcuItem()
    item['art_url'] = response.url
    item['art_type'] = 'labor_union'

    # The cell is located by its exact inline style attribute value.
    matches = response.xpath('//td[@style="BACKGROUND-COLOR: #ffffff"]').extract()
    fragment = matches[0]  # only the first matching cell is kept

    item['content'] = encode_content(response.encoding, fragment, response.body)
    # Single-argument call form: prints the same on Python 2 and is valid
    # syntax on Python 3, unlike the bare ``print`` statement.
    print(item['content'])
    return item
def parse_jwc_item(self, response):
    """Build an ``NcuItem`` tagged ``'purchasing'`` from ``response``.

    The item records the response URL and, as ``content``, the first node
    matching ``//div[@class="box3"]`` after it has been passed through
    ``encode_content`` together with the response encoding and raw body.
    The encoded content is also printed as debug output.

    Args:
        response: the page response; ``url``, ``encoding``, ``body`` and
            ``xpath()`` are used.

    Returns:
        The populated ``NcuItem``.

    Raises:
        IndexError: presumably, when no node matches (the ``[0]`` lookup
            on an empty result list).
    """
    item = NcuItem()
    item['art_url'] = response.url
    item['art_type'] = 'purchasing'

    matches = response.xpath('//div[@class="box3"]').extract()
    fragment = matches[0]  # only the first matching div is kept

    item['content'] = encode_content(response.encoding, fragment, response.body)
    # Single-argument call form: prints the same on Python 2 and is valid
    # syntax on Python 3, unlike the bare ``print`` statement.
    print(item['content'])
    return item
def parse_item(self, response):
    """Build an ``NcuItem`` tagged ``'news'`` from ``response``.

    The item records the response URL and, as ``content``, the fourth node
    (index 3) matching ``//table[@width="770"]`` after it has been passed
    through ``encode_content`` together with the response encoding and raw
    body. The encoded content is also printed as debug output.

    Args:
        response: the page response; ``url``, ``encoding``, ``body`` and
            ``xpath()`` are used.

    Returns:
        The populated ``NcuItem``.

    Raises:
        IndexError: presumably, when fewer than four nodes match (the
            ``[3]`` lookup on a too-short result list).
    """
    item = NcuItem()
    item['art_url'] = response.url
    item['art_type'] = 'news'

    tables = response.xpath('//table[@width="770"]').extract()
    # Index 3 is deliberate: the fourth 770-wide table is the one used as
    # content. NOTE(review): presumably the earlier ones are page layout;
    # confirm against the target site.
    fragment = tables[3]

    item['content'] = encode_content(response.encoding, fragment, response.body)
    # Single-argument call form: prints the same on Python 2 and is valid
    # syntax on Python 3, unlike the bare ``print`` statement.
    print(item['content'])
    return item
def parse_cgzx_item(self, response):
    """Build an ``NcuItem`` tagged ``'invitation'`` from ``response``.

    The item records the response URL and, as ``content``, the first node
    matching ``//div[@id="article_right"]`` after it has been passed through
    ``encode_content`` together with the response encoding and raw body.
    The encoded content is also printed as debug output.

    Args:
        response: the page response; ``url``, ``encoding``, ``body`` and
            ``xpath()`` are used.

    Returns:
        The populated ``NcuItem``.

    Raises:
        IndexError: presumably, when no node matches (the ``[0]`` lookup
            on an empty result list).
    """
    # NOTE: an encoding override was left disabled here in the original:
    # response.encoding='gbk'
    item = NcuItem()
    item['art_url'] = response.url
    item['art_type'] = 'invitation'

    matches = response.xpath('//div[@id="article_right"]').extract()
    fragment = matches[0]  # only the first matching div is kept

    item['content'] = encode_content(response.encoding, fragment, response.body)
    # Single-argument call form: prints the same on Python 2 and is valid
    # syntax on Python 3, unlike the bare ``print`` statement.
    print(item['content'])
    return item