def parse_page_content(self, response):
    """Extract bid information from a detail page into a ``CqspiderItem``.

    The markup matched by ``//tr[4]/td[2]//div[1]`` is reduced to plain text
    (every tag is replaced by a single space) and handed to ``FilterForHC``,
    whose getters supply the item's four fields.

    Args:
        response: Page response exposing ``xpath(...).extract()``
            (presumably a Scrapy response -- confirm against the caller).

    Returns:
        The populated ``CqspiderItem``, or ``None`` when the XPath matches
        nothing on the page.
    """
    import logging

    fragments = response.xpath('//tr[4]/td[2]//div[1]').extract()
    if not fragments:
        # An unconditional ``.pop()`` on an empty result raised IndexError;
        # skip the page instead and leave a trace in the log.
        logging.getLogger(__name__).warning(
            'No bid content matched on %s',
            getattr(response, 'url', '<unknown url>'))
        return None

    # Use the last match, which is what ``.pop()`` returned before.
    text = re.sub(r'<[^>]+>', ' ', fragments[-1])
    fil = FilterForHC(text)

    item = CqspiderItem()
    item['project_name'] = fil.get_project_name()
    item['bid_name'] = fil.get_bid_name()
    item['bid_time'] = fil.get_bid_time()
    item['bid_money'] = fil.get_bid_money()
    return item
def parse_page_content(self, response):
    """Extract bid information from a detail page into a ``CqspiderItem``.

    The markup of the element with ``id="textflag"`` has every tag removed
    (replaced by the empty string) and the remaining text is handed to
    ``FilterForJJ``, whose getters supply the item's four fields.

    Args:
        response: Page response exposing ``xpath(...).extract()``
            (presumably a Scrapy response -- confirm against the caller).

    Returns:
        The populated ``CqspiderItem``, or ``None`` when no element with
        that id is present on the page.
    """
    import logging

    fragments = response.xpath('//*[@id="textflag"]').extract()
    if not fragments:
        # An unconditional ``.pop()`` on an empty result raised IndexError;
        # skip the page instead and leave a trace in the log.
        logging.getLogger(__name__).warning(
            'No bid content matched on %s',
            getattr(response, 'url', '<unknown url>'))
        return None

    # Use the last match, which is what ``.pop()`` returned before.
    text = re.sub(r'<[^>]*>', '', fragments[-1])
    fil = FilterForJJ(text)

    item = CqspiderItem()
    item['project_name'] = fil.get_project_name()
    item['bid_name'] = fil.get_bid_name()
    item['bid_time'] = fil.get_bid_time()
    item['bid_money'] = fil.get_bid_money()
    return item
def parse_page_content(self, response):
    """Extract bid information from a detail page into a ``CqspiderItem``.

    The markup matched by ``//*[@id="bulletinContent"]/tbody`` is reduced to
    plain text (every tag is replaced by a single space) and handed to
    ``FilterForCQS``, whose getters supply the item's four fields.

    Args:
        response: Page response exposing ``xpath(...).extract()``
            (presumably a Scrapy response -- confirm against the caller).

    Returns:
        The populated ``CqspiderItem``, or ``None`` when the XPath matches
        nothing on the page.
    """
    import logging

    fragments = response.xpath('//*[@id="bulletinContent"]/tbody').extract()
    if not fragments:
        # An unconditional ``.pop()`` on an empty result raised IndexError;
        # skip the page instead and leave a trace in the log.
        logging.getLogger(__name__).warning(
            'No bid content matched on %s',
            getattr(response, 'url', '<unknown url>'))
        return None

    # Use the last match, which is what ``.pop()`` returned before.
    text = re.sub(r'<[^>]+>', ' ', fragments[-1])
    fil = FilterForCQS(text)

    item = CqspiderItem()
    item['project_name'] = fil.get_project_name()
    item['bid_name'] = fil.get_bid_name()
    item['bid_time'] = fil.get_bid_time()
    item['bid_money'] = fil.get_bid_money()
    return item
def parse_page_content(self, response):
    """Extract bid information from a detail page into a ``CqspiderItem``.

    The markup of the element with ``id="tblInfo"`` is reduced to plain text
    (every tag is replaced by a single space) and handed to ``FilterForBB``,
    whose getters supply the item's four fields.

    Args:
        response: Page response exposing ``xpath(...).extract()``
            (presumably a Scrapy response -- confirm against the caller).

    Returns:
        The populated ``CqspiderItem``, or ``None`` when no element with
        that id is present on the page.
    """
    import logging

    fragments = response.xpath('//*[@id="tblInfo"]').extract()
    if not fragments:
        # NOTE(review): the original guarded on a non-empty match, but its
        # layout did not make clear whether an empty item or nothing was
        # returned in this case. Nothing is returned here so that no
        # field-less item is emitted -- confirm this is the intent.
        logging.getLogger(__name__).warning(
            'No bid content matched on %s',
            getattr(response, 'url', '<unknown url>'))
        return None

    # Use the last match, which is what ``.pop()`` returned before.
    text = re.sub(r'<[^>]+>', ' ', fragments[-1])
    fil = FilterForBB(text)

    item = CqspiderItem()
    item['project_name'] = fil.get_project_name()
    item['bid_name'] = fil.get_bid_name()
    item['bid_time'] = fil.get_bid_time()
    item['bid_money'] = fil.get_bid_money()
    return item