def parse_item(self, response):
    """Build a ``MysqlpjtItem`` from the page's title and keywords.

    Args:
        response: Object exposing ``xpath(...)`` whose result supports
            ``extract()`` (presumably a Scrapy response -- confirm
            against the spider).

    Returns:
        The populated ``MysqlpjtItem``; ``name`` and ``keywd`` each hold
        whatever ``extract()`` returned for the matching query.
    """
    item = MysqlpjtItem()
    # Field name -> XPath expression, evaluated in this order.
    field_queries = (
        # Page title.
        ("name", "/html/head/title/text()"),
        # Page keywords from the <meta name="keywords"> tag.
        ("keywd", "/html/head/meta[@name='keywords']/@content"),
    )
    for field, query in field_queries:
        item[field] = response.xpath(query).extract()
    return item
def parse_item(self, response):
    """Populate a ``MysqlpjtItem`` with the page title and keywords.

    Args:
        response: Object exposing ``xpath(...)`` whose result supports
            ``extract()`` (presumably a Scrapy response -- confirm
            against the spider).

    Returns:
        The ``MysqlpjtItem`` with ``title`` and ``keywd`` set to the
        extracted values.
    """
    item = MysqlpjtItem()

    title_selection = response.xpath("/html/head/title/text()")
    item['title'] = title_selection.extract()

    keyword_selection = response.xpath(
        "/html/head/meta[@name='keywords']/@content"
    )
    item['keywd'] = keyword_selection.extract()

    return item
def parse_item(self, response):
    """Extract the page title and keywords into a ``MysqlpjtItem``.

    Args:
        response: Object exposing ``xpath(...)`` whose result supports
            ``extract()`` (presumably a Scrapy response -- confirm
            against the spider).

    Returns:
        The ``MysqlpjtItem`` with ``name`` and ``keywd`` filled in.
    """
    def pull(expression):
        # Run one XPath query against the response and extract its matches.
        return response.xpath(expression).extract()

    item = MysqlpjtItem()
    item['name'] = pull("/html/head/title/text()")
    item['keywd'] = pull('/html/head/meta[@name="keywords"]/@content')
    return item
def parse_item(self, response):
    """Extract the page title and keywords into a ``MysqlpjtItem``.

    Keywords are read from the ``<meta name="keywords">`` tag. When that
    query yields nothing, the ``data-wbkey`` attribute of
    ``<div id="keywords">`` is used instead.

    Args:
        response: Object exposing ``xpath(...)`` whose result supports
            ``extract()`` (presumably a Scrapy response -- confirm
            against the spider).

    Returns:
        The ``MysqlpjtItem`` with ``name`` and ``keywd`` filled in.
    """
    item = MysqlpjtItem()
    item['name'] = response.xpath('/html/head/title/text()').extract()

    # Evaluate the meta query once. The previous code ran it twice and
    # then unconditionally overwrote the fallback value, so the <div>
    # fallback never took effect.
    keywords = response.xpath('//meta[@name="keywords"]/@content').extract()
    if not keywords:
        keywords = response.xpath(
            '//div[@id="keywords"]/@data-wbkey'
        ).extract()
    item['keywd'] = keywords

    return item