def parse(self, response):
    """Extract the page's ``<title>`` element and print it.

    Fixes relative to the original:
    - the Python 2 ``print`` statement is converted to the ``print()``
      function used by the sibling spiders;
    - the item is now returned — without a return the pipeline receives
      no data (as the sibling spiders' own comments note).
    """
    # Initialize the item container.
    item = MypjtItem()
    # XPath for the whole <title> element (not just its text node).
    item["title"] = response.xpath("/html/head/title").extract()
    # Echo the extracted title list for debugging.
    print(item["title"])
    return item
def parse(self, response):
    """Extract the ``<title>`` elements and print each one.

    Converted from Python 2: the ``print`` statement became ``print()``,
    and the per-element ``encode("gbk")`` (a Py2 Windows-console
    workaround) is dropped — Python 3 ``str`` prints Chinese text
    directly, whereas printing the encoded value would show a bytes repr.
    Also returns the item so the pipeline receives the data.
    """
    item = MypjtItem()
    item["title"] = response.xpath("/html/head/title").extract()
    # item["title"] is a list, so iterate over its elements.
    for title in item["title"]:
        print(title)
    return item
def parse(self, response):
    """Collect the page title text and the keywords meta content."""
    title_xp = "/html/head/title/text()"
    keywords_xp = "//meta[@name='keywords']/@content"
    item = MypjtItem()
    item["title"] = response.xpath(title_xp).extract()
    item["key"] = response.xpath(keywords_xp).extract()
    print(item["title"])
    # Returning the item is what hands the scraped data to the pipeline.
    return item
def parse(self, response):
    """Extract title text and keywords, print both, and hand the item on."""
    # Pull both fields out first, then populate the item.
    titles = response.xpath("/html/head/title/text()").extract()
    keywords = response.xpath("//meta[@name='keywords']/@content").extract()
    item = MypjtItem()
    item["title"] = titles
    item["key"] = keywords
    # Python 3 prints Chinese text directly — no manual encoding needed.
    print(item["title"], item["key"])
    return item
def __init__(self):
    """Set up spider state and seed the item with the latest CNY update date."""
    super(CnySpider, self).__init__()
    # Paging / bookkeeping state.
    self.timeList = []
    self.datetime = None
    self.currentPageIndex = 0
    self.allPagesNum = None
    # Item scaffolding shared across requests.
    item = MypjtItem()
    item['data_list'] = []
    item['currency_name'] = 'CNY'
    self.item = item
    # Most recent update timestamp recorded for the CNY table.
    self.datetime = check_all_currency_tb('CNY', "CNY_tb")
    self.item['new_update_date'] = self.datetime
def __init__(self):
    """Initialise the EUR spider: item scaffolding plus the EUR rate table."""
    super(EurSpider, self).__init__()
    self.currency_name = 'EUR'
    # Per-currency table name follows the "<code>_tb" convention.
    self.currency_tb_name = self.currency_name + '_tb'
    self.page_data = []
    self.item = MypjtItem()
    self.item['currency_name'] = self.currency_name
    self.item['data_list'] = []
    # Column headers: currency name, rate, update date, default.
    self.item['top_list'] = ['货币名称', '汇率', '更新日期', 'default']
    # Counter currencies the EUR rate is scraped against
    # (CNY, JPY, GBP, USD, RUB).
    self.exchange_currency_list = ['CNY', 'JPY', 'GBP', 'USD', 'RUB']
    self.index = 0
    logger.info(self.start_urls[0])
    # Make sure the EUR table exists before crawling starts.
    check_all_currency_tb(self.currency_name, self.currency_tb_name)
def parse(self, response):
    """Extract the page title text into the item, print it, and return it.

    Fixes two defects present in the original:
    - ``.extract()`` was missing, so the field held a raw SelectorList
      rather than the title strings (every sibling spider extracts);
    - the item was never returned, so pipelines received no data.
    """
    item = MypjtItem()
    # Title text node(s) as a list of plain strings.
    item["title"] = response.xpath("/html/head/title/text()").extract()
    # Python 3 prints Chinese text directly.
    print(item["title"])
    return item
def parse(self, response):
    """Yield an item holding the page title text and keywords meta content.

    Fix: ``urlname`` previously stored the raw SelectorList (no
    ``.extract()``), inconsistent with ``key`` and not serialisable by
    the usual feed exporters.
    """
    item = MypjtItem()
    item['urlname'] = response.xpath('/html/head/title/text()').extract()
    item['key'] = response.xpath('//meta[@name="keywords"]/@content').extract()
    yield item
def parse(self, response):
    """Scrape title text and keyword metadata, echo the item, return it."""
    # Field name -> XPath expression, applied in insertion order.
    field_xpaths = {
        "title": "/html/head/title/text()",
        "key": "//meta[@name='keywords']/@content",
    }
    item = MypjtItem()
    for field, expr in field_xpaths.items():
        item[field] = response.xpath(expr).extract()
    print(item)
    return item
def parse(self, response):
    """Populate and yield an item with title text and keywords content."""
    item = MypjtItem()
    title_selector = response.xpath("/html/head/title/text()")
    # .extract() turns the SelectorList into plain strings — a selector
    # by itself is not a string.
    item["title"] = title_selector.extract()
    item["key"] = response.xpath("//meta[@name='keywords']/@content").extract()
    # The item must be yielded, otherwise the pipeline never sees the data.
    yield item