Exemple #1
0
 # Extract the fields listed in All_Xpath.
 for key in All_Xpath:
     item.fields[key] = Field()
     try:
         # ItemLoader silently drops a field when its add_xxx methods find no
         # value; we do not want the field dropped, so it is set to "" instead.
         #
         # One flag per XPath configured for this key: 1 when the page-level
         # query returns something, 0 otherwise. "/" is a placeholder that is
         # never queried and always counts as empty.
         # NOTE(review): only one- or two-element XPath lists are recognised
         # as "all empty" by the comparison below.
         found_flags = [
             1 if (field_xpath != "/" and response.xpath(field_xpath).extract())
             else 0
             for field_xpath in Final_Xpath[key]
         ]
         if found_flags in ([0, 0], [0]):
             l.add_value(key, "")
         else:
             # Values are taken relative to the current node `i`; XPaths that
             # match nothing under `i` add nothing.
             for field_xpath in Final_Xpath[key]:
                 extracted = i.xpath(field_xpath).extract()
                 if extracted:
                     l.add_value(key, extracted)
     except Exception as e:
         # Best-effort: report the problem and carry on with the next key.
         print("%s , %s" % (Exception, e))
 #将除了All_Xpath中的数据提取出来,像豆瓣就特别需要这种情况,一般下面的数据是(多次取得),All_Xpath中才是真正单条的数据
 for key in my_Final_Xpath.keys():
     item.fields[key] = Field()
     try:
         if map(
                 lambda x: 1 if x else 0,
                 map(
                     lambda x: response.xpath(x).extract()
                     if x != "/" else "",
                     Final_Xpath[key])) in [[0, 0], [
                         0
Exemple #2
0
				# Remove the 'All_Xpath' section from my_Final_Xpath and keep a shallow
				# copy of it, so the deletion below does not touch the removed mapping.
				All_Xpath = my_Final_Xpath.pop('All_Xpath').copy()
				# Take the 'all_xpath' entry out of the copy; All_Xpath keeps the rest.
				all_xpath = All_Xpath.pop('all_xpath')
				for i in response.xpath(all_xpath[0]):
						# One item and one loader per node matched by all_xpath[0].
						item = MovieSpiderItem()
						l = ItemLoader(item=item, response=response)
						# Extract the fields listed in All_Xpath.
						for key in All_Xpath:
								item.fields[key] = Field()
								try:
										# ItemLoader silently drops a field when its add_xxx methods find
										# no value; we do not want the field dropped, so it is set to "".
										#
										# One flag per XPath configured for this key: 1 when the page-level
										# query returns something, 0 otherwise. "/" is a placeholder that is
										# never queried and always counts as empty.
										# NOTE(review): only one- or two-element XPath lists are recognised
										# as "all empty" by the comparison below.
										found_flags = [
												1 if (field_xpath != "/" and response.xpath(field_xpath).extract()) else 0
												for field_xpath in Final_Xpath[key]
										]
										if found_flags in ([0, 0], [0]):
												l.add_value(key, "")
										else:
												# Values are taken relative to the current node `i`; XPaths
												# that match nothing under `i` add nothing.
												for field_xpath in Final_Xpath[key]:
														extracted = i.xpath(field_xpath).extract()
														if extracted:
																l.add_value(key, extracted)
								except Exception as e:
										# Best-effort: report the problem and carry on with the next key.
										print("%s , %s" % (Exception, e))
						# Extract the remaining fields (everything outside All_Xpath). Sites such as
						# Douban particularly need this: the data below is generally fetched
						# repeatedly, while All_Xpath holds the truly per-record data.
						for key in my_Final_Xpath:
								item.fields[key] = Field()
								try:
										# The flags are computed before the "site_name" check, so the
										# entries of Final_Xpath["site_name"] are queried as well and any
										# error they raise skips the key.
										found_flags = [
												1 if (field_xpath != "/" and response.xpath(field_xpath).extract()) else 0
												for field_xpath in Final_Xpath[key]
										]
										if key == "site_name":
												# site_name entries are stored as literal values, not XPaths.
												for literal_value in my_Final_Xpath[key]:
														l.add_value(key, literal_value)
										elif found_flags in ([0, 0], [0]):
												l.add_value(key, "")
										else:
												# Only register XPaths that match something on the page.
												for field_xpath in Final_Xpath[key]:
														if response.xpath(field_xpath).extract():
																l.add_xpath(key, field_xpath)
								except Exception as e:
										# Best-effort: report the problem and carry on with the next key.
										print("%s : %s" % (Exception, e))