def readDetailHost(self, response):
    """Handle a host detail response and persist the host if it is new.

    This is a generator callback. It yields, in order:

    1. the value returned by ``self.checkNextPage(meta)``;
    2. ``{'image_urls': [<profile image URL>]}`` -- only when
       ``item.checkExisted()`` is falsy.

    For a host that does not exist yet, ``image_profile`` is replaced by
    ``<sha1 hex digest of the URL>.jpg``, the page content is parsed, and
    the host is inserted. When the insert returns a positive id, the
    counter stored under ``meta["cityName"]`` in ``self.state`` is
    incremented and one object-tag row is written for every positive id
    in ``item.listTagId``.

    Args:
        response: response object whose ``meta`` mapping provides the
            ``"item"`` and ``"cityName"`` entries.

    NOTE(review): this file holds another definition that is also named
    ``readDetailHost``; if both sit in the same scope the later one wins
    -- confirm which one is meant to be active.
    """
    meta = response.meta
    raw_item = meta["item"]
    item = Host()
    item.parse(raw_item)
    # Parenthesised single argument: prints the same text under a Python 2
    # print statement and under the Python 3 print function.
    print("===https://vinalo.com/%s-%s" % (item.alias, item.crawler))

    yield self.checkNextPage(meta)

    if item.checkExisted():
        return

    image_url = item.image_profile
    yield {'image_urls': [image_url]}

    # sha1() needs bytes. Encoding explicitly avoids a UnicodeEncodeError for
    # non-ASCII URLs on Python 2 and a TypeError for text on Python 3; for
    # ASCII URLs the digest is unchanged.
    if not isinstance(image_url, bytes):
        image_url = image_url.encode("utf-8")
    image_guid = hashlib.sha1(image_url).hexdigest()
    item.image_profile = '%s.jpg' % (image_guid)

    item.parseContent(response)
    host_id = item.insertDB()
    if host_id > 0:
        city_name = meta["cityName"]
        self.state[city_name] = self.state.get(city_name, 0) + 1

        # Store tags.
        # NOTE(review): the original indentation was lost; the tag rows are
        # assumed to be written only after a successful insert because they
        # need the new host id -- confirm.
        object_tag = ObjectTag()
        for tag_id in item.listTagId:
            if tag_id > 0:
                object_tag.insertNewObjectTag(host_id, tag_id)
def readDetailHost(self, response):
    """Store the host carried in ``response.meta["item"]`` unless it exists.

    A ``Host`` is built from the ``"item"`` entry of ``response.meta``.
    If ``checkExisted()`` reports it as present, nothing else happens.
    Otherwise the response content is parsed into the host and the host
    is inserted; a positive insert id bumps the counter kept in
    ``self.state`` under ``response.meta["cityName"]``.

    Args:
        response: response object exposing a ``meta`` mapping.

    Returns:
        None in every case.
    """
    payload = response.meta["item"]
    host = Host()
    host.parse(payload)

    if not host.checkExisted():
        host.parseContent(response)
        inserted_id = host.insertDB()
        if inserted_id > 0:
            # Count one more newly stored host for this city.
            city = response.meta["cityName"]
            seen_so_far = self.state.get(city, 0)
            self.state[city] = seen_so_far + 1