class ImageSpider(BaseSpider):
    """Spider that fetches one image and queues its colour feature vector.

    The result placed on ``queue`` is a two-item list
    ``[image_url, feature_string]`` where ``feature_string`` is the
    descriptor's values joined by single spaces.
    """

    def __init__(self, img_url, number, queue):
        """Store the target URL, the spider's number and the output queue.

        Args:
            img_url: URL of the image to download.
            number: Identifier for this spider; stored as a string.
            queue: Object with a ``put`` method that receives the result.
        """
        # Explicit base-class call kept as in the original (BaseSpider is
        # defined elsewhere and may not support ``super()``).
        BaseSpider.__init__(self)
        self.image_url = img_url
        self.number = str(number)
        self.queue = queue
        # NOTE(review): (8, 12, 3) is presumably the descriptor's bin
        # configuration -- confirm against ColorDescriptor.
        self.cd = ColorDescriptor((8, 12, 3))

    def run(self):
        """Download the image, describe it and put the result on the queue.

        Returns ``None`` in all cases.  Nothing is queued when no response
        is obtained or when decoding/describing the image fails; in the
        latter case the exception is printed and swallowed.
        """
        # getResponse is not defined in this class (presumably inherited
        # from BaseSpider); a None result is treated as "nothing fetched".
        response = self.getResponse(self.image_url)
        if response is None:
            return
        try:
            image = Image.open(StringIO(response.content))
            imgArr = np.asarray(image)
            feature = self.cd.describe(imgArr)
            temp = ' '.join(str(i) for i in feature)
            self.queue.put([self.image_url, temp])
        except Exception as e:
            # Best-effort worker: report the failure and carry on.
            print(e)
a = temp.get('href') print a try: response = req.get(a,headers=headers,cookies=cookies) soup = BeautifulSoup(response.text) except Exception,e: print e,'13' continue try: img_url = soup.find_all('a',rel=re.compile('light'))[0].get('href') except IndexError,e: print e,'list' continue try: response = req.get(img_url,headers=headers,cookies=cookies) except Exception,e: print e,'14' continue image = Image.open(StringIO(response.content)) imgArr = np.asarray(image) try: feature = cd.describe(imgArr) except Exception,e: print e continue temp = ' '.join((str(i) for i in feature)) if not cur.execute('select url from image where url="%s"'%img_url): cur.execute("insert into image(url,hist) value('%s','%s')"%(img_url,temp)) conn.commit()