class ImageSpider(BaseSpider):
    """Download one image and put its color-feature string on a queue.

    On success, ``run`` places a two-item list on the queue:
    ``[image_url, features]``, where ``features`` is the output of
    ``ColorDescriptor.describe`` with every element converted via ``str``
    and joined with single spaces.
    """

    def __init__(self, img_url, number, queue):
        """Store the job parameters and build the color descriptor.

        Args:
            img_url: URL of the image to fetch.
            number: Identifier for this job; stored as a string in
                ``self.number`` (not read anywhere else in this class).
            queue: Object whose ``put`` method receives the result list.
        """
        # Explicit base-class call kept as written; BaseSpider is not
        # visible here, so whether super() would work is unknown.
        BaseSpider.__init__(self)
        self.image_url = img_url
        self.number = str(number)
        self.queue = queue
        self.cd = ColorDescriptor((8, 12, 3))

    def run(self):
        """Fetch the image, compute its features and enqueue the result.

        Returns None in every case. When ``self.getResponse`` (defined
        outside this class, presumably on BaseSpider -- confirm) yields
        None, nothing is enqueued. Any exception raised while decoding the
        image, describing it, or enqueuing the result is printed and
        swallowed, so one bad image does not stop the caller.
        """
        response = self.getResponse(self.image_url)
        # Identity test: None is a singleton, and ``==`` could be
        # intercepted by a custom __eq__ on the response object.
        if response is None:
            return

        try:
            image = Image.open(StringIO(response.content))
            pixels = np.asarray(image)
            feature = self.cd.describe(pixels)
            feature_text = ' '.join(str(value) for value in feature)
            self.queue.put([self.image_url, feature_text])
        except Exception as e:
            # Deliberate best-effort: report the failure and carry on.
            # print(e) emits the same text under the Python 2 print
            # statement and the Python 3 print function.
            print(e)
def __init__(self, img_url, number, queue):
    """Initialise the spider for a single image job.

    Runs the BaseSpider initialiser, then records the image URL, the job
    number (converted to a string), the result queue, and a
    ColorDescriptor built with bins (8, 12, 3).
    """
    BaseSpider.__init__(self)

    # Evaluate the values in the same order as before, then bind them.
    number_text = str(number)
    descriptor_bins = (8, 12, 3)
    descriptor = ColorDescriptor(descriptor_bins)

    self.image_url = img_url
    self.number = number_text
    self.queue = queue
    self.cd = descriptor
from bs4 import BeautifulSoup import requests as req import re import cv2 from StringIO import StringIO from color.colordescriptor import ColorDescriptor from PIL import Image import numpy as np headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:40. 0) Gecko/20100101 Firefox/40.0'} cookies = dict(is_click='1') url = 'http://www.topit.me/album/2026272?p=' page = 80 conn = db.connect(host='localhost',user='******',passwd='123',db='test2') cur = conn.cursor() cd = ColorDescriptor((8, 12, 3)) for i in range(42): current_url = url+str(i+1) print 'page:',i try: response = req.get(current_url,headers=headers,cookies=cookies) except Exception,e: print e,'12' continue soup = BeautifulSoup(response.text) atags = soup.find_all('a',href=re.compile('item')) for temp in set(atags): a = temp.get('href') print a try: response = req.get(a,headers=headers,cookies=cookies)