def process_item(self, item, spider):
    """Download every image referenced by ``item['image_urls']`` to disk.

    Each entry of ``item['image_urls']`` is read with ``.get('url')``. The
    image is stored as ``<self.savePath>/full/<EncryptUtil.md5(url)>.jpg``;
    a file that is already present is not downloaded again. After every
    entry the method pauses via ``TimerUtil.sleep(2)``.

    When all entries have been handled, ``item['image_urls']`` is replaced
    by the list of per-image results and the item is returned so that later
    pipeline stages still receive it. Each result is either
    ``{'ok': True, 'x': {'url': url, 'path': 'full/<hash>.jpg'}}`` or
    ``{'ok': False, 'x': {'url': url}}``.

    :param item: mapping-like object with an ``image_urls`` entry.
    :param spider: object whose ``logDao`` offers ``info`` and ``warn``.
    :return: the same ``item`` object that was passed in.
    """
    results = []
    for image_url in item['image_urls']:
        url = image_url.get('url')
        path = 'full/' + str(EncryptUtil.md5(url)) + '.jpg'
        detail_path = self.savePath + '/' + path

        # Create the target directory on demand.
        save_dir = self.savePath + '/full'
        if not FileUtil.dirIsExist(save_dir):
            FileUtil.createDir(save_dir)

        stored = {'ok': True, 'x': {'url': url, 'path': path}}
        failed = {'ok': False, 'x': {'url': url}}

        if FileUtil.fileIsExist(detail_path):
            spider.logDao.info(u'图片已经存在本地:' + url)
            result = stored
        else:
            try:
                response = requests.get(url, timeout=10)
                if response.status_code == 200:
                    # ``with`` guarantees the handle is flushed and closed
                    # even if the write fails part-way.
                    with open(detail_path, 'wb') as image_file:
                        image_file.write(response.content)
                    result = stored
                    spider.logDao.info(u'图片成功下载:' + url)
                else:
                    spider.logDao.info(u'下载图片失败:' + url)
                    result = failed
            except Exception as e:
                # Best effort: one broken image must not abort the item.
                print(e)
                spider.logDao.warn(u'下载图片失败:' + url)
                result = failed

        results.append(result)
        # Idle for 2s between images.
        TimerUtil.sleep(2)

    # NOTE(review): the results are written back under the key they were read
    # from; confirm that downstream consumers expect this result shape.
    item['image_urls'] = results
    return item
def downLoadImage(image_url_sources):
    """Download the given images into an ``image`` directory beside this file.

    Each entry of ``image_url_sources`` is read with ``.get('url')``. The
    image is saved as ``<dir of this file>/image/<EncryptUtil.md5(url)>.jpg``;
    a file that is already present is not downloaded again. A downloaded
    body larger than 100 KB is handed to ``ImageCompressUtil().resizeImg``
    with a 600x600 target and save quality 80, overwriting the file in
    place. After every entry the function pauses via ``TimerUtil.sleep(2)``.

    :param image_url_sources: iterable of mapping-like objects with a
        ``url`` entry.
    :return: list with one result per entry, either
        ``{'ok': True, 'x': {'url': ..., 'path': ..., 'fileName': ...}}`` or
        ``{'ok': False, 'x': {'url': ...}}``.
    """
    # Loop-invariant: the directory that receives all downloads.
    image_dir = os.path.dirname(os.path.realpath(__file__)) + u'/image'
    size_limit = 100 * 1024  # bodies above this many bytes are resized
    dst_w = 600              # target image width
    dst_h = 600              # target image height
    save_q = 80              # quality used when saving the resized image

    results = []
    for image_url in image_url_sources:
        if not os.path.isdir(image_dir):
            os.mkdir(image_dir)

        url = image_url.get('url')
        print(url)
        file_name = str(EncryptUtil.md5(url)) + '.jpg'
        # os.path.join picks the platform separator; a hard-coded backslash
        # would become part of the file name on non-Windows systems.
        detail_path = os.path.join(image_dir, file_name)

        stored = {
            'ok': True,
            'x': {'url': url, 'path': detail_path, 'fileName': file_name},
        }
        failed = {'ok': False, 'x': {'url': url}}

        if FileUtil.fileIsExist(detail_path):
            print(u'图片已经存在本地:' + url)
            result = stored
        else:
            try:
                response = requests.get(url, timeout=10)
                if response.status_code == 200:
                    content = response.content
                    # ``with`` guarantees the handle is closed before the
                    # file is reopened for resizing.
                    with open(detail_path, 'wb') as image_file:
                        image_file.write(content)
                    if len(content) > size_limit:
                        ImageCompressUtil().resizeImg(
                            ori_img=detail_path, dst_img=detail_path,
                            dst_w=dst_w, dst_h=dst_h, save_q=save_q)
                    result = stored
                    # Floor division keeps the sizes integral on Python 3.
                    print(u'图片成功下载,大小:'
                          + str(len(content) // 1024) + 'kb ' + url)
                    print(u'最终存储图片,大小:'
                          + str(os.path.getsize(detail_path) // 1024)
                          + 'kb ' + url)
                else:
                    print(u'下载图片失败:' + url)
                    result = failed
            except Exception:
                # Best effort: one broken image must not abort the batch.
                print(u'下载图片失败:' + url)
                result = failed

        results.append(result)
        # Idle for 2s between images.
        TimerUtil.sleep(2)

    return results