def process_item(self, item, spider):
    """Download each image listed in ``item['image_urls']`` to local storage.

    Every entry of ``item['image_urls']`` is read with ``.get('url')``. The
    image is stored at ``<self.savePath>/full/<hash>.jpg``, where ``<hash>``
    is whatever ``EncryptUtil.md5(url)`` returns. A file that already exists
    at that location is not downloaded again.

    For each entry one result dict is appended to the local ``image_urls``
    list:

    * ``{'ok': True, 'x': {'url': url, 'path': path}}`` when the file is
      already present or was downloaded with HTTP status 200;
    * ``{'ok': False, 'x': {'url': url}}`` when the response status is not
      200 or the download/write raised an exception.

    Failures never propagate: they are logged through ``spider.logDao`` and
    recorded as ``'ok': False`` so that one broken image does not abort the
    remaining ones.

    :param item: mapping with an ``'image_urls'`` iterable of mappings that
        each carry a ``'url'`` key.
    :param spider: object exposing ``logDao.info`` / ``logDao.warn``.
    """
    image_urls = []
    for image_url in item['image_urls']:
        url = image_url.get('url')
        urlHash = EncryptUtil.md5(url)
        path = 'full/' + str(urlHash) + '.jpg'
        detailPath = self.savePath + '/' + path

        # Create the target directory if it is missing.
        saveDir = self.savePath + '/full'
        if not FileUtil.dirIsExist(saveDir):
            FileUtil.createDir(saveDir)

        if FileUtil.fileIsExist(detailPath):
            spider.logDao.info(u'图片已经存在本地:' + url)
            image_url_new = {
                'ok': True,
                'x': {
                    'url': url,
                    'path': path
                }
            }
        else:
            # Start from the failure result; it is replaced only after the
            # file has been written completely.
            image_url_new = {
                'ok': False,
                'x': {
                    'url': url,
                }
            }
            try:
                fileResponse = requests.get(url, timeout=10)
                if fileResponse.status_code == 200:
                    # The context manager closes the handle even when the
                    # write fails part-way through.
                    with open(detailPath, 'wb') as imageFile:
                        imageFile.write(fileResponse.content)
                    image_url_new = {
                        'ok': True,
                        'x': {
                            'url': url,
                            'path': path
                        }
                    }
                    spider.logDao.info(u'图片成功下载:' + url)
                else:
                    spider.logDao.info(u'下载图片失败:' + url)
            except Exception as e:
                # Deliberately broad: any network or filesystem error is
                # downgraded to a failed result for this image only.
                print(e)
                spider.logDao.warn(u'下载图片失败:' + url)

        image_urls.append(image_url_new)
        # Idle for 2s before handling the next image.
        TimerUtil.sleep(2)