Ejemplo n.º 1
0
    def process_item(self, item, spider):
        """Fetch every image listed in ``item['image_urls']`` to local disk.

        Each entry of ``item['image_urls']`` is read with ``.get('url')``.
        The target file is ``<self.savePath>/full/<EncryptUtil.md5(url)>.jpg``;
        when that file already exists it is reused and no request is made.

        One result dict per entry is appended to a local list:

        * ``{'ok': True, 'x': {'url': url, 'path': 'full/<hash>.jpg'}}`` when
          the file is already present or was downloaded with HTTP 200;
        * ``{'ok': False, 'x': {'url': url}}`` for any other status code or
          when an exception is raised while downloading/writing.

        :param item: mapping providing an ``'image_urls'`` iterable.
        :param spider: object exposing ``logDao.info`` and ``logDao.warn``.

        NOTE(review): the visible code neither writes the collected list back
        to ``item`` nor returns ``item``.  Scrapy pipelines are expected to
        return the item -- confirm whether the tail of this method is missing.
        """
        image_urls = []
        for image_url in item['image_urls']:
            url = image_url.get('url')
            urlHash = EncryptUtil.md5(url)
            path = 'full/' + str(urlHash) + '.jpg'
            detailPath = self.savePath + '/' + path

            # Create the target directory on demand (checked per entry, so an
            # empty batch leaves the file system untouched).
            saveDir = self.savePath + '/full'
            if not FileUtil.dirIsExist(saveDir):
                FileUtil.createDir(saveDir)

            # Stays False unless the image is available locally at the end.
            downloaded = False
            if FileUtil.fileIsExist(detailPath):
                spider.logDao.info(u'图片已经存在本地:' + url)
                downloaded = True
            else:
                try:
                    fileResponse = requests.get(url, timeout=10)
                    if fileResponse.status_code == 200:
                        # ``with`` closes the handle even if the write fails.
                        with open(detailPath, 'wb') as image_file:
                            image_file.write(fileResponse.content)
                        spider.logDao.info(u'图片成功下载:' + url)
                        downloaded = True
                    else:
                        # Same severity as the exception path below.
                        spider.logDao.warn(u'下载图片失败:' + url)
                except Exception as e:
                    # Best effort: one broken image must not abort the batch.
                    print(e)
                    spider.logDao.warn(u'下载图片失败:' + url)

            if downloaded:
                image_url_new = {
                    'ok': True,
                    'x': {
                        'url': url,
                        'path': path
                    }
                }
            else:
                image_url_new = {
                    'ok': False,
                    'x': {
                        'url': url,
                    }
                }
            image_urls.append(image_url_new)
            # Idle for 2s between entries.
            TimerUtil.sleep(2)
Ejemplo n.º 2
0
def downLoadImage(image_url_sources):
    """Download the images described by ``image_url_sources``.

    Each element is read with ``.get('url')``.  Files are stored in an
    ``image`` directory next to this module, named
    ``<EncryptUtil.md5(url)>.jpg``.  A file that already exists is reused and
    no request is made.  Downloads whose payload exceeds 100 KiB are handed to
    ``ImageCompressUtil().resizeImg`` (600x600 target, save quality 80) and
    overwritten in place.

    One result dict per element is appended to a local list:

    * ``{'ok': True, 'x': {'url', 'path', 'fileName'}}`` when the file is
      already present or was downloaded with HTTP 200;
    * ``{'ok': False, 'x': {'url'}}`` for any other status code or when an
      exception is raised while downloading, writing or resizing.

    :param image_url_sources: iterable of mappings with a ``'url'`` key.

    NOTE(review): the visible code never returns the collected list --
    confirm whether a trailing ``return image_urls`` is missing.
    """
    image_urls = []
    # The storage directory does not depend on the element; resolve it once.
    file_path = os.path.dirname(os.path.realpath(__file__)) + u'/image'
    for image_url in image_url_sources:
        # Created on demand, so an empty input creates nothing.
        if not os.path.isdir(file_path):
            os.mkdir(file_path)
        url = image_url.get('url')
        print(url)
        urlHash = EncryptUtil.md5(url)
        fileName = str(urlHash) + '.jpg'
        # os.path.join instead of a hard-coded '\\': on POSIX the backslash is
        # an ordinary character and the file would land outside ``image``.
        detailPath = os.path.join(file_path, fileName)

        # Stays False unless the image is available locally at the end.
        downloaded = False
        if FileUtil.fileIsExist(detailPath):
            print(u'图片已经存在本地:' + url)
            downloaded = True
        else:
            try:
                fileResponse = requests.get(url, timeout=10)
                if fileResponse.status_code == 200:
                    content = fileResponse.content
                    # ``with`` closes the handle even if the write fails.
                    with open(detailPath, 'wb') as image_file:
                        image_file.write(content)
                    # Payloads above 100 KiB are resized to 600x600 at
                    # quality 80, replacing the file just written.
                    if len(content) > 100 * 1024:
                        ImageCompressUtil().resizeImg(ori_img=detailPath,
                                                      dst_img=detailPath,
                                                      dst_w=600,
                                                      dst_h=600,
                                                      save_q=80)
                    # Example source URL:
                    # http://p0.ifengimg.com/pmop/2017/1010/E66C2599CE9403A670AD405F4CCAB271B366D7DC_size415_w1290_h692.png
                    # ``//`` keeps whole-kb output on Python 2 and 3 alike.
                    print(u'图片成功下载,大小:' + str(
                        len(content) // 1024) + 'kb ' + url)
                    print(u'最终存储图片,大小:' + str(
                        os.path.getsize(detailPath) // 1024) + 'kb ' + url)
                    downloaded = True
                else:
                    print(u'下载图片失败:' + url)
            except Exception as e:
                # Best effort: one broken image must not abort the batch, but
                # the cause is printed instead of being dropped silently.
                print(e)
                print(u'下载图片失败:' + url)

        if downloaded:
            image_url_new = {
                'ok': True,
                'x': {
                    'url': url,
                    'path': detailPath,
                    'fileName': fileName
                }
            }
        else:
            image_url_new = {
                'ok': False,
                'x': {
                    'url': url,
                }
            }
        image_urls.append(image_url_new)
        # Idle for 2s between elements.
        TimerUtil.sleep(2)