Example #1
0
 def file_downloaded(self, response, request, info):
     """Persist the response body under a path built from the item in ``response.meta``.

     The storage path is ``<folder>/<pin_id>.<pic_type>``, each part taken from
     the first element of the matching field of ``response.meta['item']``.

     Returns:
         The value of ``md5sum`` over the response body.
     """
     item = response.meta.get('item')
     path = item['folder'][0] + '/' + item['pin_id'][0] + '.' + item['pic_type'][0]
     buf = BytesIO(response.body)
     # Hash before persisting and rewind afterwards, so the checksum never
     # depends on where the store leaves the buffer position.
     checksum = md5sum(buf)
     buf.seek(0)
     self.store.persist_file(path, buf, info)
     return checksum
Example #2
0
    def _transform_downloaded(self, response, request, info, *, item):
        """Store a downloaded file and stitch a date's tiles once all have arrived.

        Non-XML files are recorded in ``item.fields['date_tiles']`` under the
        directory part of the request URL path. When the recorded set equals
        ``item['tile_chklist']`` for that key, the tiles are passed to
        ``stitch_and_reproject`` and the key is removed.

        Returns:
            The value of ``md5sum`` over the response body.
        """
        _, save_path = parse_save_url(request.url)
        stream = BytesIO(response.body)
        digest = md5sum(stream)
        stream.seek(0)
        self.store.persist_file(save_path, stream, info)

        if save_path.endswith("xml"):
            return digest

        date_key = os.path.dirname(urlparse(request.url).path)
        tiles_by_date = item.fields['date_tiles']
        tiles_by_date.setdefault(date_key, set()).add(save_path)
        if tiles_by_date[date_key] != item['tile_chklist'][date_key]:
            # Still waiting for more tiles of this date.
            return digest

        product = os.path.dirname(save_path)
        tile_list = [
            os.path.join(settings.FILES_STORE, tile)
            for tile in tiles_by_date[date_key]
        ]
        # NOTE(review): subset, output_type and save_stitch_file are not
        # defined in this method; presumably module-level settings.
        stitch_and_reproject(
            tile_list,
            ALL_DONE_FILE_OUTPUT_PATH,
            product=product,
            date=self.parse_output_date(request.url),
            reproject_options=item.reproject_options,
            subset=subset,
            outformat=output_type,
            save_stitch_file=yes_no_parser(save_stitch_file),
        )
        tiles_by_date.pop(date_key)
        return digest
Example #3
0
 def file_downloaded(self, response, request, info):
     """Write the response body to the store and return its checksum."""
     target = self.file_path(request, response=response, info=info)
     payload = BytesIO(response.body)
     digest = md5sum(payload)
     # Rewind after hashing so the store receives the buffer from the start.
     payload.seek(0)
     self.store.persist_file(target, payload, info)
     return digest
Example #4
0
    def file_downloaded(self, response, request, info, unzip_path=None):
        """Persist a downloaded archive, unpack it and upload every entry.

        After the body is stored, the archive is unpacked through
        ``FileService.unzip_files``. For each entry of the unpack directory a
        ``metadata.txt`` and ``report.txt`` are written, the entry is re-zipped
        and the result is uploaded to a bucket named after
        ``metadata['extension']``.

        Args:
            unzip_path: Ignored; it is always recomputed from ``base_unzip_path``.

        Returns:
            The value of ``md5sum`` over the response body.
        """
        path = self.file_path(request, response=response, info=info)
        buf = BytesIO(response.body)
        checksum = md5sum(buf)
        buf.seek(0)
        self.store.persist_file(path, buf, info)

        archive_name = path.split("/")[-1]
        key = archive_name.split(".")[0]

        FileService.unzip_files(zip_download_path + "/" + path, key)
        unzip_path = base_unzip_path + "/" + key
        zip_path = unzip_path + "/"

        for entry in os.listdir(unzip_path):
            file_path = zip_path + entry
            json_response = {}  # MaliciousCheck.check_malicious(file_path)
            # NOTE(review): metadata is read for the archive `path`, not for
            # the individual entry; confirm this is intended.
            metadata = FileService.get_file_meta(path)
            # dict.update() mutates in place and returns None, so there is
            # nothing useful to bind its result to.
            metadata.update(json_response)
            with open(zip_path + 'metadata.txt', 'w') as outfile:
                json.dump(metadata, outfile)
            with open(zip_path + 'report.txt', 'w') as outfile:
                json.dump(json_response, outfile)

            zipped_file = FileService.zip_files(file_path, key=key)
            bucket_name = metadata['extension']
            client = MinioClient.get_client()
            if not client.bucket_exists(bucket_name):
                client.create_bucket(bucket_name)
            client.upload_file(bucket_name, archive_name, zipped_file)

        return checksum
    def image_downloaded(self, response, request, info):
        """Persist the first image from ``get_images`` under a sequential file name.

        The file is stored as ``full/dl/<image_directory_name>/<count>.jpg``,
        where the count is ``self.count`` zero-padded to ten digits;
        ``self.count`` is then incremented.

        Returns:
            The checksum of the first image buffer, or None if no image was yielded.
        """
        digest = None
        print("*****pipeline****")
        for _, img, stream in self.get_images(response, request, info):
            if digest is None:
                stream.seek(0)
                digest = md5sum(stream)

            img_width, img_height = img.size

            sequence_name = "{0:010}.jpg".format(self.count)
            directory = response.meta['image_directory_name']
            self.count += 1

            self.store.persist_file(
                'full/dl/{0}/{1}'.format(directory, sequence_name),
                stream,
                info,
                meta={'width': img_width, 'height': img_height},
            )

            # NOTE(review): returning here means only the first yielded image
            # is ever stored; confirm that is intended.
            return digest
Example #6
0
 def image_downloaded(self, response, request, info, *, item=None):
     """Persist every image from ``get_images``, logging failures instead of raising.

     Errors raised while advancing the image iterator are logged and the loop
     moves on to the next ``next()`` call; iteration ends on ``StopIteration``.

     Returns:
         The checksum of the first image buffer, or None if none was produced.
     """
     digest = None
     images = self.get_images(response, request, info, item=item)
     while True:
         try:
             target, picture, stream = next(images)
         except StopIteration:
             break
         except OSError:
             logger.exception('Could not process image')
             continue
         except Exception:
             logger.exception('Stopped processing images')
             continue
         if digest is None:
             stream.seek(0)
             digest = md5sum(stream)
         pic_width, pic_height = picture.size
         dimensions = {'width': pic_width, 'height': pic_height}
         self.store.persist_file(
             target,
             stream,
             info,
             meta=dimensions,
             headers={'Content-Type': 'image/jpeg'},
         )
     return digest
Example #7
0
    def image_downloaded(self, response, request, info, item=None):
        """Validate, convert and persist a downloaded image.

        The body is opened as an image, rejected if either dimension exceeds
        ``self.max_size_px``, converted via ``self.convert_image`` and stored
        with an ``image/png`` content type.

        Raises:
            ImageException: If the body cannot be identified as an image or
                the image is larger than ``self.max_size_px`` in any dimension.

        Returns:
            The value of ``md5sum`` over the converted image buffer.
        """
        path = self.file_path(request, response=response, info=info)

        try:
            orig_image = Image.open(BytesIO(response.body))
        except UnidentifiedImageError as err:
            # Chain the cause so the original decoder error stays visible.
            raise ImageException(
                f'Image cannot be identified ({request.url})'
            ) from err

        width, height = orig_image.size
        if width > self.max_size_px or height > self.max_size_px:
            # The image exceeds the limit, so the comparison shown is ">".
            raise ImageException(
                f'Image too large ({width}x{height} > {self.max_size_px}x{self.max_size_px})'
            )

        image, buffer = self.convert_image(orig_image)
        buffer.seek(0)
        checksum = md5sum(buffer)

        # Report the dimensions of the converted image, not the original.
        width, height = image.size
        self.store.persist_file(path,
                                buffer,
                                info,
                                meta={
                                    'width': width,
                                    'height': height
                                },
                                headers={'Content-Type': 'image/png'})
        return checksum
Example #8
0
    def image_downloaded(self, response, request, info):
        """Write every image from ``get_images`` into an HBase table.

        Each image buffer is stored under its path as row key in
        ``self.table_name``. Storage errors are printed, not raised.

        Returns:
            The checksum of the first image buffer, or None if no image was yielded.
        """
        print("*\n" * 5, "正在下载图片")
        digest = None
        for row_key, _image, stream in self.get_images(response, request, info):
            if digest is None:
                stream.seek(0)
                digest = md5sum(stream)
            stamp = self._get_time()
            try:
                with self.pool.connection() as connection:
                    hbase_table = connection.table(self.table_name)
                    row = {
                        "cf:content": stream.getvalue(),
                        # NOTE(review): size is a fixed literal, not derived
                        # from the image.
                        "cf:size": "880X600",
                    }
                    hbase_table.put(row_key, row)
                    connection.close()
                    print("successfully storing image into hbase,{time},{id}".format(
                        time=stamp, id=row_key))
            except Exception as exc:
                print("Caught Hbase exception of image storing:{e}".format(e=str(exc)))
                print("failed storing image into hbase,{time},{id}".format(
                    time=stamp, id=row_key))
        return digest
Example #9
0
    def image_downloaded(self, response, request, info):
        """Persist the first image if both sides exceed 600 px and save a thumbnail.

        A 200x200-bounded JPEG thumbnail is written below ``./data/images``
        with ``.jpg`` replaced by ``_thumb.jpg`` in the image path.

        Returns:
            The checksum of the first image buffer, or None when that image is
            too small or no image was yielded.
        """
        digest = None
        for path, image, buf in self.get_images(response, request, info):
            if digest is None:
                buf.seek(0)
                digest = md5sum(buf)
            width, height = image.size

            if not (width > 600 and height > 600):
                return None

            self.store.persist_file(
                path,
                buf,
                info,
                meta={'width': width, 'height': height},
                headers={'Content-Type': 'image/jpeg'},
            )
            thumb = image.copy()
            thumb.thumbnail((200, 200))
            images_root = os.path.abspath('./data/images')
            thumb_path = os.path.abspath(
                os.path.join(images_root, path.replace('.jpg', '_thumb.jpg')))
            thumb_dir, thumb_name = os.path.split(thumb_path)
            os.makedirs(thumb_dir, exist_ok=True)
            thumb.save(os.path.join(thumb_dir, thumb_name), 'JPEG')
            return digest
        return digest
Example #10
0
    def image_downloaded(self, response, request, info):
        """Persist the first image from ``get_images`` and return its checksum.

        If the store raises, the failure is logged at CRITICAL level with the
        traceback and the process is terminated through ``sys.exit()``.

        Returns:
            The checksum of the first image buffer, or None if no image was yielded.
        """
        checksum = None
        for path, image, buf in self.get_images(response, request, info):
            if checksum is None:
                buf.seek(0)
                checksum = md5sum(buf)
            width, height = image.size

            try:
                self.store.persist_file(path,
                                        buf,
                                        info,
                                        meta={
                                            'width': width,
                                            'height': height
                                        },
                                        headers={'Content-Type': 'image/jpeg'})
                logging.info('We store the following path: %s', path)
            except Exception:
                # Catch Exception rather than everything, so KeyboardInterrupt
                # and SystemExit are not swallowed; keep the traceback.
                logging.critical('We did not store the following path: %s',
                                 path, exc_info=True)
                sys.exit()
            else:
                logging.info('Image_downloaded Checksum: %s', checksum)
                # NOTE(review): returning here stops after the first image.
                return checksum
        return checksum
Example #11
0
 def file_downloaded(self, response, request, info):
     """Store the downloaded body at the pipeline's file path; return its checksum."""
     body_stream = BytesIO(response.body)
     store_path = self.file_path(request, response=response, info=info)
     body_hash = md5sum(body_stream)
     body_stream.seek(0)  # hand the store a buffer positioned at the start
     self.store.persist_file(store_path, body_stream, info)
     return body_hash
Example #12
0
    def image_downloaded(self, response, request, info, *, item=None):
        """Persist all images with size and most-common-colour metadata.

        For each tuple from ``get_images`` the store receives the image buffer
        with a meta dict holding ``width``, ``height`` and the entries from
        ``self.most_common_to_property``.

        Returns:
            A ``(checksum, most_common)`` tuple: the checksum of the first
            image buffer and the ``most_common`` value of the last image. Both
            are None when ``get_images`` yields nothing.
        """
        checksum = None
        # Pre-bind so the return below cannot raise UnboundLocalError when
        # there are no images.
        most_common = None
        for path, image, buf, most_common in self.get_images(response,
                                                             request,
                                                             info,
                                                             item=item):
            if checksum is None:
                buf.seek(0)
                checksum = md5sum(buf)

            # Start the meta data with width and height.
            width, height = image.size
            meta_dict = {'width': width, 'height': height}
            most_common_dict = self.most_common_to_property([most_common])[0]
            meta_dict.update(most_common_dict)

            self.store.persist_file(path,
                                    buf,
                                    info,
                                    meta=meta_dict,
                                    headers={'Content-Type': 'image/jpeg'})
        return checksum, most_common
Example #13
0
 def image_downloaded(self, response, request, info):
     """Persist every image from ``get_images`` and checksum the first buffer.

     Returns:
         The value of ``md5sum`` over the first image buffer, or None when
         ``get_images`` yields nothing.
     """
     first_buf = None
     for key, image, buf in self.get_images(response, request, info):
         self.store.persist_image(key, image, buf, info)
         if first_buf is None:
             first_buf = buf
     if first_buf is None:
         # No images were produced; avoid calling seek() on None.
         return None
     first_buf.seek(0)
     return md5sum(first_buf)
Example #14
0
 def image_downloaded(self, response, request, info):
     """Persist every image from ``get_images``; return the first buffer's checksum."""
     digest = None
     for image_key, picture, stream in self.get_images(response, request, info):
         first_image = digest is None
         if first_image:
             stream.seek(0)
             digest = md5sum(stream)
         self.store.persist_image(image_key, picture, stream, info)
     return digest
Example #15
0
 def image_downloaded(self, response, request, info):
     """Store each yielded image and report the checksum of the first one.

     Returns None when ``get_images`` yields nothing.
     """
     first_checksum = None
     for entry in self.get_images(response, request, info):
         store_key, img, img_buf = entry
         if first_checksum is None:
             # Rewind before hashing.
             img_buf.seek(0)
             first_checksum = md5sum(img_buf)
         self.store.persist_image(store_key, img, img_buf, info)
     return first_checksum
Example #16
0
 def image_downloaded(self, response, request, info):
     """Store all images from ``get_images`` and return the first buffer's checksum.

     Returns:
         The value of ``md5sum`` over the first image buffer after rewinding
         it, or None if no image was yielded.
     """
     first_buf = None
     for key, image, buf in self.get_images(response, request, info):
         self.store.persist_image(key, image, buf, info)
         if first_buf is None:
             first_buf = buf
     if first_buf is None:
         # Nothing was yielded, so there is no buffer to hash.
         return None
     first_buf.seek(0)
     return md5sum(first_buf)
Example #17
0
 def file_downloaded(self, response, request, info):
     """Store the body at ``<folder>/<pin_id>.<pic_type>`` taken from the meta item.

     Each path component is the first element of the corresponding field of
     ``response.meta['item']``.

     Returns:
         The value of ``md5sum`` over the response body.
     """
     item = response.meta.get('item')
     path = '{0}/{1}.{2}'.format(
         item['folder'][0], item['pin_id'][0], item['pic_type'][0])
     buf = BytesIO(response.body)
     # Compute the checksum first and rewind, so it does not depend on
     # whether the store moves the buffer position.
     checksum = md5sum(buf)
     buf.seek(0)
     self.store.persist_file(path, buf, info)
     return checksum
Example #18
0
    def file_downloaded(self, response, request, info):
        """Store the downloaded file in binary form and return its checksum."""
        destination = self.file_path(request, response=response, info=info)
        raw = BytesIO(response.body)
        digest = md5sum(raw)
        raw.seek(0)
        # Delegate the actual write to the store's persist_file method.
        self.store.persist_file(destination, raw, info)
        return digest
Example #19
0
 def file_downloaded(self, response, request, info):
     """Persist the body, encrypted with ``self.cipher`` when one is configured.

     Returns:
         The checksum of the bytes actually stored (encrypted if a cipher is set).
     """
     path = self.file_path(request, response=response, info=info)
     cipher = self.cipher
     payload = response.body if cipher is None else cipher.encrypt(response.body)
     buf = BytesIO(payload)
     checksum = md5sum(buf)
     buf.seek(0)
     self.store.persist_file(path, buf, info)
     return checksum
Example #20
0
    def file_downloaded(self, response, request, info, path):
        """
        Custom file download: store the body at the caller-supplied ``path``.

        Returns the checksum of the response body.
        """
        content = BytesIO(response.body)
        content_md5 = md5sum(content)
        content.seek(0)
        self.store.persist_file(path, content, info)
        return content_md5
Example #21
0
    def persist_file(self, key, file_content, info, filename):
        """Write ``file_content`` below the key directory and return the file's checksum.

        The directory is derived from ``self.basedir`` and the ``/``-separated
        ``key``; the file is written in text mode and re-read in binary mode
        to compute the checksum.
        """
        key_dir = os.path.join(self.basedir, *key.split('/'))
        self._mkdir(key_dir, info)
        destination = self._get_filesystem_path(key, filename)
        with open(destination, "w") as sink:
            sink.write(file_content)

        with open(destination, 'rb') as written:
            return md5sum(written)
Example #22
0
    def persist_file(self, key, file_content, info, filename):
        """Save ``file_content`` as ``filename`` under ``key`` and checksum the result."""
        path_parts = key.split('/')
        self._mkdir(os.path.join(self.basedir, *path_parts), info)

        target = self._get_filesystem_path(key, filename)
        with open(target, "w") as out_file:
            out_file.write(file_content)

        # Hash what actually landed on disk.
        with open(target, 'rb') as stored:
            digest = md5sum(stored)
        return digest
Example #23
0
 def file_downloaded(self, response, request, info):
     """Override that also prints the size of the downloaded file.

     Returns:
         The value of ``md5sum`` over the response body.
     """
     path = self.file_path(request, response=response, info=info)
     buf = BytesIO(response.body)
     checksum = md5sum(buf)
     # len() gives the payload size in bytes; sys.getsizeof() would report
     # the in-memory size of the bytes object including interpreter overhead.
     file_size = len(response.body)
     print(file_size, '************' * 30)
     buf.seek(0)
     self.store.persist_file(path, buf, info)
     return checksum
Example #24
0
    def stat_image(self, key, info):
        """Return modification time and checksum of the stored image for ``key``.

        Returns:
            ``{"last_modified": ..., "checksum": ...}``, or an empty dict when
            the modification time cannot be read (for example because the file
            does not exist).
        """
        absolute_path = self._get_filesystem_path(key)
        try:
            last_modified = os.path.getmtime(absolute_path)
        except OSError:
            # getmtime signals a missing or inaccessible file with OSError;
            # anything else is a real bug and should propagate.
            return {}

        with open(absolute_path, "rb") as imagefile:
            checksum = md5sum(imagefile)

        return {"last_modified": last_modified, "checksum": checksum}
Example #25
0
    def file_downloaded(self, response, request, info):
        """Store the body as ``full/<filename>.pdf`` and record suspiciously small files.

        When the body is shorter than 100 bytes, the response URL is recorded
        in ``self.json['filename']`` and appended to ``self.json['file_urls']``.

        Returns:
            The value of ``md5sum`` over the response body.
        """
        path = 'full/' + request.meta['filename'] + '.pdf'
        body = response.body
        buf = BytesIO(body)
        # Hash before persisting and rewind, so the checksum does not depend
        # on where the store leaves the buffer position.
        checksum = md5sum(buf)
        buf.seek(0)
        self.store.persist_file(path, buf, info)
        # `path` is a store-relative key, so os.path.getsize(path) only works
        # if the working directory is the store root. The body length is the
        # size of what was handed to the store.
        size = len(body)
        if size < 100:
            self.json['filename'][response.url] = request.meta['filename']
            self.json['file_urls'].append(response.url)

        return checksum
Example #26
0
 def image_downloaded(self, response, request, info):
     """Persist every image with its dimensions; return the first buffer's checksum."""
     digest = None
     jpeg_headers = {"Content-Type": "image/jpeg"}
     for target, picture, stream in self.get_images(response, request, info):
         if digest is None:
             stream.seek(0)
             digest = md5sum(stream)
         pic_width, pic_height = picture.size
         dimensions = {"width": pic_width, "height": pic_height}
         self.store.persist_file(target, stream, info, meta=dimensions, headers=jpeg_headers)
     return digest
Example #27
0
    def stat_file(self, path, info):
        """Return ``last_modified`` and ``checksum`` for ``path``; ``{}`` if it cannot be stat'ed."""
        fs_path = self._get_filesystem_path(path)
        try:
            mtime = os.path.getmtime(fs_path)
        except OSError:
            return {}

        with open(fs_path, 'rb') as handle:
            digest = md5sum(handle)
        return {'last_modified': mtime, 'checksum': digest}
Example #28
0
    def stat_file(self, path, info):
        """Return modification time and checksum of the stored file for ``path``.

        Returns:
            ``{'last_modified': ..., 'checksum': ...}``, or an empty dict when
            the modification time cannot be read.
        """
        absolute_path = self._get_filesystem_path(path)
        try:
            last_modified = os.path.getmtime(absolute_path)
        except OSError:
            # Only a missing/inaccessible file means "not stored"; other
            # exceptions are no longer silently swallowed.
            return {}

        with open(absolute_path, 'rb') as f:
            checksum = md5sum(f)

        return {'last_modified': last_modified, 'checksum': checksum}
Example #29
0
 def image_downloaded(self, response, request, info):
     """Persist all images and report checksum, dimensions and on-disk size.

     Returns:
         A dict with ``checksum`` (of the first image buffer) and ``width``,
         ``height`` and ``size`` of the last image persisted. All values are
         None when ``get_images`` yields nothing.
     """
     checksum = None
     # Pre-bind so an empty image stream yields None values, not a NameError.
     width = height = image_size = None
     for key, image, buf in self.get_images(response, request, info):
         if checksum is None:
             buf.seek(0)
             checksum = md5sum(buf)
         self.store.persist_image(key, image, buf, info)
         abs_path = self.store._get_filesystem_path(key)
         image_size = os.path.getsize(abs_path)
         width, height = image.size
     return {"checksum": checksum, "width": width, "height": height, "size": image_size}
Example #30
0
 def file_downloaded(self, response, request, info):
     """Store the downloaded body as ``<title>.mp3`` and return its checksum.

     The file name is built from ``response.meta['title']``, UTF-8 encoded and
     stripped; the pipeline's default ``file_path`` is not used.

     Returns:
         The value of ``md5sum`` over the response body.
     """
     buf = BytesIO(response.body)
     # NOTE(review): on Python 3, formatting the encoded bytes produces a
     # "b'...'" file name; confirm which interpreter this targets.
     fname = "{0}.mp3".format(response.meta['title'].encode('utf-8').strip())
     # Hash before persisting and rewind, so the checksum does not depend on
     # where the store leaves the buffer position.
     checksum = md5sum(buf)
     buf.seek(0)
     self.store.persist_file(fname, buf, info)
     return checksum
Example #31
0
    def stat_file(self, key, info):
        """Return modification time and checksum of the file stored for ``key``.

        Returns:
            ``{'last_modified': ..., 'checksum': ...}``, or an empty dict when
            the modification time cannot be read.
        """
        absolute_path = self._get_filesystem_path(key)
        try:
            last_modified = os.path.getmtime(absolute_path)
        except OSError:
            # Narrowed from a bare except: only filesystem errors mean
            # "no such stored file".
            return {}

        with open(absolute_path, 'rb') as f:
            checksum = md5sum(f)

        return {'last_modified': last_modified, 'checksum': checksum}
Example #32
0
 def image_downloaded(self, response, request, info):
     """Track the largest image seen so far in the global ``biggestItem``.

     Nothing is persisted here; the global is replaced whenever an image's
     pixel area exceeds that of the current ``biggestItem``.

     Returns:
         The checksum of the first image buffer, or None if no image was yielded.
     """
     global biggestItem
     digest = None
     for path, image, buf in self.get_images(response, request, info):
         if digest is None:
             buf.seek(0)
             digest = md5sum(buf)
         width, height = image.size
         best_area = biggestItem['width'] * biggestItem['height']
         if width * height > best_area:
             biggestItem = {
                 'width': width,
                 'height': height,
                 'path': path,
                 'buf': buf,
                 'info': info,
             }
     return digest
Example #33
0
 def image_downloaded(self, response, request, info):
     """Store each yielded image as JPEG with size metadata; return the first checksum."""
     first_md5 = None
     for store_path, img, img_buf in self.get_images(response, request, info):
         if first_md5 is None:
             img_buf.seek(0)
             first_md5 = md5sum(img_buf)
         w, h = img.size
         self.store.persist_file(
             store_path,
             img_buf,
             info,
             meta={'width': w, 'height': h},
             headers={'Content-Type': 'image/jpeg'},
         )
     return first_md5
Example #34
0
 def image_downloaded(self, response, request, info):
     """Persist all images under their keys and return the first buffer's checksum.

     Returns None when ``get_images`` yields nothing.
     """
     result = None
     for item_key, picture, data in self.get_images(response, request, info):
         needs_checksum = result is None
         if needs_checksum:
             data.seek(0)
             result = md5sum(data)
         px_width, px_height = picture.size
         size_meta = {'width': px_width, 'height': px_height}
         content_headers = {'Content-Type': 'image/jpeg'}
         self.store.persist_file(item_key, data, info, meta=size_meta, headers=content_headers)
     return result
Example #35
0
 def image_downloaded(self, response, request, info):
     """Persist images, storing full-size GIFs straight from the response body.

     Paths starting with ``full`` whose image passes ``self.check_gif`` go
     through ``self.persist_gif`` with the raw body; everything else is
     stored via the regular store with an ``image/jpeg`` content type.

     Returns:
         The checksum of the first image buffer, or None if no image was yielded.
     """
     digest = None
     for path, image, buf in self.get_images(response, request, info):
         if digest is None:
             buf.seek(0)
             digest = md5sum(buf)
         width, height = image.size
         keep_as_gif = path.startswith('full') and self.check_gif(image)
         if not keep_as_gif:
             self.store.persist_file(
                 path,
                 buf,
                 info,
                 meta={'width': width, 'height': height},
                 headers={'Content-Type': 'image/jpeg'},
             )
             continue
         # Save the GIF from the response directly.
         self.persist_gif(path, response.body, info)
     return digest
Example #36
0
 def image_downloaded(self, response, request, info):
     """Persist every image under ``full/<last URL segment>``.

     The path yielded by ``get_images`` is replaced by one built from the
     text after the last ``/`` of the request URL, so all yielded images are
     written to the same path.

     Returns:
         The checksum of the first image buffer, or None if no image was yielded.
     """
     checksum = None
     # The URL is the same for every image; compute the target once.
     filename = request.url.rsplit("/", 1)[1]
     path = 'full/{}'.format(filename)
     for _, image, buf in self.get_images(response, request, info):
         if checksum is None:
             buf.seek(0)
             checksum = md5sum(buf)
         width, height = image.size
         # Key was misspelled 'heigth', which made the height metadata
         # unreadable under its expected name.
         self.store.persist_file(
             path, buf, info, meta={'width': width, 'height': height}
         )
     return checksum
Example #37
0
    def stat_file(self, path, info):
        """Return the file's last-modified time and checksum, or ``{}`` if it is absent."""

        absolute_path = self._get_filesystem_path(path)
        try:
            last_modified = os.path.getmtime(absolute_path)  # last modification time
        except OSError:
            # Narrowed from a bare except: only filesystem errors mean the
            # file is not there.
            return {}

        with open(absolute_path, 'rb') as f:
            checksum = md5sum(f)  # checksum of a file-like object

        return {'last_modified': last_modified, 'checksum': checksum}
Example #38
0
 def image_downloaded(self, response, request, info):
     """Persist images under ``full/<image_name>`` taken from ``response.meta``.

     The path yielded by ``get_images`` is discarded in favour of the
     meta-supplied name.

     Returns:
         The checksum of the first image buffer, or None if no image was yielded.
     """
     digest = None
     for _, picture, stream in self.get_images(response, request, info):
         if digest is None:
             stream.seek(0)
             digest = md5sum(stream)
         pic_width, pic_height = picture.size
         # Custom naming: use the image name passed through request meta.
         named_path = 'full/%s' % response.meta['image_name']
         self.store.persist_file(
             named_path,
             stream,
             info,
             meta={'width': pic_width, 'height': pic_height},
             headers={'Content-Type': 'image/jpeg'},
         )
     return digest
Example #39
0
 def file_downloaded(self, response, request, info):
     """Persist the body only when it is larger than 4 MiB.

     The body size is printed for every download.

     Returns:
         The value of ``md5sum`` over the body when it was stored, otherwise None.
     """
     path = self.file_path(request, response=response, info=info)
     buf = BytesIO(response.body)
     checksum = md5sum(buf)
     # len() is the payload size in bytes; sys.getsizeof() adds the bytes
     # object's interpreter overhead and skews the threshold.
     file_size = len(response.body)
     print(file_size, '************' * 30)
     # Files of 4 MiB or less are not stored; adjust the limit here.
     if file_size <= 4194304:
         return None
     buf.seek(0)
     self.store.persist_file(path, buf, info)
     return checksum
0
    def file_downloaded(self, response, request, info):
        """Extract the base64-encoded RTF from the XML body and persist it.

        The decoded value of ``sessao.discursoRTFBase64`` is stored instead
        of the raw response body.

        Returns:
            The value of ``md5sum`` over the decoded data.
        """
        # The downloaded file is an XML file which stores the actual RTF file
        # as a base64 encoded string. Extract and decode that value.
        data = xmltodict.parse(response.body)
        data = data.get('sessao').get('discursoRTFBase64')
        data = base64.b64decode(data)

        # Same as the default implementation, but using the decoded data
        # instead of `response.body`.
        path = self.file_path(request, response=response, info=info)
        buf = BytesIO(data)
        # Hash before persisting and rewind, so the checksum does not depend
        # on where the store leaves the buffer position.
        checksum = md5sum(buf)
        buf.seek(0)
        self.store.persist_file(path, buf, info)
        return checksum
Example #41
0
 def image_downloaded(self, response, request, info):
     """Save the first yielded image below ``images/<folder>`` unless it already exists.

     Folder and file name come from ``self.get_path_from_url(response.url)``.
     Later images from ``get_images`` are iterated but not saved.

     Returns:
         The checksum of the first image buffer, or None if no image was yielded.
     """
     digest = None
     for _key, picture, stream in self.get_images(response, request, info):
         if digest is not None:
             continue
         stream.seek(0)
         digest = md5sum(stream)
         folder, filename = self.get_path_from_url(response.url)
         # to do, move the folder under screenshot containers.
         target_dir = os.path.join("images", folder)
         if not os.path.exists(target_dir):
             os.makedirs(target_dir)
         target_file = os.path.join(target_dir, filename)
         if not os.path.exists(target_file):
             picture.save(target_file)
     return digest
Example #42
0
 def file_downloaded(self, response, request, info):
     """Collect modified responses and persist them as one file once all have arrived.

     Each call appends ``self.modify_response(response)`` to ``self.loop``.
     When the number collected equals ``request.meta['file_urls_len']``, the
     parts are written newline-terminated as a single file and ``self.loop``
     is reset.

     Returns:
         The checksum of the combined content when it was written, else None.
     """
     path = self.file_path(request, response=response, info=info)
     self.loop.append(self.modify_response(response))
     if len(self.loop) != request.meta['file_urls_len']:
         return None

     combined = ''.join(part + '\n' for part in self.loop)
     buf = BytesIO(combined.encode())
     checksum = md5sum(buf)
     buf.seek(0)
     self.store.persist_file(path, buf, info)
     self.loop = []
     return checksum
Example #43
0
 def image_downloaded(self, response, request, info):
     """Write the first image to ``images/<folder>/<filename>`` if not already present.

     The location is derived from ``self.get_path_from_url(response.url)``;
     only the first image yielded by ``get_images`` is considered.

     Returns:
         The checksum of the first image buffer, or None if no image was yielded.
     """
     checksum = None
     for key, image, buf in self.get_images(response, request, info):
         first_image = checksum is None
         if first_image:
             buf.seek(0)
             checksum = md5sum(buf)
             subdir, name = self.get_path_from_url(response.url)
             # to do, move the folder under screenshot containers.
             out_dir = os.path.join('images', subdir)
             dir_missing = not os.path.exists(out_dir)
             if dir_missing:
                 os.makedirs(out_dir)
             out_file = os.path.join(out_dir, name)
             already_saved = os.path.exists(out_file)
             if not already_saved:
                 image.save(out_file)
     return checksum
Example #44
0
    def image_downloaded(self, response, request, info):
        """Collect image attributes from ``get_images`` without persisting anything.

        Returns:
            ``(width, height, url_sha2, phash_str, checksum)``: the attributes
            of the last tuple yielded and the checksum of the first buffer.
            All five are None when ``get_images`` yields nothing.
        """
        checksum = None
        # Pre-bind so an empty image stream returns Nones, not a NameError.
        width = height = url_sha2 = phash_str = None

        for width, height, url_sha2, phash_str, buf in self.get_images(
                response, request, info):
            if checksum is None:
                buf.seek(0)
                checksum = md5sum(buf)
            # Persisting through self.store is intentionally disabled here.
        return width, height, url_sha2, phash_str, checksum
Example #45
0
    def write_item(self, item):
        title = item.get('title', 'Untitled')
        header = """<html lang="en">
<head>
<meta charset="utf-8" />
<title>%s</title>
</head>
<body>
""" % title

        body = self.make_body(item, title)
        closer = """
</body>
</html>
"""

        url = item['location']
        media_guid = hashlib.sha1(url).hexdigest()
        media_ext = '.html' 
        path = 'full/%s%s' % (media_guid, media_ext)
        absolute_path = os.path.join(self.store.basedir, path)
        with codecs.open(absolute_path, 'wb', 'utf-8') as f:
            f.write(header)
            f.write(body)
            f.write(closer)

        item['inline_urls']  = [ urljoin('file://', pathname2url(absolute_path)) ]
        item['inline_metas'] = [ { 'link_url': item['request_url'], 'location': item['location'], 
                'title': title, 'content_type': 'text/html'} ]

        checksum = None
        with open(absolute_path, 'rb') as f:
            checksum = md5sum(f)

        # Compatible with Twisted Deferred results
        results = [
            (True, {'url': url,
                'path': path,
                'checksum': checksum }
            )
        ]

        item = self.item_completed(results, item, self.spiderinfo)
        return item
Example #46
0
    def fileparse(self, fname ):
        """ fileparse
        Collect information about a file.
        Returned data structure:
        {
            "file_name"    :  "8845970f239f7fbd1c2c6f81861e92a81a43b32e.apk", 
            "file_md5"     :  "3bb744c851097281aa64180a8c9a1c3b", 
            "file_size"    :  946163, 
            "file_path"    :  '/tmp', 
        }
        """
        info = {}
        info['file_path'] = os.path.dirname(fname)
        info['file_name'] = os.path.basename(fname)
        info['file_size'] = os.path.getsize(fname)
        # Hash the raw bytes: text mode would decode the content and translate
        # newlines, which is wrong for binary files such as the .apk above.
        with open(fname, 'rb') as fh:
            info['file_md5'] = md5sum(fh)

        return info
    def file_downloaded(self, response, request, info):
        """Parse and log market data from the downloaded file, then persist it.

        Text is extracted from the body via ``self.text_extract``, the second
        line is parsed as a date, and the index and "Regular Market" rows for
        that date are logged at INFO level.

        Returns:
            The value of ``md5sum`` over the response body.
        """
        path = self.file_path(request, response=response, info=info)
        buf = StringIO(response.body)
        txt = self.text_extract(buf)
        lns = [ln.strip() for ln in txt.splitlines()]
        dd = parser.parse(lns[1]).strftime('%Y-%m-%d')

        data = self.get_index_data(dd, lns)
        for d in data:
            log.msg("|".join(d), level=log.INFO)

        reg_mkt = self.market_data(dd, lns, "Regular Market")

        for r in reg_mkt:
            for el in r:
                log.msg(str(el), level=log.INFO)

        # text_extract may have moved the buffer position; rewind before
        # hashing and again before handing the buffer to the store.
        buf.seek(0)
        checksum = md5sum(buf)
        buf.seek(0)
        self.store.persist_file(path, buf, info)
        return checksum
Example #48
0
    def stat_file(self, key, info):
        """
            Stat the single file stored inside the key directory.

            The key directory must contain exactly one file; otherwise the
            directory is removed and an empty dict is returned. The
            modification time is read from the path returned by
            ``_get_filesystem_path`` and the checksum from the file inside it.

            Returns ``{'last_modified': ..., 'checksum': ...}``, or ``{}``
            when the directory is missing, holds other than one file, or
            cannot be stat'ed.
        """
        
        keydir = os.path.join(self.basedir, *key.split('/'))
        try:
            filenames = os.listdir(keydir)
        except OSError:
            # Key directory does not exist (or is unreadable): nothing stored.
            return {}
        if len(filenames) != 1:
            # Unexpected content: drop the directory, ignoring removal errors.
            shutil.rmtree(keydir, True)
            return {}
        filename = list_first_item(filenames)
        
        absolute_path = self._get_filesystem_path(key)
        try:
            last_modified = os.path.getmtime(absolute_path)
        except OSError:
            # Narrowed from a bare except: only filesystem errors mean
            # "not stored".
            return {}

        with open(os.path.join(absolute_path, filename), 'rb') as file_content:
            checksum = md5sum(file_content)

        return {'last_modified': last_modified, 'checksum': checksum}
Example #49
0
    def stat_file(self, path, info):
        """Return the checksum of the buffer held in the in-memory hub for ``path``.

        ``last_modified`` is always the empty string. A missing ``path``
        raises from the hub lookup.
        """
        # NOTE(review): the buffer is hashed from its current position;
        # confirm whether it should be rewound first.
        stored_buf = self.__hub[path][0]
        return {'last_modified': "", 'checksum': md5sum(stored_buf)}
 def file_downloaded(self, response, request, info):
   """Persist the image buffer from ``self.get_image`` and return its checksum."""
   path, image_buf = self.get_image(response, request, info)
   image_buf.seek(0)
   checksum = md5sum(image_buf)
   # Rewind after hashing so the store receives the buffer from the start,
   # as the other file_downloaded implementations do.
   image_buf.seek(0)
   self.store.persist_file(path, image_buf, info)
   return checksum
Example #51
0
 def file_downloaded(self, response, request, info):
     """Store the response body under ``self.file_key(request.url)``.

     Returns:
         The value of ``md5sum`` over the response body.
     """
     key = self.file_key(request.url)
     buf = StringIO(response.body)
     # Hash before persisting and rewind, so the checksum does not depend on
     # where the store leaves the buffer position.
     checksum = md5sum(buf)
     buf.seek(0)
     self.store.persist_file(key, buf, info)
     return checksum