Example #1
import os
import tarfile
from typing import Optional


def _clean_info(
    root: Optional[str], epoch: int, info: tarfile.TarInfo
) -> tarfile.TarInfo:
    """
    Remove variable data from an archive entry.

    :param root: absolute path to the root directory from which the
        entry was added, or None to disable turning the name into a
        relative path
    :param epoch: fixed modification time to set
    :param info: tarinfo object to modify
    :returns: the modified tarinfo
    """
    if root is not None:
        info.name = os.path.relpath("/" + info.name, root)

    if not info.name.startswith("."):
        info.name = "./" + info.name

    info.uid = 0
    info.gid = 0
    info.uname = ""
    info.gname = ""
    info.mtime = epoch

    return info
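A function with this shape plugs into TarFile.add(), whose filter argument receives each TarInfo and returns the (possibly modified) entry, once the fixed arguments are bound. A minimal sketch assuming the _clean_info above; the archive name, source directory, and epoch value are illustrative:

import functools
import tarfile

EPOCH = 1609459200  # a fixed timestamp, e.g. taken from SOURCE_DATE_EPOCH

with tarfile.open("example.tar", "w") as tar:
    # Bind root and epoch so the callable matches the one-argument
    # signature expected by TarFile.add(filter=...).
    tar.add("output", filter=functools.partial(_clean_info, "/output", EPOCH))

With ownership and mtime pinned like this, two runs over identical input trees yield byte-identical archives, which is the usual point of such a cleanup filter.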
Example #2
    def stream_regular_file(self, filepath, tarinfo_buf, file_info):
        try:
            file_system = files.get_fs_by_file_path(filepath)
            with file_system.open(filepath, 'rb') as fd:
                f_iter = iter(lambda: fd.read(self.CHUNKSIZE), b'')  # binary file, so the sentinel must be b''
                try:
                    yield tarinfo_buf
                    chunk = b''
                    for chunk in f_iter:
                        yield chunk
                    # Pad the member to a 512-byte tar block boundary; only
                    # the final chunk can be short, assuming CHUNKSIZE is a
                    # multiple of BLOCKSIZE.
                    if len(chunk) % self.BLOCKSIZE != 0:
                        yield (self.BLOCKSIZE -
                               (len(chunk) % self.BLOCKSIZE)) * b'\0'

                except (IOError, fs.errors.OperationFailed):
                    msg = (
                        "Error while streaming file content into archive stream, file path: %s, "
                        "container: %s/%s, archive path: %s" % file_info)
                    self.log.critical(msg)
                    self.abort(500, msg)
        except (fs.errors.ResourceNotFound, fs.errors.OperationFailed,
                IOError):
            self.log.critical(
                "Could not find the file while creating the archive stream: %s, "
                "container: %s/%s, archive path: %s" % file_info)
            tarinfo = TarInfo()
            tarinfo.name = file_info[3] + '.MISSING'
            yield tarinfo.tobuf()
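The padding branch exists because the tar format stores member data in 512-byte blocks (tarfile.BLOCKSIZE), so content must be filled out with NULs to the next block boundary. A quick standalone check of the arithmetic, with illustrative sizes:

BLOCKSIZE = 512
for size in (0, 1, 511, 512, 513):
    padding = -size % BLOCKSIZE  # NUL bytes needed to reach a block boundary
    assert (size + padding) % BLOCKSIZE == 0
    # size 1 needs 511 padding bytes, size 512 needs none, and so on.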
Example #3
    def _test_roundtrip(self, context):
        path = context.given_file()
        content = b'test content'
        filename = 'archived-file.txt'

        with xtarfile_open(path, context.mode('w')) as archive:
            buffer = BytesIO()
            buffer.write(content)
            buffer.seek(0)

            tarinfo = TarInfo()
            tarinfo.size = len(content)
            tarinfo.name = filename

            archive.addfile(tarinfo, buffer)

        with xtarfile_open(path, context.mode('r')) as archive:
            while True:
                member = archive.next()
                if member is None:
                    self.fail('{} not found in archive'.format(filename))
                if member.name == filename:
                    buffer = archive.extractfile(member)
                    actual_content = buffer.read()
                    break

        self.assertEqual(actual_content, content)
Example #4
    def _unpack_info_file(self, tar: TarFile, member: TarInfo,
                          fileobj: io.BytesIO):
        directory = Path("var", "lib", "dpkg", "info").as_posix()
        # str.lstrip removes a run of characters, not a prefix, and would
        # mangle a name like "..data"; strip only a leading "./".
        name = member.name
        if name.startswith("./"):
            name = name[2:]

        member.name = f"./{directory}/{self.package.name}.{name}"

        tar.addfile(member, fileobj)
Example #5
File: tarcms.py Project: yasusii/fooling
    def create_article(self, data, info=None):
        if not self._mode:
            raise TarCMS.TarCMSError('not open: %r' % self)
        if info is None:
            info = TarInfo()
        assert isinstance(info, TarInfo)
        aid = '%08x' % self._artdb.nextrecno()
        info.name = aid + info.name
        tid = self._add_corpus(info, data)
        assert aid == tid
        self._artdb.add_record(tid)
        return aid
Example #6
    def _addMember(path, data, modtime):
        from tarfile import DIRTYPE
        elements = path.split('/')
        # Give each non-empty path component an explicit directory entry.
        parents = [element for element in elements if element]
        for parent in parents:
            info = TarInfo()
            info.name = parent
            info.size = 0
            info.mtime = modtime
            info.type = DIRTYPE
            archive.addfile(info, StringIO())
        _addOneMember(path, data, modtime)
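Directory members carry no data, so an empty buffer (or none at all) suffices. A standalone sketch of the same idea; the archive and directory names are illustrative:

import time
import tarfile
from tarfile import TarInfo, DIRTYPE

with tarfile.open('out.tar', 'w') as archive:
    info = TarInfo('a/b')          # explicit entry for the directory a/b
    info.type = DIRTYPE
    info.mode = 0o755
    info.mtime = int(time.time())
    archive.addfile(info)          # fileobj may be omitted for size-0 members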
Example #7
File: backup.py Project: uneidel/oio-sds
    def compute(self, conn, data=None):
        tarinfo = TarInfo()
        tarinfo.name = self.name
        tarinfo.mode = 0o700
        tarinfo.uid = 0
        tarinfo.gid = 0
        tarinfo.type = REGTYPE
        tarinfo.linkname = ""

        if self.name == CONTAINER_PROPERTIES:
            meta = data or conn.container_get_properties(self.acct, self.ref)
            tarinfo.size = len(json.dumps(meta['properties'], sort_keys=True))
            self._filesize = tarinfo.size
            self._buf = tarinfo.tobuf(format=PAX_FORMAT)
            return
        elif self.name == CONTAINER_MANIFEST:
            tarinfo.size = len(json.dumps(data, sort_keys=True))
            self._filesize = tarinfo.size
            self._buf = tarinfo.tobuf(format=PAX_FORMAT)
            return

        entry = conn.object_get_properties(self.acct, self.ref, self.name)

        properties = entry['properties']

        # x-static-large-object
        if properties.get(SLO, False):
            tarinfo.size = int(properties.get(SLO_SIZE))
            _, slo = conn.object_fetch(self.acct, self.ref, self.name)
            self._slo = json.loads("".join(slo), object_pairs_hook=OrderedDict)
        else:
            tarinfo.size = int(entry['length'])
        self._filesize = tarinfo.size

        # XATTR
        # do we have to store basic properties like policy, ... ?
        for key, val in properties.items():
            assert isinstance(val, basestring), \
                "Invalid type for %s:%s:%s" % (self.acct, self.name, key)
            if self.slo and key in SLO_HEADERS:
                continue
            tarinfo.pax_headers[SCHILY + key] = val
        tarinfo.pax_headers['mime_type'] = entry['mime_type']
        self._buf = tarinfo.tobuf(format=PAX_FORMAT)
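The SCHILY-prefixed keys travel as PAX extended headers and reappear on the member when the archive is read back. A standalone round-trip check; the header key and value are illustrative:

import io
import tarfile
from tarfile import TarInfo, PAX_FORMAT

info = TarInfo('obj')
info.pax_headers['SCHILY.xattr.user.policy'] = 'THREECOPIES'

buf = io.BytesIO()
with tarfile.open(fileobj=buf, mode='w', format=PAX_FORMAT) as tar:
    tar.addfile(info)  # size defaults to 0, so no fileobj is needed

buf.seek(0)
with tarfile.open(fileobj=buf) as tar:
    member = tar.getmember('obj')
    assert member.pax_headers['SCHILY.xattr.user.policy'] == 'THREECOPIES'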
Example #8
    def archivestream(self, ticket):
        stream = cStringIO.StringIO()

        # No members are added through the TarFile itself: headers and file
        # data are yielded by hand below, and the TarFile is only opened so
        # that closing it writes the end-of-archive trailer into `stream`.
        with tarfile.open(mode='w|', fileobj=stream):
            for filepath, arcpath, cont_name, cont_id, f_size, f_modified in ticket[
                    'target']:
                tarinfo = TarInfo()
                tarinfo.name = arcpath.lstrip('/')
                tarinfo.size = f_size
                tarinfo.mtime = datetime_to_epoch(f_modified)
                tarinfo_buf = tarinfo.tobuf()
                signed_url = None
                try:
                    signed_url = files.get_signed_url(filepath, config.fs)
                except fs.errors.ResourceNotFound:
                    pass

                if signed_url:
                    content_generator = self.stream_file_signed_url(
                        signed_url, tarinfo_buf,
                        (filepath, cont_name, cont_id, arcpath))
                else:
                    content_generator = self.stream_regular_file(
                        filepath, tarinfo_buf,
                        (filepath, cont_name, cont_id, arcpath))

                for chunk in content_generator:
                    yield chunk

                self.log_user_access(
                    AccessType.download_file,
                    cont_name=cont_name,
                    cont_id=cont_id,
                    filename=os.path.basename(arcpath),
                    origin_override=ticket['origin'],
                    download_ticket=ticket['_id'])  # log download
        yield stream.getvalue()  # get tar stream trailer
        stream.close()
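Everything except the trailer is produced by hand in this generator: each member is a tobuf() header, the raw bytes, then NUL padding, which is what allows streaming without buffering whole files. A self-contained sketch of the pattern for a single in-memory member; names and data are illustrative:

import io
import tarfile
from tarfile import TarInfo, BLOCKSIZE

def stream_tar(name, data):
    info = TarInfo(name)
    info.size = len(data)
    yield info.tobuf()                      # 512-byte member header
    yield data                              # member content
    yield b'\0' * (-len(data) % BLOCKSIZE)  # pad to a block boundary
    trailer = io.BytesIO()
    with tarfile.open(mode='w|', fileobj=trailer):
        pass                                # closing writes the trailer
    yield trailer.getvalue()

archive = b''.join(stream_tar('hello.txt', b'hello world\n'))
with tarfile.open(fileobj=io.BytesIO(archive)) as tar:
    assert tar.extractfile('hello.txt').read() == b'hello world\n'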
Example #9
    def compute(self, conn, data=None):
        tarinfo = TarInfo()
        tarinfo.name = self.name
        tarinfo.mode = 0o700
        tarinfo.uid = 0
        tarinfo.gid = 0
        tarinfo.type = REGTYPE
        tarinfo.linkname = ""

        if self.name == CONTAINER_PROPERTIES:
            meta = data or conn.container_get_properties(self.acct, self.ref)
            tarinfo.size = len(json.dumps(meta['properties'], sort_keys=True))
            self._filesize = tarinfo.size
            self._buf = tarinfo.tobuf(format=PAX_FORMAT)
            return
        elif self.name == CONTAINER_MANIFEST:
            tarinfo.size = len(json.dumps(data, sort_keys=True))
            self._filesize = tarinfo.size
            self._buf = tarinfo.tobuf(format=PAX_FORMAT)
            return

        entry = conn.object_get_properties(self.acct, self.ref, self.name)

        properties = entry['properties']

        # x-static-large-object
        if properties.get(SLO, False):
            tarinfo.size = int(properties.get(SLO_SIZE))
            _, slo = conn.object_fetch(self.acct,
                                       self.ref,
                                       self.name,
                                       properties=False)
            self._slo = json.loads("".join(slo), object_pairs_hook=OrderedDict)
            self._checksums = {}
            # format MD5 entries to match the layout used for multi-chunk objects
            offset = 0
            for idx, ck in enumerate(self._slo):
                self._checksums[idx] = {
                    'hash': ck['hash'].upper(),
                    'size': ck['bytes'],
                    'offset': offset
                }
                offset += ck['bytes']
        else:
            tarinfo.size = int(entry['length'])
            meta, chunks = conn.object_locate(self.acct,
                                              self.ref,
                                              self.name,
                                              properties=False)
            storage_method = STORAGE_METHODS.load(meta['chunk_method'])
            chunks = _sort_chunks(chunks, storage_method.ec)
            for idx in chunks:
                chunks[idx] = chunks[idx][0]
                del chunks[idx]['url']
                del chunks[idx]['score']
                del chunks[idx]['pos']
            self._checksums = chunks
        self._filesize = tarinfo.size

        # XATTR
        # do we have to store basic properties like policy, ... ?
        for key, val in properties.items():
            assert isinstance(val, basestring), \
                "Invalid type for %s:%s:%s" % (self.acct, self.name, key)
            if self.slo and key in SLO_HEADERS:
                continue
            tarinfo.pax_headers[SCHILY + key] = val
        tarinfo.pax_headers['mime_type'] = entry['mime_type']
        self._buf = tarinfo.tobuf(format=PAX_FORMAT)
Example #10
def write_tar(archive_url,
              manifest_path,
              tar_path,
              strip_prefix=None,
              progress_bar=False,
              overwrite=False):
    """
        Write all objects from archive_url to tar_path.
        Write list of objects to manifest_path.
    """
    if not overwrite:
        if exists(tar_path):
            raise IOError("%s already exists." % tar_path)
        if exists(manifest_path):
            raise IOError("%s already exists." % manifest_path)

    # get iterator of items to tar, and check that it includes at least one item
    objects = list_objects(archive_url)
    try:
        _, objects = peek(iter(objects))
    except StopIteration:
        raise IOError("No objects found at %s" % archive_url)

    # write tar
    make_parent_dir(tar_path)
    files_written = []
    # ignore_ext is not a parameter of the builtin open(); this relies on a
    # library open() such as smart_open's.
    with open(tar_path, 'wb', ignore_ext=True) as tar_out, \
         LoggingTarFile.open(fileobj=tar_out, mode='w|') as tar, \
         TemporaryDirectory() as temp_dir:

        # load object contents in background threads
        items = threaded_queue(load_object,
                               ((obj, temp_dir) for obj in objects))

        # tar each item
        for obj, response, body in tqdm(items, disable=not progress_bar):
            body = HashingFile(body)
            tar_info = TarInfo()
            tar_info.size = int(response['ContentLength'])
            tar_info.mtime = response['LastModified'].timestamp()
            tar_info.name = obj.key
            if strip_prefix and tar_info.name.startswith(strip_prefix):
                tar_info.name = tar_info.name[len(strip_prefix):]
            tar.addfile(tar_info, body)
            member = tar.members[-1]
            files_written.append(
                OrderedDict((
                    # inventory fields
                    ('Bucket', obj.bucket_name),
                    ('Key', obj.key),
                    ('Size', response['ContentLength']),
                    ('LastModifiedDate', response['LastModified'].isoformat()),
                    ('ETag', response['ETag'].strip('"')),
                    ('StorageClass', response.get('StorageClass', 'STANDARD')),
                    ('VersionId', response.get('VersionId', '')),
                    # ('Owner', obj.owner['DisplayName'] if obj.owner else ''),
                    # tar fields
                    ('TarMD5', body.hexdigest()),
                    ('TarOffset', member.offset),
                    ('TarDataOffset', member.offset_data),
                    ('TarSize', member.size),
                ) + ((('TarStrippedPrefix',
                       strip_prefix), ) if strip_prefix else tuple())))
            if response['ContentLength'] != member.size:
                raise ValueError("Object size mismatch: %s" % obj.key)

    # write csv
    make_parent_dir(manifest_path)
    files_written.sort(key=lambda f: f['Key'])
    write_dicts_to_csv(manifest_path, files_written)
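Because the manifest stores each member's TarDataOffset and TarSize, an object can later be read straight out of the tar without rescanning it. A hedged sketch of such a lookup, assuming a local tar file and a manifest row as written above (CSV fields come back as strings, hence the int() calls):

def read_member(tar_path, row):
    with open(tar_path, 'rb') as f:
        f.seek(int(row['TarDataOffset']))  # jump directly to the member data
        return f.read(int(row['TarSize']))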
Example #11
    def strip_prefix(self, prefix: str, member: tarfile.TarInfo):
        member.name = self.strip_slash(prefix, member.name)
        # Also strip the prefix from hard-link targets (symlink targets are
        # left alone, since they may be relative).
        if member.islnk() and not member.issym():
            member.linkname = self.strip_slash(prefix, member.linkname)
Example #12
    def filter1(tarinfo: tarfile.TarInfo):
        # Make the archived name relative by dropping the leading `path`
        # (a variable captured from the enclosing scope).
        tarinfo.name = tarinfo.name[len(path):]
        return tarinfo
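Functions of this shape are what TarFile.add() accepts as its filter argument: each candidate TarInfo is passed in, and the returned entry is archived (returning None excludes the entry and, for a directory, also skips recursing into it). A minimal self-contained sketch with an illustrative path; the top-level directory itself is renamed to '.' rather than being left with an empty name:

import tarfile

path = 'build/'  # illustrative; note the trailing slash

def filter1(tarinfo: tarfile.TarInfo):
    # Drop the 'build/' prefix so entries are archived relative to it;
    # the directory 'build/' itself becomes '.'.
    tarinfo.name = tarinfo.name[len(path):] or '.'
    return tarinfo

with tarfile.open('out.tar', 'w') as tar:
    tar.add(path, filter=filter1)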