Esempio n. 1
0
    def stream_regular_file(self, filepath, tarinfo_buf, file_info):
        """Yield the tar header, content and padding for one stored file.

        This is a generator. It yields ``tarinfo_buf`` first, then the file
        content in reads of ``self.CHUNKSIZE``, then NUL bytes so that the
        content ends on a multiple of ``self.BLOCKSIZE``.

        If an error is raised while locating or opening the file, it is
        logged and a single header for an entry named
        ``<file_info[3]>.MISSING`` is yielded instead.

        :param filepath: path handed to ``files.get_fs_by_file_path`` and to
            the returned filesystem's ``open``.
        :param tarinfo_buf: already serialized tar header for this member.
        :param file_info: 4-tuple interpolated into the log messages (file
            path, the two container fields, archive path); element 3 is also
            used to name the ``.MISSING`` placeholder.
        """
        try:
            file_system = files.get_fs_by_file_path(filepath)
            with file_system.open(filepath, 'rb') as fd:
                try:
                    yield tarinfo_buf
                    sent = 0
                    # b'' is the same object class as '' on Python 2 and is
                    # the actual EOF value of a binary read on Python 3, so
                    # the loop terminates on both.
                    for chunk in iter(lambda: fd.read(self.CHUNKSIZE), b''):  # pylint: disable=cell-var-from-loop
                        sent += len(chunk)
                        yield chunk
                    # Pad on the total content length, not on the last chunk:
                    # the two only agree when every read but the last is a
                    # multiple of BLOCKSIZE, which short reads do not
                    # guarantee.
                    remainder = sent % self.BLOCKSIZE
                    if remainder:
                        yield (self.BLOCKSIZE - remainder) * b'\0'

                except (IOError, fs.errors.OperationFailed):
                    msg = (
                        "Error happened during sending file content in archive stream, file path: %s, "
                        "container: %s/%s, archive path: %s" % file_info)
                    self.log.critical(msg)
                    self.abort(500, msg)
        except (fs.errors.ResourceNotFound, fs.errors.OperationFailed,
                IOError):
            self.log.critical(
                "Couldn't find the file during creating archive stream: %s, "
                "container: %s/%s, archive path: %s" % file_info)
            # Emit a header-only placeholder so the archive records which
            # member could not be read.
            tarinfo = TarInfo()
            tarinfo.name = file_info[3] + '.MISSING'
            yield tarinfo.tobuf()
Esempio n. 2
0
    def compute(self, conn, data=None):
        """Build the PAX tar header for this entry and record its size.

        Sets ``self._buf`` (serialized header) and ``self._filesize``
        (declared member size). For objects flagged as static large objects
        it also sets ``self._slo`` to the parsed manifest.

        :param conn: client object providing ``container_get_properties``,
            ``object_get_properties`` and ``object_fetch``.
        :param data: for the container-properties entry, optional metadata
            used instead of querying ``conn``; for the container-manifest
            entry, the manifest whose JSON length becomes the member size.
        :raises AssertionError: if an object property value is not a string.
        """
        tarinfo = TarInfo()
        tarinfo.name = self.name
        # TarInfo's permission attribute is ``mode``; the previous ``mod``
        # was a typo that never reached the header. The header length is
        # unaffected by this field.
        tarinfo.mode = 0o700
        tarinfo.uid = 0
        tarinfo.gid = 0
        tarinfo.type = REGTYPE
        tarinfo.linkname = ""

        # Pseudo-entries: their size is the length of the JSON document and
        # they carry no extended attributes.
        if self.name in (CONTAINER_PROPERTIES, CONTAINER_MANIFEST):
            if self.name == CONTAINER_PROPERTIES:
                meta = data or conn.container_get_properties(self.acct,
                                                             self.ref)
                payload = meta['properties']
            else:
                payload = data
            tarinfo.size = len(json.dumps(payload, sort_keys=True))
            self._filesize = tarinfo.size
            self._buf = tarinfo.tobuf(format=PAX_FORMAT)
            return

        entry = conn.object_get_properties(self.acct, self.ref, self.name)

        properties = entry['properties']

        # x-static-large-object
        if properties.get(SLO, False):
            # The size comes from the SLO size property rather than from the
            # entry's own length.
            tarinfo.size = int(properties.get(SLO_SIZE))
            _, slo = conn.object_fetch(self.acct, self.ref, self.name)
            self._slo = json.loads("".join(slo), object_pairs_hook=OrderedDict)
        else:
            tarinfo.size = int(entry['length'])
        self._filesize = tarinfo.size

        # XATTR
        # do we have to store basic properties like policy, ... ?
        for key, val in properties.items():
            # NOTE: assert statements are stripped when Python runs with -O.
            assert isinstance(val, basestring), \
                "Invalid type for %s:%s:%s" % (self.acct, self.name, key)
            # NOTE(review): reads ``self.slo`` while this method assigns
            # ``self._slo``; presumably a property defined on the class --
            # confirm.
            if self.slo and key in SLO_HEADERS:
                continue
            tarinfo.pax_headers[SCHILY + key] = val
        tarinfo.pax_headers['mime_type'] = entry['mime_type']
        self._buf = tarinfo.tobuf(format=PAX_FORMAT)
Esempio n. 3
0
    def archivestream(self, ticket):
        """Yield a tar archive for every target listed in a download ticket.

        This is a generator. For each entry of ``ticket['target']`` it builds
        a tar header, streams the member through a signed URL when one is
        available and through the regular file path otherwise, then logs the
        download. After the last member it yields whatever closing the
        ``tarfile`` object wrote into the in-memory stream (the archive
        trailer).

        :param ticket: mapping with ``'target'`` (iterable of 6-tuples:
            file path, archive path, container name, container id, size,
            modification time), ``'origin'`` and ``'_id'``.
        """
        stream = cStringIO.StringIO()

        # try/finally so the buffer is released even when the consumer stops
        # iterating before the generator runs to completion.
        try:
            # No member is added through this TarFile object; it is opened
            # only so that leaving the ``with`` block writes the
            # end-of-archive data into ``stream``.
            with tarfile.open(mode='w|', fileobj=stream):
                for (filepath, arcpath, cont_name, cont_id, f_size,
                     f_modified) in ticket['target']:
                    tarinfo = TarInfo()
                    tarinfo.name = arcpath.lstrip('/')
                    tarinfo.size = f_size
                    tarinfo.mtime = datetime_to_epoch(f_modified)
                    tarinfo_buf = tarinfo.tobuf()
                    file_info = (filepath, cont_name, cont_id, arcpath)

                    # A missing resource is not an error here: fall back to
                    # the regular streaming path, which handles missing
                    # files on its own.
                    signed_url = None
                    try:
                        signed_url = files.get_signed_url(filepath, config.fs)
                    except fs.errors.ResourceNotFound:
                        pass

                    if signed_url:
                        content_generator = self.stream_file_signed_url(
                            signed_url, tarinfo_buf, file_info)
                    else:
                        content_generator = self.stream_regular_file(
                            filepath, tarinfo_buf, file_info)

                    for chunk in content_generator:
                        yield chunk

                    self.log_user_access(
                        AccessType.download_file,
                        cont_name=cont_name,
                        cont_id=cont_id,
                        filename=os.path.basename(arcpath),
                        origin_override=ticket['origin'],
                        download_ticket=ticket['_id'])  # log download
            yield stream.getvalue()  # get tar stream trailer
        finally:
            stream.close()
Esempio n. 4
0
    def compute(self, conn, data=None):
        """Build the PAX tar header for this entry and record size/checksums.

        Sets ``self._buf`` (serialized header) and ``self._filesize``
        (declared member size). For real objects it also sets
        ``self._checksums``, and for static large objects ``self._slo``.

        :param conn: client object providing ``container_get_properties``,
            ``object_get_properties``, ``object_fetch`` and ``object_locate``.
        :param data: for the container-properties entry, optional metadata
            used instead of querying ``conn``; for the container-manifest
            entry, the manifest whose JSON length becomes the member size.
        :raises AssertionError: if an object property value is not a string.
        """
        tarinfo = TarInfo()
        tarinfo.name = self.name
        # TarInfo's permission attribute is ``mode``; the previous ``mod``
        # was a typo that never reached the header. The header length is
        # unaffected by this field.
        tarinfo.mode = 0o700
        tarinfo.uid = 0
        tarinfo.gid = 0
        tarinfo.type = REGTYPE
        tarinfo.linkname = ""

        # Pseudo-entries: their size is the length of the JSON document and
        # they carry neither checksums nor extended attributes.
        if self.name in (CONTAINER_PROPERTIES, CONTAINER_MANIFEST):
            if self.name == CONTAINER_PROPERTIES:
                meta = data or conn.container_get_properties(self.acct,
                                                             self.ref)
                payload = meta['properties']
            else:
                payload = data
            tarinfo.size = len(json.dumps(payload, sort_keys=True))
            self._filesize = tarinfo.size
            self._buf = tarinfo.tobuf(format=PAX_FORMAT)
            return

        entry = conn.object_get_properties(self.acct, self.ref, self.name)

        properties = entry['properties']

        # x-static-large-object
        if properties.get(SLO, False):
            tarinfo.size = int(properties.get(SLO_SIZE))
            _, slo = conn.object_fetch(self.acct,
                                       self.ref,
                                       self.name,
                                       properties=False)
            self._slo = json.loads("".join(slo), object_pairs_hook=OrderedDict)
            self._checksums = {}
            # format MD5 to share same format as multi chunks object
            offset = 0
            for idx, ck in enumerate(self._slo):
                self._checksums[idx] = {
                    'hash': ck['hash'].upper(),
                    'size': ck['bytes'],
                    'offset': offset
                }
                # Each segment starts where the previous one ended.
                offset += ck['bytes']
        else:
            tarinfo.size = int(entry['length'])
            meta, chunks = conn.object_locate(self.acct,
                                              self.ref,
                                              self.name,
                                              properties=False)
            storage_method = STORAGE_METHODS.load(meta['chunk_method'])
            chunks = _sort_chunks(chunks, storage_method.ec)
            # Keep only the first entry stored under each key and drop the
            # fields that are not recorded with the checksum.
            for idx in chunks:
                chunks[idx] = chunks[idx][0]
                for field in ('url', 'score', 'pos'):
                    del chunks[idx][field]
            self._checksums = chunks
        self._filesize = tarinfo.size

        # XATTR
        # do we have to store basic properties like policy, ... ?
        for key, val in properties.items():
            # NOTE: assert statements are stripped when Python runs with -O.
            assert isinstance(val, basestring), \
                "Invalid type for %s:%s:%s" % (self.acct, self.name, key)
            # NOTE(review): reads ``self.slo`` while this method assigns
            # ``self._slo``; presumably a property defined on the class --
            # confirm.
            if self.slo and key in SLO_HEADERS:
                continue
            tarinfo.pax_headers[SCHILY + key] = val
        tarinfo.pax_headers['mime_type'] = entry['mime_type']
        self._buf = tarinfo.tobuf(format=PAX_FORMAT)
Esempio n. 5
0
def tar(host, backup, share, path):
	"""Write a GNU-format tar stream of part of a backup to standard output.

	Looks up ``backup`` of ``host`` in the Fruitbak instance configured at
	``/dev/shm/conf``. When ``path`` is None, ``share`` is resolved to a
	(share, path) pair through ``backup.locate_path``; otherwise ``share`` is
	looked up on the backup by name. Every entry found under the path is
	written as a tar header, followed by its content for regular files that
	are not hardlinks, and the stream ends with two zero-filled blocks.
	"""
	out = stdout.buffer

	fruitbak = Fruitbak(confdir=Path('/dev/shm/conf'))
	backup = fruitbak[host][backup]
	if path is None:
		share, path = backup.locate_path(share)
	else:
		share = backup[share]

	def wanted_hashes():
		# Hashes of every entry whose content will be written, produced in
		# the same traversal order as the main loop below.
		for entry in share.find(path):
			if entry.is_file and not entry.is_hardlink:
				yield from entry.hashes

	def make_header(entry):
		# Build the TarInfo for one entry; returns None for entry kinds that
		# have no tar representation here.
		header = TarInfo(fsdecode(bytes(entry.name or b'.')))
		header.mode = entry.mode & 0o7777
		header.uid = entry.uid
		header.gid = entry.gid
		# presumably entry.mtime is in nanoseconds -- TODO confirm
		header.mtime = entry.mtime // 1000000000

		# Hardlinks are tested first so a hardlinked file is stored as a
		# link and not as a second copy of the content.
		if entry.is_hardlink:
			header.type = LNKTYPE
			header.linkname = fsdecode(bytes(entry.hardlink or b'.'))
			return header
		if entry.is_file:
			header.type = REGTYPE
			header.size = entry.size
			return header
		if entry.is_symlink:
			header.type = SYMTYPE
			header.linkname = fsdecode(bytes(entry.symlink))
			return header
		if entry.is_chardev:
			header.type = CHRTYPE
			header.devmajor = entry.major
			header.devminor = entry.minor
			return header
		if entry.is_blockdev:
			header.type = BLKTYPE
			header.devmajor = entry.major
			header.devminor = entry.minor
			return header
		if entry.is_directory:
			header.type = DIRTYPE
			return header
		if entry.is_fifo:
			header.type = FIFOTYPE
			return header
		return None

	with fruitbak.pool.agent().readahead(wanted_hashes()) as reader:
		for entry in share.find(path):
			header = make_header(entry)
			if header is None:
				continue

			out.write(header.tobuf(GNU_FORMAT))

			if entry.is_file and not entry.is_hardlink:
				# One result is taken from the reader per hash of the entry;
				# the hash values themselves are not needed here.
				for _ in entry.hashes:
					result = next(reader)
					if result.exception:
						raise result.exception[1]
					out.write(result.value)
				# Zero-fill the content up to the next block boundary.
				fill = -header.size % BLOCKSIZE
				if fill:
					out.write(b'\0' * fill)

	# End-of-archive marker: two zero-filled blocks.
	out.write(bytes(2 * BLOCKSIZE))