def run_ics(group: str, turtle: bytes, extra_files: List[str] = (), extra_data: List[str] = ()):
    client = docker.from_env()
    files = ['data.ttl']
    files.extend(extra_files)
    # Name the extra Turtle payloads up front so they appear in the container
    # command as well as in the archive.
    extra_data_names = [f'extra_{i}.ttl' for i in range(len(extra_data))]
    files.extend(extra_data_names)
    tests = client.containers.create(
        'gsscogs/gdp-sparql-tests',
        command=f'''sparql-test-runner -t /usr/local/tests/{group} -p dsgraph='<urn:x-arq:DefaultGraph>' '''
                f'''{" ".join('/tmp/' + f for f in files)}'''
    )
    archive = BytesIO()
    with TarFile(fileobj=archive, mode='w') as t:
        ttl = TarInfo('data.ttl')
        ttl.size = len(turtle)
        ttl.mtime = time.time()
        t.addfile(ttl, BytesIO(turtle))
        for filename in extra_files:
            actual_path = Path('features') / 'fixtures' / 'extra' / filename
            with actual_path.open('rb') as actual_file:
                extra_file = t.gettarinfo(arcname=filename, fileobj=actual_file)
                t.addfile(extra_file, actual_file)
        for filename, add_turtle in zip(extra_data_names, extra_data):
            data = add_turtle.encode('utf-8')
            add_ttl = TarInfo(filename)
            add_ttl.size = len(data)  # size in bytes, not characters
            add_ttl.mtime = time.time()
            t.addfile(add_ttl, BytesIO(data))
    archive.seek(0)
    tests.put_archive('/tmp/', archive)
    tests.start()
    response = tests.wait()
    sys.stdout.write(tests.logs().decode('utf-8'))
    return response['StatusCode']
def write(self, file_name):
    if not self.data or not os.path.isdir(self.data):
        raise Exception('Must set data before building')
    gzfile = GzipFile(file_name, 'w')
    tar = TarFile(fileobj=gzfile, mode='w')

    buff = BytesIO(json.dumps(self.control).encode())
    info = TarInfo(name='./CONTROL')
    info.size = buff.getbuffer().nbytes
    tar.addfile(tarinfo=info, fileobj=buff)

    if self.init is not None:
        buff = BytesIO(self.init.encode())
        info = TarInfo(name='./INIT')
        info.size = buff.getbuffer().nbytes
        tar.addfile(tarinfo=info, fileobj=buff)

    data = BytesIO()
    datatar = TarFile(fileobj=data, mode='w')
    datatar.add(self.data, '/')
    datatar.close()
    data.seek(0)

    info = TarInfo(name='./DATA')
    info.size = data.getbuffer().nbytes
    tar.addfile(tarinfo=info, fileobj=data)
    tar.close()
    gzfile.close()
def tarball_images(
    images: List[Image.Image],
    *,
    name: str = None,
    animated: bool = False,
    format: str = "png",
    extras: List[Tuple[str, BytesIO]],
) -> BytesIO:
    fp = BytesIO()
    tar = TarFile(mode="w", fileobj=fp)

    for idx, image in enumerate(images):
        f = BytesIO()
        if animated:
            image[0].save(f, format, append_images=image[1:], save_all=True, loop=0)
        else:
            image.save(f, format)
        f.seek(0)
        if name:
            info = TarInfo(f"{name}_{idx}.{format}")
        else:
            info = TarInfo(f"{idx}.{format}")
        info.size = len(f.getbuffer())
        tar.addfile(info, fileobj=f)

    for extra in extras:
        info = TarInfo(extra[0] or "_.txt")
        info.size = len(extra[1].getbuffer())
        tar.addfile(info, fileobj=extra[1])

    fp.seek(0)
    return fp
def _build_image_tar(self, target_path):
    with tarfile.open(target_path, "w:gz") as image_tar:
        for directory in [
            ["./", "var", "lib", "dpkg", "info"],
            ["./", "var", "log"],
        ]:
            info = TarInfo("./" + Path(*directory).as_posix())
            info.type = tarfile.DIRTYPE
            image_tar.addfile(info)
        for file in [["var", "log", "dpkg.log"]]:
            image_tar.addfile(TarInfo("./" + Path(*file).as_posix()))
        status_file = io.BytesIO()
        for deb_file in self.files:
            deb_file.unpack_into_tar(image_tar, status_file)
        status_info = TarInfo(
            "./" + Path("var", "lib", "dpkg", "status").as_posix())
        status_info.size = status_file.getbuffer().nbytes
        status_file.seek(0)
        image_tar.addfile(status_info, status_file)
        status_file.close()
def test_09__verify_version(self, mock_isfile, mock_fumktmp, mock_osbase,
                            mock_fugetdata, mock_furm, mock_versionok):
    """Test09 UdockerTools()._verify_version()."""
    tball = "/home/udocker.tar"
    mock_isfile.return_value = False
    utools = UdockerTools(self.local)
    status = utools._verify_version(tball)
    self.assertTrue(mock_isfile.called)
    self.assertEqual(status, (False, ""))

    tball = "/home/udocker.tar"
    mock_isfile.return_value = True
    mock_fumktmp.return_value = ""
    utools = UdockerTools(self.local)
    status = utools._verify_version(tball)
    self.assertTrue(mock_isfile.called)
    self.assertTrue(mock_fumktmp.called)
    self.assertEqual(status, (False, ""))

    tball = "/home/udocker.tar"
    tinfo1 = TarInfo("udocker_dir/lib/VERSION")
    tinfo2 = TarInfo("a")
    mock_isfile.return_value = True
    mock_fumktmp.return_value = "/home/tmp"
    mock_osbase.return_value = "VERSION"
    mock_fugetdata.return_value = "1.2.7"
    mock_furm.return_value = None
    mock_versionok.return_value = True
    with patch.object(tarfile, 'open', autospec=True) as open_mock:
        open_mock.return_value.getmembers.return_value = [tinfo2, tinfo1]
        open_mock.return_value.extract.return_value = None
        utools = UdockerTools(self.local)
        status = utools._verify_version(tball)
        self.assertEqual(status, (True, "1.2.7"))
        self.assertTrue(mock_furm.called)
def writeDataFile(self, filename, text, content_type, subdir=None):
    """ See IExportContext.
    """
    if subdir is not None:
        filename = '/'.join((subdir, filename))

    parents = filename.split('/')[:-1]
    while parents:
        path = '/'.join(parents) + '/'
        if path not in self._archive.getnames():
            info = TarInfo(path)
            info.type = DIRTYPE
            # tarfile.filemode(0o755) == '-rwxr-xr-x'
            info.mode = 0o755
            info.mtime = time.time()
            self._archive.addfile(info)
        parents.pop()

    info = TarInfo(filename)
    if isinstance(text, six.text_type):
        encoding = self.getEncoding() or 'utf-8'
        text = text.encode(encoding)

    if isinstance(text, six.binary_type):
        stream = BytesIO(text)
        info.size = len(text)
    else:
        # Assume text is an instance of a class like
        # Products.Archetypes.WebDAVSupport.PdataStreamIterator,
        # as in the case of ATFile
        stream = text.file
        info.size = text.size
    info.mtime = time.time()
    self._archive.addfile(info, stream)
def test_10__install(self, mock_isfile, mock_futil, mock_osbase):
    """Test10 UdockerTools()._install()."""
    tfile = ""
    mock_isfile.return_value = False
    utools = UdockerTools(self.local)
    status = utools._install(tfile)
    self.assertFalse(status)

    tinfo1 = TarInfo("udocker_dir/bin/ls")
    tinfo2 = TarInfo("udocker_dir/lib/lib1")
    tfile = "udocker.tar"
    mock_isfile.return_value = True
    mock_futil.return_value.chmod.return_value = None
    mock_futil.return_value.rchmod.side_effect = [
        None, None, None, None, None, None
    ]
    mock_osbase.side_effect = ["ls", "ls", "lib1", "lib1", "doc", "doc1"]
    self.local.create_repo.return_value = None
    with patch.object(tarfile, 'open', autospec=True) as open_mock:
        open_mock.return_value.getmembers.side_effect = [[tinfo1, tinfo2],
                                                         [tinfo1, tinfo2],
                                                         [tinfo1, tinfo2]]
        open_mock.return_value.extract.side_effect = [None, None]
        utools = UdockerTools(self.local)
        status = utools._install(tfile)
        self.assertTrue(status)
        self.assertTrue(mock_futil.called)
        self.assertEqual(mock_futil.return_value.rchmod.call_count, 4)
def writeDataFile(self, filename, text, content_type, subdir=None):
    """ See IExportContext.
    """
    mod_time = time.time()
    if subdir is not None:
        elements = subdir.split('/')
        parents = filter(None, elements)
        while parents:
            dirname = os.path.join(*parents)
            try:
                self._archive.getmember(dirname + '/')
            except KeyError:
                info = TarInfo(dirname)
                info.size = 0
                info.mode = 509  # 0o775, 'drwxrwxr-x'
                info.mtime = mod_time
                info.type = DIRTYPE
                self._archive.addfile(info, StringIO())
            parents = parents[:-1]
        filename = '/'.join((subdir, filename))

    stream = StringIO(text)
    info = TarInfo(filename)
    info.size = len(text)
    info.mode = 436  # 0o664, '-rw-rw-r--'
    info.mtime = mod_time
    self._archive.addfile(info, stream)
def run_csvlint(context):
    client = docker.from_env()
    csvlint = client.containers.create(
        'gsscogs/csvlint',
        command=f'csvlint -s /tmp/{context.metadata_filename}')
    archive = BytesIO()
    # Read and encode up front: the tar entry size must be the byte count of
    # the encoded content, not the character count reported by tell().
    context.metadata_io.seek(0)
    metadata_bytes = context.metadata_io.read().encode('utf-8')
    context.csv_io.seek(0)
    csv_bytes = context.csv_io.read().encode('utf-8')
    with TarFile(fileobj=archive, mode='w') as t:
        tis = TarInfo(str(context.metadata_filename))
        tis.size = len(metadata_bytes)
        tis.mtime = time.time()
        t.addfile(tis, BytesIO(metadata_bytes))
        tic = TarInfo(str(context.csv_filename))
        tic.size = len(csv_bytes)
        tic.mtime = time.time()
        t.addfile(tic, BytesIO(csv_bytes))
        if hasattr(context, 'codelists'):
            t.add(Path('features') / 'fixtures' / context.codelists,
                  arcname=context.codelists)
    archive.seek(0)
    csvlint.put_archive('/tmp/', archive)
    csvlint.start()
    response = csvlint.wait()
    logs = csvlint.logs().decode('utf-8')
    sys.stdout.write(logs)
    return (response, logs)
def writeDataFile(self, filename, text, content_type, subdir=None):
    """ See IExportContext.
    """
    if subdir is not None:
        filename = '/'.join((subdir, filename))

    parents = filename.split('/')[:-1]
    while parents:
        path = '/'.join(parents) + '/'
        if path not in self._archive.getnames():
            info = TarInfo(path)
            info.type = DIRTYPE
            # tarfile.filemode(0755) == '-rwxr-xr-x'
            info.mode = 0755
            info.mtime = time.time()
            self._archive.addfile(info)
        parents.pop()

    info = TarInfo(filename)
    if isinstance(text, str):
        stream = StringIO(text)
        info.size = len(text)
    elif isinstance(text, unicode):
        raise ValueError("Unicode text is not supported, even if it only "
                         "contains ascii. Please encode your data")
    else:
        # Assume text is an instance of a class like
        # Products.Archetypes.WebDAVSupport.PdataStreamIterator,
        # as in the case of ATFile
        stream = text.file
        info.size = text.size
    info.mtime = time.time()
    self._archive.addfile(info, stream)
def test_basic(self):
    # writing
    db = TarDB(dirname).open(mode='w')
    data_foo = '123'
    data_bar = 'ABCDEF'
    db.add_record(TarInfo('foo'), data_foo)
    db.add_record(TarInfo('bar'), data_bar)
    db.close()
    #
    files = os.listdir(dirname)
    self.assertEqual(len(files), 3)
    self.assertTrue('catalog' in files)
    self.assertTrue('lock' in files)
    self.assertTrue('db00000.tar' in files)
    # reading
    db = TarDB(dirname).open()
    (info1, data1) = db.get_record(0)
    self.assertEqual(data1, data_foo)
    self.assertEqual(len(data1), info1.size)
    (info2, data2) = db.get_record(1)
    self.assertEqual(data2, data_bar)
    self.assertEqual(len(data2), info2.size)
    # iter
    infos = list(db)
    self.assertEqual(len(infos), 2)
    self.assertEqual(infos[0].name, info1.name)
    self.assertEqual(infos[1].name, info2.name)
    db.close()
    return
def step_impl(context):
    client = docker.from_env()
    csvlint = client.containers.create(
        'gsscogs/csvlint',
        command=f'csvlint -s /tmp/{context.schema_filename}'
    )
    archive = BytesIO()
    # Encode once and size the tar entries from the byte count; tell() on a
    # text stream counts characters, which differs for non-ASCII content.
    schema_bytes = context.schema_io.getvalue().encode('utf-8')
    csv_bytes = context.csv_io.getvalue().encode('utf-8')
    with TarFile(fileobj=archive, mode='w') as t:
        tis = TarInfo(str(context.schema_filename))
        tis.size = len(schema_bytes)
        tis.mtime = time.time()
        t.addfile(tis, BytesIO(schema_bytes))
        tic = TarInfo(str(context.csv_filename))
        tic.size = len(csv_bytes)
        tic.mtime = time.time()
        t.addfile(tic, BytesIO(csv_bytes))
    archive.seek(0)
    csvlint.put_archive('/tmp/', archive)
    csvlint.start()
    response = csvlint.wait()
    sys.stdout.write(csvlint.logs().decode('utf-8'))
    assert_equal(response['StatusCode'], 0)
def test_clean_tar_members():
    """ Test `clean_tar_members()` function. """
    members = [TarInfo('toplevel'),
               TarInfo('toplevel/index.html'),
               TarInfo('toplevel/foo/bar.xyz')]
    output = [TarInfo('index.html'), TarInfo('foo/bar.xyz')]
    result = clean_tar_members(members)
    assert all(x.name == y.name for x, y in zip(output, result))
def test_clean_tar_members():
    """ Test `clean_tar_members()` function. """
    members = [
        TarInfo('toplevel'),
        TarInfo('toplevel/README.rst'),
        TarInfo('toplevel/foo/bar0'),
        TarInfo('toplevel/asdf/qwer/foo0.xyz'),
    ]
    cleaned = ['.', 'README.rst', 'foo/bar0', 'asdf/qwer/foo0.xyz']
    result = clean_tar_members(members)
    assert all(x.name in cleaned for x in result)
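# The two tests above exercise clean_tar_members() without showing it. A
# minimal sketch consistent with both tests (an assumption, not the tested
# project's actual implementation): strip the leading path component from
# every member and drop the bare top-level entry itself.
def clean_tar_members_sketch(members):
    cleaned = []
    for member in members:
        parts = member.name.split('/', 1)
        if len(parts) == 1:
            # 'toplevel' itself has nothing left once the prefix is stripped
            continue
        member.name = parts[1]
        cleaned.append(member)
    return cleaned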
def stream(self, fileobj, callback=None, sleeptime=5):
    """
    Stream the bundle to the fileobj.

    This method is a blocking I/O operation.
    The ``fileobj`` should be an open file like object with 'wb' options.
    An asynchronous callback method MAY be provided via the optional
    ``callback`` keyword argument. Periodically, the callback method is
    provided with the current percentage of completion.
    """
    notifythread = None
    if callable(callback):
        self._save_total_size()
        notifythread = self._setup_notify_thread(callback, sleeptime)

    tarfile = taropen(None, 'w|', fileobj)
    for file_data in self.file_data:
        tarinfo, fileobj = self._tarinfo_from_file_data(file_data)
        tarfile.addfile(tarinfo, fileobj)
        self.md_obj.append(
            self._build_file_info(file_data, fileobj.hashdigest()))

    md_txt = bytes(metadata_encode(self.md_obj), 'utf8')
    md_fd = BytesIO(md_txt)  # the metadata is already bytes, so wrap in BytesIO
    md_tinfo = TarInfo('metadata.txt')
    md_tinfo.size = len(md_txt)
    tarfile.addfile(md_tinfo, md_fd)
    tarfile.close()
    self._complete = True
    if callable(callback):
        notifythread.join()
def upload(self):
    logger.info("Loading artifacts")
    t = ts()
    tar_data = io.BytesIO()
    with tarfile.open(fileobj=tar_data, mode="w|") as tar:
        for root, _, files in os.walk(self._artifact_directory):
            for af in files:
                full_path = os.path.join(root, af)
                relpath = os.path.relpath(full_path, self._artifact_directory)
                ti = TarInfo(relpath)
                stat = os.stat(full_path)
                ti.size = stat.st_size
                ti.mode = stat.st_mode
                with open(full_path, "rb") as f:
                    tar.addfile(ti, f)
    res = self._container.put_archive(config.build_dir, tar_data.getvalue())
    if not res:
        raise Exception("Error loading artifacts into container")
    t = ts() - t
    logger.info("Artifacts loaded in %.3fs", t)
def _test_roundtrip(self, context):
    path = context.given_file()
    content = b'test content'
    filename = 'archived-file.txt'

    with xtarfile_open(path, context.mode('w')) as archive:
        buffer = BytesIO()
        buffer.write(content)
        buffer.seek(0)
        tarinfo = TarInfo()
        tarinfo.size = len(content)
        tarinfo.name = filename
        archive.addfile(tarinfo, buffer)

    with xtarfile_open(path, context.mode('r')) as archive:
        while True:
            member = archive.next()
            if member is None:
                self.fail('{} not found in archive'.format(filename))
            if member.name == filename:
                buffer = archive.extractfile(member)
                actual_content = buffer.read()
                break

    self.assertEqual(actual_content, content)
def stream_regular_file(self, filepath, tarinfo_buf, file_info):
    try:
        file_system = files.get_fs_by_file_path(filepath)
        with file_system.open(filepath, 'rb') as fd:
            # The sentinel must be b'': the file is opened in binary mode,
            # so a '' sentinel would never match and the iterator would
            # never terminate.
            f_iter = iter(lambda: fd.read(self.CHUNKSIZE), b'')  # pylint: disable=cell-var-from-loop
            try:
                yield tarinfo_buf
                chunk = b''
                for chunk in f_iter:
                    yield chunk
                if len(chunk) % self.BLOCKSIZE != 0:
                    yield (self.BLOCKSIZE - (len(chunk) % self.BLOCKSIZE)) * b'\0'
            except (IOError, fs.errors.OperationFailed):
                msg = (
                    "Error happened during sending file content in archive stream, file path: %s, "
                    "container: %s/%s, archive path: %s" % file_info)
                self.log.critical(msg)
                self.abort(500, msg)
    except (fs.errors.ResourceNotFound, fs.errors.OperationFailed, IOError):
        self.log.critical(
            "Couldn't find the file during creating archive stream: %s, "
            "container: %s/%s, archive path: %s" % file_info)
        tarinfo = TarInfo()
        tarinfo.name = file_info[3] + '.MISSING'
        yield tarinfo.tobuf()
def add(self, filename, size, fileobj):
    """ Add the provided file to the archive """
    tarinfo = TarInfo(filename)
    tarinfo.size = size
    self._tarfile.addfile(tarinfo, fileobj)
def zip2tar(zip_file: str, tar_file, tar_mode: Optional[str] = 'w:gz'):
    """
    :param zip_file: zip file path
    :param tar_file: IO(_io.IOBase): file obj
    :param tar_mode: ref `tarfile.TarFile.open`
    :return:
    """
    zip_file = ZipFile(file=zip_file, mode='r')
    tar_file = TarFile.open(fileobj=tar_file, mode=tar_mode)
    try:
        for zip_info in zip_file.infolist():
            tar_info = TarInfo(name=zip_info.filename)
            tar_info.size = zip_info.file_size
            tar_info.mtime = datetime.now().timestamp()
            # https://stackoverflow.com/a/434689/11722440
            tar_info.mode = zip_info.external_attr >> 16
            # https://stackoverflow.com/a/18432983/11722440
            # https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
            # TODO whg fix other file (like symbolic link) in zip to regular file in tar file
            if zip_info.filename.endswith('/'):
                tar_info.type = tarfile.DIRTYPE
            else:
                tar_info.type = tarfile.REGTYPE
            infile = zip_file.open(zip_info.filename)
            tar_file.addfile(tar_info, infile)
    finally:
        tar_file.close()
        zip_file.close()
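# Example invocation of zip2tar() above; the file names are illustrative
# assumptions.
with open('out.tar.gz', 'wb') as tar_out:
    zip2tar('in.zip', tar_out, tar_mode='w:gz')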
def test_rawtar(self):
    """Create a normal tar archive and restore it"""
    raw = BytesIO()
    tarfile = TarFile(mode='w', fileobj=raw)
    testdata = rand_str(20) * 5000
    inf = TarInfo("simpletar")
    fileraw = BytesIO()
    fileraw.write(testdata)
    inf.size = len(testdata)
    fileraw.seek(0)
    tarfile.addfile(inf, fileobj=fileraw)
    tarfile.close()
    raw.seek(0)
    data = raw.read()

    cnt = rand_str(20)
    ret = requests.put(self.make_uri("restore", container=cnt), data=data)
    self.assertEqual(ret.status_code, 201)

    meta, stream = self.conn.object_fetch(self.account, cnt, "simpletar")
    self.assertEqual(
        md5("".join(stream)).hexdigest(),
        md5(testdata).hexdigest())
def add_buf_to_tar(tar: TarFile, filename: str, buf: BytesIO):
    buf.flush()
    buf.seek(0)
    info = TarInfo(name=filename)
    info.size = len(buf.getvalue())
    tar.addfile(info, buf)
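# Usage sketch for add_buf_to_tar() above; the archive and payload names are
# illustrative assumptions.
def _demo_add_buf_to_tar():
    payload = BytesIO(b'{"key": "value"}')
    with TarFile('bundle.tar', mode='w') as tar:
        add_buf_to_tar(tar, 'payload.json', payload)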
def stream(self, fileobj, callback=None, sleeptime=5):
    """
    Stream the bundle to the fileobj.

    The fileobj should be an open file like object with 'wb' options.
    If the callback is given then percent complete of the size of the
    bundle will be given to the callback as the first argument.
    """
    notifythread = None
    if callable(callback):
        self._save_total_size()
        notifythread = self._setup_notify_thread(callback, sleeptime)

    tarfile = taropen(None, 'w|', fileobj)
    for file_data in self.file_data:
        tarinfo, fileobj = self._tarinfo_from_file_data(file_data)
        tarfile.addfile(tarinfo, fileobj)
        self.md_obj.append(
            self._build_file_info(file_data, fileobj.hashdigest()))

    md_txt = metadata_encode(self.md_obj)
    md_txt = md_txt if PY2 else bytes(md_txt, 'UTF-8')
    # On Python 3 the metadata is bytes, so it needs a BytesIO wrapper;
    # StringIO would reject it.
    md_fd = StringIO(md_txt) if PY2 else BytesIO(md_txt)
    md_tinfo = TarInfo('metadata.txt')
    md_tinfo.size = len(md_txt)
    tarfile.addfile(md_tinfo, md_fd)
    tarfile.close()
    self._complete = True
    if callable(callback):
        notifythread.join()
def _add_entry(self, name, type, mode, size, data):
    info = TarInfo(name)
    info.type = type
    info.mode = mode
    info.size = size
    info.mtime = time.time()
    self._tarfile.addfile(info, data)
def _dump_files(self, tar):
    """
    Dump all uploaded media to the archive.
    """
    # Loop through all models and find FileFields
    for model in apps.get_models():
        # Get the name of all file fields in the model
        field_names = []
        for field in model._meta.fields:
            if isinstance(field, models.FileField):
                field_names.append(field.name)
        # If any were found, loop through each row
        if len(field_names):
            for row in model.objects.all():
                for field_name in field_names:
                    field = getattr(row, field_name)
                    if field:
                        field.open()
                        info = TarInfo(field.name)
                        info.size = field.size
                        tar.addfile(info, field)
                        field.close()
def copy_to_container(container: "Container", source_path: str, target_path: str) -> None:
    """
    Copy a file into a Docker container

    :param container: Container object
    :param source_path: Source file path
    :param target_path: Target file path (in the container)
    :return:
    """
    # https://github.com/docker/docker-py/issues/1771
    with open(source_path, "rb") as f:
        data = f.read()

    tarinfo = TarInfo(name=os.path.basename(target_path))
    tarinfo.size = len(data)
    tarinfo.mtime = int(time.time())

    stream = BytesIO()
    tar = TarFile(fileobj=stream, mode="w")
    tar.addfile(tarinfo, BytesIO(data))
    tar.close()

    stream.seek(0)
    container.put_archive(path=os.path.dirname(target_path), data=stream.read())
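# Example use of copy_to_container() above; the container name and the two
# paths are illustrative assumptions.
def _demo_copy_to_container():
    import docker
    client = docker.from_env()
    container = client.containers.get("my-container")  # hypothetical container
    copy_to_container(container, "./config.yaml", "/etc/app/config.yaml")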
def move_certs(self, paths):
    self.log.info("Staging internal ssl certs for %s", self._log_name)
    yield self.pull_image(self.move_certs_image)
    # create the volume
    volume_name = self.format_volume_name(self.certs_volume_name, self)
    # create volume passes even if it already exists
    self.log.info("Creating ssl volume %s for %s", volume_name, self._log_name)
    yield self.docker('create_volume', volume_name)

    # create a tar archive of the internal cert files
    # docker.put_archive takes a tarfile and a running container
    # and unpacks the archive into the container
    nb_paths = {}
    tar_buf = BytesIO()
    archive = TarFile(fileobj=tar_buf, mode='w')
    for key, hub_path in paths.items():
        fname = os.path.basename(hub_path)
        nb_paths[key] = '/certs/' + fname
        with open(hub_path, 'rb') as f:
            content = f.read()
        tarinfo = TarInfo(name=fname)
        tarinfo.size = len(content)
        tarinfo.mtime = os.stat(hub_path).st_mtime
        tarinfo.mode = 0o644
        archive.addfile(tarinfo, BytesIO(content))
    archive.close()
    tar_buf.seek(0)

    # run a container to stage the certs,
    # mounting the volume at /certs/
    host_config = self.client.create_host_config(
        binds={
            volume_name: {"bind": "/certs", "mode": "rw"},
        },
    )
    container = yield self.docker('create_container',
                                  self.move_certs_image,
                                  volumes=["/certs"],
                                  host_config=host_config,
                                  )
    container_id = container['Id']
    self.log.debug(
        "Container %s is creating ssl certs for %s",
        container_id[:12],
        self._log_name,
    )
    # start the container
    yield self.docker('start', container_id)
    # stage the archive to the container
    try:
        yield self.docker(
            'put_archive',
            container=container_id,
            path='/certs',
            data=tar_buf,
        )
    finally:
        yield self.docker('remove_container', container_id)
    return nb_paths
def prepare(self, account, container):
    assert self.req

    if self.req.headers.get('range') is None:
        return
    rnge = ContainerBackup._extract_range(self.req, blocks=None)
    self._range = [rnge[2], rnge[3]]
    self.mode = self.MODE_RANGE

    data = self.redis.get("restore:%s:%s" % (account, container))
    if self._range[0] == 0:
        if data:
            raise UnprocessableEntity(
                "A restoration has been already started")
        self.cur_state = {
            'start': -1,
            'end': -1,
            'manifest': None,
            'entry': None,  # current entry in process
            # block offset when appending on existing object
            'offset_block': 0,
            # block offset in data (w/o headers) when appending
            'offset': 0}
        return

    if not data:
        raise UnprocessableEntity("First segment is not available")

    self.cur_state = json.loads(data, object_pairs_hook=OrderedDict)
    if self._range[0] != self.cur_state['end']:
        raise UnprocessableEntity("Segment was already written "
                                  "or an error has occurred previously")

    for entry in self.cur_state['manifest']:
        if self._range[0] > entry['end_block']:
            continue
        if self._range[0] == entry['start_block']:
            self.append = False
            self.cur_state['offset_block'] = 0
            self.cur_state['offset'] = 0
            break
        if self._range[0] >= entry['start_block'] + entry['hdr_blocks']:
            self.append = True
            self.cur_state['entry'] = entry
            self.inf = TarInfo()
            self.inf.name = entry['name']
            offset = (self._range[0] - entry['start_block']
                      - entry['hdr_blocks'])
            self.cur_state['offset'] = offset * BLOCKSIZE
            self.inf.size = entry['size'] - offset * BLOCKSIZE
            self.inf.size = min(self.inf.size, self.req_size)
            self.cur_state['offset_block'] = (self._range[0]
                                              - entry['start_block'])
            break
        # the requested range starts inside a tar header block
        raise UnprocessableEntity('Header is broken')
def _tarinfo_from_file_data(self, file_data):
    """Return a tarinfo object from file_data."""
    tarinfo = TarInfo(file_data['name'])
    fileobj = file_data.pop('fileobj', None)
    for key, value in file_data.items():
        setattr(tarinfo, key, value)
    fileobj = HashFileObj(fileobj, self._hashfunc(), self)
    return tarinfo, fileobj
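# Illustrative shape of a file_data mapping accepted by
# _tarinfo_from_file_data() above: every key other than 'fileobj' is set as a
# TarInfo attribute. The keys and values here are assumptions for
# demonstration only.
example_file_data = {
    'name': 'docs/report.txt',
    'size': 1024,
    'mtime': 1700000000,
    'fileobj': BytesIO(b'x' * 1024),  # hypothetical in-memory payload
}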
def addFile(tar, dest, file, file_size):
    if dest not in written_files:
        info = TarInfo(dest)
        info.size = file_size
        info.mtime = now
        info.mode = 0o777
        tar.addfile(info, fileobj=file)
        written_files.add(dest)