class FileSeekerTar(FileSeekerBase):
    def __init__(self, tar_file_path, temp_folder):
        FileSeekerBase.__init__(self)
        self.tar_file = TarFile(tar_file_path)
        self.temp_folder = temp_folder

    def search(self, filepattern):
        pathlist = []
        for member in self.tar_file.getmembers():
            if fnmatch.fnmatch(member.name, filepattern):
                try:
                    clean_name = sanitize_file_path(member.name)
                    full_path = os.path.join(self.temp_folder, Path(clean_name))
                    if member.isdir():
                        os.makedirs(full_path, exist_ok=True)
                    else:
                        parent_dir = os.path.dirname(full_path)
                        if not os.path.exists(parent_dir):
                            os.makedirs(parent_dir)
                        with open(full_path, "wb") as fout:
                            fout.write(ExFileObject(self.tar_file, member).read())
                        # Preserve the member's modification time on disk.
                        os.utime(full_path, (member.mtime, member.mtime))
                    pathlist.append(full_path)
                except Exception as ex:
                    logfunc(
                        f'Could not write file to filesystem, path was {member.name}: '
                        + str(ex))
        return pathlist

    def cleanup(self):
        self.tar_file.close()
def extract_file(tar: tarfile.TarFile, name: str) -> IO[bytes]:
    """
    Helper for getting a file handle to the database file in the tar archive.
    This is needed because we don't necessarily know the name of its
    containing folder.

    :raises: TarError if the tar archive does not contain the database file
    """
    mmdb = next(
        (m for m in tar.getmembers() if m.name.endswith(name) and m.isfile()),
        None
    )
    if mmdb is None:
        # Because we verified the checksum earlier, this should only be
        # possible if maxmind actually served us a bad file
        raise tarfile.TarError("Tar archive did not contain the database file!")
    f = tar.extractfile(mmdb)
    if f is None:
        raise tarfile.TarError("Tar archive did not contain the database file!")
    return f
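# Usage sketch for extract_file (illustrative; the archive and member names
# below are placeholder assumptions, not from the original source). The helper
# matches by suffix, so the .mmdb can sit inside any dated top-level folder.
import tarfile

with tarfile.open("GeoLite2-City.tar.gz", "r:gz") as tar:
    with extract_file(tar, "GeoLite2-City.mmdb") as fileobj:
        data = fileobj.read()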
def filter_tar(arch: tarfile.TarFile, path,
               max_size=None) -> Generator[tarfile.TarInfo, None, None]:
    for member in arch.getmembers():
        pth = member.name
        res = is_suspicious(pth, path)
        if res is not None:
            yield res
        elif member.isdir():
            yield member
        elif member.isfile():
            if max_size is not None and member.size > max_size:
                hit = ArchiveAnomaly(
                    location=path,
                    message='Archive contains a file that exceeds the configured maximum size',
                    signature=f"archive_anomaly#size#{path}#{pth}",
                    extra={'archive_path': pth}
                )
                yield hit
            else:
                yield member
def get_openface(file_):
    tar_file = TarFile(file_)
    d = tar_file.extractfile([
        x for x in tar_file.getmembers() if x.path.endswith(".csv")
    ][0]).readlines()
    failed = set()
    reference = {}
    for i, line in enumerate(d):
        split_line = line.decode("utf-8").strip().split(",")
        if i == 0:
            # Header row: map column names to their indices.
            reference = {x: split_line.index(x) for x in split_line}
            continue
        frame = int(split_line[reference["frame"]])
        if not ONLY_ODD or frame % 2 == 1:
            confidence = float(split_line[reference["confidence"]])
            # The success column holds "0"/"1"; bool() on a non-empty string
            # is always True, so compare against "1" instead.
            success = split_line[reference["success"]].strip() == "1"
            if not success or confidence < 0.98:
                failed.add(frame)
    return failed
def check_segmentation_type(tar_file: TarFile) -> SegmentationType:
    names = [x.name for x in tar_file.getmembers()]
    if "algorithm.json" in names:
        return SegmentationType.analysis
    if "metadata.json" in names:
        return SegmentationType.mask
    raise WrongFileTypeException()
def getDataFromTarfile(tarpath):
    tf = TarFile(tarpath)
    members = [m.name for m in tf.getmembers()]
    # Extract only if some member is missing on disk.
    if not all(os.path.exists(x) for x in members):
        tf.extractall()
    tf.close()
    return members
def _tar_members(in_tar: tarfile.TarFile) -> Iterable[ReadableMember]:
    """Get readable files (members) from a tar"""
    members: List[tarfile.TarInfo] = in_tar.getmembers()
    for member in members:
        # We return a partial/callable so that the file isn't opened until it's needed.
        yield member, partial(in_tar.extractfile, member)
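# Consumption sketch for _tar_members (illustrative, assumed usage): each
# yielded pair is (TarInfo, opener); the opener only reads the member when
# called, and returns None for non-regular members. "example.tar" is a
# placeholder path.
import tarfile

with tarfile.open("example.tar") as in_tar:
    for member, open_member in _tar_members(in_tar):
        fileobj = open_member()  # the member is opened lazily, only here
        if fileobj is not None:
            data = fileobj.read()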
def strip_member_components(archive: tarfile.TarFile,
                            component: int) -> Iterator[tarfile.TarInfo]:
    """Return members of the archive with stripped prefix (same as --strip-components)."""
    for member in archive.getmembers():
        try:
            member.path = member.path.split('/', component)[component]
            yield member
        except IndexError:
            # Member sits above the requested depth (e.g. the root directory itself).
            pass
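# Usage sketch for strip_member_components (illustrative; "example.tar" and
# "out" are placeholders): the generator is intended to be fed to
# TarFile.extractall(members=...), reproducing `tar --strip-components=1`.
import tarfile

with tarfile.open("example.tar") as archive:
    archive.extractall(path="out", members=strip_member_components(archive, 1))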
def __len__(self):
    if self.members is not None:
        return len(self.members)
    if isdir(self.indir):
        return len(os.listdir(self.indir))
    tf = TarFile(self.indir)
    return sum(1 for m in tf.getmembers() if m.isfile())
def _tar_members_root_directory_skipped_if_specified(self, tar: TarFile) -> List[TarInfo]:
    members = tar.getmembers()
    if self.root_compressed_directory_name_to_skip is not None:
        for member in members:
            member.name = member.name.replace(
                self.root_compressed_directory_name_to_skip, '')
    return members
def archive_members(*, archive: tarfile.TarFile, root_path: str, search_path: str = None):
    prefix_len = len(f'{root_path}/')
    for member in archive.getmembers():
        if member.path.startswith(f'{root_path}/{search_path or ""}'):
            member.path = member.path[prefix_len:]
            yield member
def find_first_level_of_tagfile(tf: tarfile.TarFile) -> str:
    # Walk the tar contents to find the top-level directory.
    # (If there are multiple top-level directories, this method is not applicable.)
    _mbs = tf.getmembers()
    counter = collections.Counter()
    for mb in _mbs:
        path = mb.path
        dname = path.split('/')[0]
        counter[dname] += 1
    return counter.most_common(1)[0][0]
def extract(self, path, cb=None):
    data_tar = TarFile(fileobj=self.source.extractfile('./DATA'))
    for member in data_tar.getmembers():
        if member.name in ('/', ''):
            # extract can't handle making '/' when installing '/'
            continue
        data_tar.extract(member, path)
        if member.isfile() and cb:
            cb(self.name, os.path.join(path, member.name))
def download(self, src, dest, extract_here=False):
    client = connect()
    with SpooledTemporaryFile() as file:
        file.write(client.copy(self.container_id, src).read())
        file.seek(0)
        tfile = TarFile(fileobj=file)
        if extract_here:
            # Drop the leading "<basename>/" prefix so the contents land directly in dest.
            base = len(os.path.basename(src)) + 1
            for member in tfile.getmembers():
                member.name = member.name[base:]
        tfile.extractall(path=dest)
def filter_tar(
        arch: tarfile.TarFile, path, max_size=None
) -> Generator[Union[tarfile.TarInfo, Detection], None, None]:
    if max_size is None:
        # Fall back to the configured default limit.
        max_size = config.get_maximum_archive_size()
    for member in arch.getmembers():
        pth = member.name
        res = is_suspicious(pth, path)
        if res is not None:
            yield res
        elif member.isdir():
            yield member
        elif member.issym() or member.islnk():
            # https://en.wikipedia.org/wiki/Tar_(computing)#Tarbomb
            yield Detection(
                detection_type="ArchiveAnomaly",
                message="Archive contains a member that is a link.",
                signature=f"archive_anomaly#link#{path}#{pth}",
                score=config.get_score_or_default("archive-member-is-link", 100),
                extra={"archive_path": pth, "reason": "member_is_link"})
        elif member.isfile():
            if max_size is not None and member.size > max_size:
                yield Detection(
                    detection_type="ArchiveAnomaly",
                    location=path,
                    message="Archive contains a file that exceeds the configured maximum size",
                    score=config.get_score_or_default("archive-file-size-exceeded", 100),
                    signature=f"archive_anomaly#size#{path}#{pth}",
                    extra={
                        "archive_path": pth,
                        "reason": "file_size_exceeded",
                        "size": member.size,
                        "limit": max_size,
                    },
                )
            else:
                yield member
def _extract_tar_junk_path(tarfile_obj: tarfile.TarFile, archive_extract_dir: Path):
    """
    Extract a tarfile while flattening any directory hierarchy in the archive.
    """
    for member in tarfile_obj.getmembers():
        if member.isdir():
            # Skip directories
            continue
        # Remove the directory hierarchy from the file
        member.name = Path(member.name).name
        output_file = archive_extract_dir / member.name
        LOGGER.debug(f"Extracting member '{member.name}' to '{output_file}'")
        tarfile_obj.extract(member, path=archive_extract_dir)
def tarfile_extract(
    tf: tarfile.TarFile,
    *,
    tarfile_base_path: str,
    extract_base_path: str,
    limit_files: Optional[List[str]],
) -> List[str]:
    tarfile_base_path = os.path.normpath(tarfile_base_path)
    extract_base_path = os.path.normpath(extract_base_path)
    extracted_files = []
    for member_info in tf.getmembers():
        if not member_info.isfile():
            continue
        member = os.path.normpath(member_info.name)
        # Only consider members below the requested base path.
        if os.path.commonpath([member, tarfile_base_path]) != tarfile_base_path:
            continue
        extract_fname = os.path.relpath(member, tarfile_base_path)
        if limit_files is not None:
            found = False
            for limit_fname in limit_files:
                # normpath drops a trailing "/", so restore it to keep the
                # directory-prefix semantics.
                limit_fname = os.path.normpath(limit_fname) + (
                    "/" if limit_fname.endswith("/") else "")
                if limit_fname.endswith("/"):
                    if extract_fname.startswith(limit_fname):
                        found = True
                        continue
                elif limit_fname == extract_fname:
                    found = True
                    continue
            if not found:
                continue
        extract_path = os.path.join(extract_base_path, extract_fname)
        tarfile_extract_single_file(tf, member, extract_path)
        extracted_files.append(member)
    return extracted_files
def __init__(self, filename):
    MultiThreadDump.__init__(self)
    # The TarFile constructor does not accept compression modes like 'r:gz';
    # TarFile.open() is needed to decompress transparently.
    if filename.endswith('gz'):
        tar_file = TarFile.open(filename, 'r:gz')
    else:
        tar_file = TarFile.open(filename, 'r')
    for member in tar_file.getmembers():
        if member.isfile():
            tar_member = tar_file.extractfile(member)
            child_name = tar_member.name
            lines = tar_member.readlines()
            self.thread_dumps[child_name] = ThreadDump(lines)
def __iter__(self):
    if isdir(self.indir):
        for word in os.listdir(self.indir):
            if not self.word_included(word):
                continue
            with open(pjoin(self.indir, word), "rb") as defn_fp:
                yield word, defn_fp
    else:
        tf = TarFile(self.indir)
        for member in tf.getmembers():
            word = basename(member.name)
            if not self.word_included(word):
                continue
            if member.isfile():
                yield word, tf.extractfile(member)
def _check_tar(tar: tarfile.TarFile) -> None:
    """Check the tarfile to avoid potential security issues.

    Currently collections and packages have the following constraints:
    - Only regular files or directories
    - No paths starting with '/' or containing '..'
    """
    for info in tar.getmembers():
        if not (info.isfile() or info.isdir()):
            raise wn.Error(
                f'tarfile member is not a regular file or directory: {info.name}'
            )
        if info.name.startswith('/') or '..' in info.name:
            raise wn.Error(
                f'tarfile member paths may not be absolute or contain ..: {info.name}'
            )
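# Usage sketch for _check_tar (illustrative; the archive path is a
# placeholder): validate first and extract only if no wn.Error was raised, so
# a crafted archive cannot write outside the destination directory.
import tarfile

with tarfile.open("collection.tar.gz", "r:gz") as tar:
    _check_tar(tar)  # raises wn.Error on links/devices and '/' or '..' paths
    tar.extractall(path="destination")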
def _untar_layers(dir, layers):
    output = {}
    # Untar layer filesystem bundle, keeping only the newest version of each
    # path together with the TarFile it came from (overlay semantics); members
    # must be extracted from their own archive, not from the last one opened.
    for layer in layers:
        layer_tar = TarFile(dir + "/" + layer)
        for member in layer_tar.getmembers():
            output[member.name] = (layer_tar, member)
    for layer_tar, member in output.values():
        try:
            layer_tar.extract(member, path=dir, set_attrs=False)
        except (ValueError, ReadError):
            pass
    # Clean up
    for layer in layers:
        clean_up(dir + "/" + layer[:-10])
def load_from_file(self, f):
    tar = TarFile(f, "r")
    # load info file
    f = tar.extractfile("info.py")
    self.agedesc, self.generation = eval(f.read(-1), {"__builtins__": None})
    f.close()
    # load agents
    for info in tar.getmembers():
        if splitext(info.name)[1] == ".agt" and info.isfile():
            f = tar.extractfile(info)
            self.add(Agent(self.agedesc, file=f))
            f.close()
    tar.close()
def unpack_archive(archive_staging_dir, archive, external_id, target_path, filelist=None):
    """Unpack a tar file containing the files that are in the MigrationArchive object"""
    # create the name of the archive
    archive_path = archive.get_archive_name(archive_staging_dir)
    # create the target directory if it doesn't exist
    os.makedirs(target_path, exist_ok=True)
    try:
        tar_file = TarFile(archive_path, 'r')
    except Exception:
        error_string = "Could not find archive path: {}".format(archive_path)
        raise Exception(error_string)
    # check that the tar_file digest matches the digest in the database
    digest = calculate_digest(archive_path)
    if digest != archive.digest:
        error_string = "Digest does not match for archive: {}".format(archive_path)
        raise Exception(error_string)
    # untar each file
    for tar_info in tar_file.getmembers():
        try:
            # if filelist only extract those in the filelist
            if filelist:
                if tar_info.name in filelist:
                    tar_file.extract(tar_info, path=target_path)
            else:
                tar_file.extract(tar_info, path=target_path)
            logging.debug(
                ("    Extracting file: {} from archive: {} to directory: {}"
                 ).format(tar_info.name, archive.get_id(), target_path))
        except Exception as e:
            error_string = (
                "Could not extract file: {} from archive {} to path: {}, exception: {}"
            ).format(tar_info.name, archive.get_id(), target_path, str(e))
            logging.error(error_string)
            raise Exception(error_string)
    tar_file.close()
def get_root_json_from_image(img: tarfile.TarFile) -> Tuple[str, dict]:
    """
    Every docker image has a root .json file with the metadata information.

    This function locates this file, loads it, and returns its name and value:

    >>> get_root_json_from_image(img)
    ('db079554b4d2f7c65c4df3adae88cb72d051c8c3b8613eb44e86f60c945b1ca7', dict(...))
    """
    for f in img.getmembers():
        if f.name.endswith("json") and "/" not in f.name:
            c = img.extractfile(f.name).read()
            if hasattr(c, "decode"):
                c = c.decode()
            return f.name.split(".")[0], json.loads(c)
    return None, None
def __members(self, tf: TarFile, subfolder: str):
    """Helper function for extracting folders from a tar ball.

    Will allow extracted files to exclude the provided subfolder from their
    extracted path.

    Args:
        tf (TarFile): The tar file to extract.
        subfolder (string): The subfolder to exclude from the extraction path.
    """
    length_of_subfolder = len(subfolder)
    for member in tf.getmembers():
        # For each file (member) in the tar file, check whether it starts with
        # the specified string (subfolder).
        if member.path.startswith(subfolder):
            # If it does, exclude that string from the start of the path we
            # are extracting to. This puts the extracted files straight into
            # their respective `conf` or `data` directories.
            member.path = member.path[length_of_subfolder:]
            # On the rare occasion that a leading "/" sneaks in, remove it;
            # otherwise restore will try to un-tar to "/".
            member.path = member.path.strip("/")
            yield member
def extract(self, path, cb=None):
    file_path_list = []
    data_tar = TarFile(fileobj=self.source.extractfile('./DATA'))
    for member in data_tar.getmembers():
        if member.name in ('/', ''):
            # extract can't handle making '/' when installing '/'
            continue
        file_path = os.path.join(path, member.name)
        if cb is not None:
            cb(file_path)
        data_tar.extract(member, path)
        if member.isfile():
            file_path_list.append(file_path)
    return file_path_list
class FileSeekerTar(FileSeekerBase):
    def __init__(self, tar_file_path, temp_folder):
        FileSeekerBase.__init__(self)
        self.tar_file = TarFile(tar_file_path)
        self.temp_folder = temp_folder

    def search(self, filepattern):
        pathlist = []
        for member in self.tar_file.getmembers():
            if fnmatch.fnmatch(member.name, filepattern):
                try:
                    self.tar_file.extract(member.name, path=self.temp_folder)
                    pathlist.append(
                        os.path.join(self.temp_folder, Path(member.name)))
                except Exception as ex:
                    logfunc(f'Could not write file to filesystem: {ex}')
        return pathlist

    def cleanup(self):
        self.tar_file.close()
def _untar_layers(dir, layers):
    # Untar layer filesystem bundle
    for layer in layers:
        layer_tar = TarFile(dir + "/" + layer)
        for member in layer_tar.getmembers():
            try:
                layer_tar.extract(member, path=dir, set_attrs=False)
            except (ValueError, ReadError) as ex:
                if InternalServer.is_debug_logging_enabled():
                    message = "Unexpected exception of type {0} occurred while untaring the docker image: {1!r}" \
                        .format(type(ex).__name__,
                                ex.get_message() if type(ex).__name__ == 'DagdaError' else ex.args)
                    DagdaLogger.get_logger().debug(message)
            except PermissionError:
                message = "Unexpected error occurred while untaring the docker image: " + \
                          "Operation not permitted on {0!r}".format(member.name)
                DagdaLogger.get_logger().warning(message)
    # Clean up
    for layer in layers:
        clean_up(dir + "/" + layer[:-10])
def run(self):
    """
    Interesting magic to get a source dist and running trial on it.

    NOTE: there is magic going on here! If you know a better way feel
    free to update it.
    """
    # Clean out dist/
    if os.path.exists("dist"):
        for root, dirs, files in os.walk("dist", topdown=False):
            for name in files:
                os.remove(os.path.join(root, name))
            for name in dirs:
                os.rmdir(os.path.join(root, name))
    # Import setup making it as if we ran setup.py with the sdist arg
    sys.argv.append("sdist")
    import setup  # @Reimport @UnresolvedImport @UnusedImport

    try:
        # attempt to extract the sdist data
        from gzip import GzipFile
        from tarfile import TarFile

        # We open up the gzip as well as using the first item as the sdist
        gz = GzipFile(os.path.join("dist", os.listdir("dist")[0]))
        tf = TarFile(fileobj=gz)

        # Make the output dir and generate the extract path
        os.mkdir(os.path.join("dist", "sdist_test"))
        ex_path = os.path.join("dist", "sdist_test",
                               tf.getmembers()[0].name, "buildbot", "test")

        # Extract the data and run tests
        print("Extracting to %s" % ex_path)
        tf.extractall(os.path.join("dist", "sdist_test"))
        print("Executing tests ...")
        self._run(os.path.normpath(os.path.abspath(ex_path)))
    except IndexError:
        # We get called twice and the IndexError is OK
        pass
def extract_snapshot(tar: tarfile.TarFile, domains: Dict[str, DomainSpec]) -> None:
    """Used to restore a configuration snapshot for "discard changes"."""
    tar_domains = {}
    for member in tar.getmembers():
        try:
            if member.name.endswith(".tar.gz"):
                tar_domains[member.name[:-7]] = member
        except Exception:
            pass

    # We are using the var_dir, because tmp_dir might not have enough space
    restore_dir = cmk.utils.paths.var_dir + "/wato/snapshots/restore_snapshot"
    if not os.path.exists(restore_dir):
        os.makedirs(restore_dir)

    def check_domain(domain: DomainSpec, tar_member: tarfile.TarInfo) -> List[str]:
        errors = []
        prefix = domain["prefix"]

        def check_exists_or_writable(path_tokens: List[str]) -> bool:
            if not path_tokens:
                return False
            if os.path.exists("/".join(path_tokens)):
                if os.access("/".join(path_tokens), os.W_OK):
                    return True  # exists and writable
                errors.append(
                    _("Permission problem: Path not writable %s") % "/".join(path_tokens))
                return False  # not writable
            return check_exists_or_writable(path_tokens[:-1])

        # The complete tar file never fits in stringIO buffer..
        tar.extract(tar_member, restore_dir)

        # Older versions of python tarfile handle empty subtar archives :(
        # This won't work: subtar = tarfile.open("%s/%s" % (restore_dir, tar_member.name))
        p = subprocess.Popen(
            ["tar", "tzf", "%s/%s" % (restore_dir, tar_member.name)],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            encoding="utf-8",
        )
        stdout, stderr = p.communicate()
        if stderr:
            errors.append(_("Contains corrupt file %s") % tar_member.name)
            return errors

        # communicate() returns one string; iterate over its lines, not its
        # characters.
        for line in stdout.splitlines():
            full_path = prefix + "/" + line
            path_tokens = full_path.split("/")
            check_exists_or_writable(path_tokens)

        # Cleanup
        os.unlink("%s/%s" % (restore_dir, tar_member.name))

        return errors

    def cleanup_domain(domain: DomainSpec) -> List[str]:
        # Some domains, e.g. authorization, do not get a cleanup
        if domain.get("cleanup") is False:
            return []

        def path_valid(prefix: str, path: str) -> bool:
            if path.startswith("/") or path.startswith(".."):
                return False
            return True

        # Remove old stuff
        for what, path in domain.get("paths", {}):
            if not path_valid(domain["prefix"], path):
                continue
            full_path = "%s/%s" % (domain["prefix"], path)
            if os.path.exists(full_path):
                if what == "dir":
                    exclude_files = []
                    for pattern in domain.get("exclude", []):
                        if "*" in pattern:
                            exclude_files.extend(
                                glob.glob("%s/%s" % (domain["prefix"], pattern)))
                        else:
                            exclude_files.append("%s/%s" % (domain["prefix"], pattern))
                    _cleanup_dir(full_path, exclude_files)
                else:
                    os.remove(full_path)
        return []

    def extract_domain(domain: DomainSpec, tar_member: tarfile.TarInfo) -> List[str]:
        try:
            target_dir = domain.get("prefix")
            if not target_dir:
                return []
            # The complete tar.gz file never fits in stringIO buffer..
            tar.extract(tar_member, restore_dir)

            command = [
                "tar", "xzf",
                "%s/%s" % (restore_dir, tar_member.name), "-C", target_dir
            ]
            p = subprocess.Popen(
                command,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                encoding="utf-8",
            )
            _stdout, stderr = p.communicate()
            exit_code = p.wait()
            if exit_code:
                return ["%s - %s" % (domain["title"], stderr)]
        except Exception as e:
            return ["%s - %s" % (domain["title"], str(e))]
        return []

    def execute_restore(domain: DomainSpec, is_pre_restore: bool = True) -> List[str]:
        if is_pre_restore:
            if "pre_restore" in domain:
                return domain["pre_restore"]()
        else:
            if "post_restore" in domain:
                return domain["post_restore"]()
        return []

    total_errors = []
    logger.info("Restoring snapshot: %s", tar.name)
    logger.info("Domains: %s", ", ".join(tar_domains.keys()))
    for what, abort_on_error, handler in [
        ("Permissions", True, check_domain),
        ("Pre-Restore", True,
         lambda domain, tar_member: execute_restore(domain, is_pre_restore=True)),
        ("Cleanup", False, lambda domain, tar_member: cleanup_domain(domain)),
        ("Extract", False, extract_domain),
        ("Post-Restore", False,
         lambda domain, tar_member: execute_restore(domain, is_pre_restore=False)),
    ]:
        errors: List[str] = []
        for name, tar_member in tar_domains.items():
            if name in domains:
                try:
                    dom_errors = handler(domains[name], tar_member)
                    errors.extend(dom_errors or [])
                except Exception:
                    # This should NEVER happen
                    err_info = "Restore-Phase: %s, Domain: %s\nError: %s" % (
                        what, name, traceback.format_exc())
                    errors.append(err_info)
                    logger.critical(err_info)
                    if not abort_on_error:
                        # At this state, the restored data is broken.
                        # We still try to apply the rest of the snapshot.
                        # Hopefully the log entry helps in identifying the problem..
                        logger.critical(
                            "Snapshot restore FAILED! (possible loss of snapshot data)")
                        continue
                    break

        if errors:
            if what == "Permissions":
                errors = list(set(errors))
                errors.append(
                    _("<br>If there are permission problems, please ensure the site user has write permissions."
                     ))
            if abort_on_error:
                raise MKGeneralException(
                    _("%s - Unable to restore snapshot:<br>%s") % (what, "<br>".join(errors)))
            total_errors.extend(errors)

    # Cleanup
    _wipe_directory(restore_dir)

    if total_errors:
        raise MKGeneralException(
            _("Errors on restoring snapshot:<br>%s") % "<br>".join(total_errors))
def create_new_docker_image(manifest: dict,
                            image_output_path: str,
                            img: tarfile.TarFile,
                            old_layer_digest: str,
                            new_layer_path: str,
                            new_layer_digest: str,
                            json_metadata_last_layer: dict = None,
                            json_metadata_root: dict = None):
    with tarfile.open(image_output_path, "w") as s:
        for f in img.getmembers():
            log.debug("    _> Processing file: {}".format(f.name))

            # Add new manifest
            if f.name == "manifest.json":
                # Dump Manifest to JSON
                new_manifest_json = json.dumps(manifest).encode()
                replace_or_append_file_to_layer("manifest.json", new_manifest_json, s)

            #
            # NEW LAYER INFO
            #
            elif old_layer_digest in f.name:
                # Skip for old layer.tar file
                if f.name == "{}/layer.tar".format(old_layer_digest) or \
                        "/" not in f.name:
                    log.debug("    _> Replacing layer {} by {}".format(
                        f.name, new_layer_digest))
                    replace_or_append_file_to_layer(
                        "{}/layer.tar".format(new_layer_digest), new_layer_path, s)
                else:
                    #
                    # Extra files: "json" and "VERSION"
                    #
                    c = read_file_from_image(img, f.name)
                    if "json" in f.name:
                        # Modify the JSON content to add the new hash
                        if json_metadata_last_layer:
                            c = json.dumps(json_metadata_last_layer).encode()
                        else:
                            c = c.decode().replace(old_layer_digest,
                                                   new_layer_digest).encode()
                    replace_or_append_file_to_layer(
                        "{}/{}".format(new_layer_digest, os.path.basename(f.name)), c, s)

            #
            # Root .json file with the global info
            #
            elif "repositories" in f.name:
                c = read_file_from_image(img, f, autoclose=False)
                j = json.loads(c.decode())
                image = list(j.keys())[0]
                tag = list(j[image].keys())[0]
                # Update the latest layer
                j[image][tag] = new_layer_digest
                new_c = json.dumps(j).encode()
                replace_or_append_file_to_layer(f.name, new_c, s)

            elif ".json" in f.name and "/" not in f.name:
                c = read_file_from_image(img, f, autoclose=False)
                # Modify the JSON content to add the new hash
                if json_metadata_root:
                    j = json_metadata_root
                else:
                    j = json.loads(c.decode())
                    j["rootfs"]["diff_ids"][-1] = "sha256:{}".format(new_layer_digest)
                new_c = json.dumps(j).encode()
                replace_or_append_file_to_layer(f.name, new_c, s)

            # Add the rest of files / dirs
            else:
                s.addfile(f, img.extractfile(f))
import numpy as np
from tarfile import TarFile
from tqdm import tqdm

from misc.shared import DATA_DIR
from misc.utils import replace_part

for dir_ in tqdm(list(DATA_DIR.glob("Sessions_50fps/*/*"))):
    openface_landmarks = replace_part(
        dir_, "Sessions_50fps",
        "Sessions_50fps_openface_51_landmarks").with_suffix("")

    tar_file = TarFile(
        replace_part(dir_, "Sessions_50fps",
                     "Sessions_50fps_openface").with_suffix(".tar"))

    openface_landmarks.mkdir(parents=True, exist_ok=True)

    d = tar_file.extractfile([
        x for x in tar_file.getmembers() if x.path.endswith(".csv")
    ][0]).readlines()

    for i, line in enumerate(d):
        if i == 0:
            # Skip the CSV header row.
            continue
        split_line = line.decode('utf-8').split(",")
        npy_file = (openface_landmarks / split_line[0].zfill(5)).with_suffix(".npy")
        data = np.array([float(x) for x in split_line[299:435]]).reshape(2, -1).T[17:]
        np.save(npy_file, data)