def extract(path, timeout=None, extract_dir=None, content_type=None):
    """
    Unpack ``path`` and yield a handle describing where it was unpacked.

    This is a generator: it yields exactly one ``Extraction`` built from the
    extractor's working directory and the content type the extractor
    reports. When the generator is resumed or closed, any temporary
    directory the extractor created is removed.

    Args:
        path: Location of the archive to unpack.
        timeout: Forwarded unchanged to the extractor's constructor
            (a limit in seconds, per the original documentation).
        extract_dir: Parent directory handed to the extractor for its
            temporary directory.
        content_type: MIME type of ``path``. When falsy it is detected
            with ``content_type_from_file``.

    Yields:
        Extraction: the extraction directory paired with the archive's
        content type.
    """
    if not content_type:
        content_type = content_type_from_file(path)

    # Zip archives get their own extractor; everything else goes to tar.
    is_zip = content_type == "application/zip"
    extractor_cls = ZipExtractor if is_zip else TarExtractor
    extractor = extractor_cls(timeout=timeout)

    try:
        unpacked = extractor.from_path(
            path,
            extract_dir=extract_dir,
            content_type=content_type,
        )
        # Report the type the extractor settled on, not the one passed in.
        reported_type = extractor.content_type
        yield Extraction(unpacked.tmp_dir, reported_type)
    finally:
        # Only delete directories the extractor made itself; the flag stays
        # unset when the extractor merely adopted an existing directory.
        if extractor.created_tmp_dir:
            fs.remove(extractor.tmp_dir, chmod=True)
def from_path(self, path, extract_dir=None, content_type=None):
    """
    Point this extractor at ``path``, unpacking it first if necessary.

    If ``path`` is already a directory it is adopted as ``self.tmp_dir``
    and nothing else is touched. Otherwise a fresh ``insights-`` prefixed
    temporary directory is created under ``extract_dir``, recorded in
    ``self.tmp_dir`` with ``self.created_tmp_dir`` set to True, and the
    archive is unpacked into it by running ``tar`` through ``subproc.call``.

    Args:
        path: A directory, or an archive file to unpack.
        extract_dir: Parent directory for the temporary directory
            (passed to ``tempfile.mkdtemp`` as ``dir``).
        content_type: MIME type of the archive. When falsy it is detected
            with ``content_type_from_file``.

    Returns:
        This extractor instance.
    """
    if os.path.isdir(path):
        # Already unpacked: use the directory in place.
        self.tmp_dir = path
        return self

    detected_type = content_type or content_type_from_file(path)
    self.content_type = detected_type
    flag = self._tar_flag_for_content_type(detected_type)

    target = tempfile.mkdtemp(prefix="insights-", dir=extract_dir)
    self.tmp_dir = target
    # Set the flag before running tar so the directory is recorded as ours
    # even if extraction fails.
    self.created_tmp_dir = True

    # NOTE(review): ``path`` and the target directory are interpolated
    # unquoted; paths containing whitespace may be mis-tokenised depending
    # on how ``subproc.call`` splits the string -- confirm.
    tar_cmd = "tar --delay-directory-restore %s -x --exclude=*/dev/null -f %s -C %s" % (
        flag,
        path,
        target,
    )

    logging.debug("Extracting files in '%s'", target)
    subproc.call(tar_cmd, timeout=self.timeout)
    return self