def analyze():
    """Handle an uploaded tar archive and render a listing of its members.

    The upload is saved as ``temp/<md5 of the client address>.tar``, checked
    with ``is_tarfile``, unpacked into ``temp/<md5>`` and its member names are
    handed to the ``analyze`` template.  The saved upload is removed again
    before the function returns.

    Because the archive comes from an untrusted client, only regular files
    and directories whose resolved destination stays inside the extraction
    directory are accepted; any other archive is answered with the generic
    error response.

    Returns:
        The rendered template on success, an inline ``<script>`` alert when
        the upload is not a tar file, or ``response('Error', 500)`` when
        anything fails (including removal of the saved upload).
    """
    try:
        fn = 'temp/{}.tar'.format(
            md5(request.remote_addr.encode()).hexdigest())

        if request.method == 'POST':
            fp = request.files['file']
            fp.save(fn)

            if not is_tarfile(fn):
                return '<script>alert("Uploaded file is not \'tar\' file.");history.back(-1);</script>'

            target = fn.split('.')[0]
            bd1 = fn.split('/')[1].split('.')[0]

            tf = TarFile(fn)
            try:
                root = os.path.realpath(target)
                for member in tf.getmembers():
                    # Links could redirect later writes outside the target
                    # directory, so only plain files and directories pass.
                    if not (member.isfile() or member.isdir()):
                        raise ValueError(
                            'unsupported member type: ' + member.name)
                    dest = os.path.realpath(os.path.join(root, member.name))
                    if dest != root and not dest.startswith(root + os.sep):
                        raise ValueError(
                            'member escapes target directory: ' + member.name)
                tf.extractall(target)
                names = tf.getnames()
            finally:
                tf.close()

            return render_template('analyze', path=bd1, fn=bd1, files=names)
    except Exception:
        return response('Error', 500)
    finally:
        # NOTE: a return inside ``finally`` replaces whatever the code above
        # was about to return; a failed cleanup is therefore reported as an
        # error even if the request itself succeeded (original behaviour).
        try:
            os.remove(fn)
        except Exception:
            return response('Error', 500)
def main():
    """Validate the name and layout of a homework submission archive.

    Reads the archive path from ``sys.argv[1]``.  The file must exist, end in
    ``.zip`` or ``.tar`` (case-insensitive) and be named ``hw2_<studentID>``
    plus the extension.  The archive must contain ``p1/p1_<id>.py``,
    ``p2/p2_<id>.py`` and ``report_<id>.pdf``.

    Every failure terminates the process through ``sys.exit``; on success a
    confirmation line is printed and the function returns ``None``.
    """
    # check arguments
    if len(sys.argv) != 2:
        sys.exit("Error: You should provide 1 argument which is the path to your zip/tar file (" + str(len(sys.argv) - 1) + " provided)")

    # check valid path name (the handle is only an existence probe)
    pathName = sys.argv[1]
    try:
        with open(pathName):
            pass
    except IOError:
        sys.exit("Error: File \"" + pathName + "\" does not exist.")

    # check zip or tar
    fileName = ntpath.basename(pathName)
    extension = fileName[-4:].lower()
    isZip = extension == ".zip"
    isTar = extension == ".tar"
    if not (isZip or isTar):
        sys.exit("Error: Invalid file format (only .zip/.tar files accepted)")

    # check zip/tar file name
    if fileName[0:4] != "hw2_":
        sys.exit("Error: \"" + fileName + "\" is not a valid name.")
    studentID = fileName[4:-4]

    # define correct file structure here
    fileListRef = ["p1/p1_" + studentID + ".py",
                   "p2/p2_" + studentID + ".py",
                   "report_" + studentID + ".pdf"]

    # list the archive members, closing the archive afterwards
    if isZip:
        with ZipFile(pathName) as archive:
            fileList = archive.namelist()
    else:
        with TarFile(pathName) as archive:
            fileList = archive.getnames()

    missing = [name for name in fileListRef if name not in fileList]
    for name in missing:
        print("Error: " + name + " missing!")
    if missing:
        # Non-zero status so that callers can detect the failed check.
        sys.exit(1)
    print("Passed! Your student ID is " + studentID + ".")
def tar_contents(cls, tarfile_name, passwd=None, names=None):
    """Return the contents of members of a tar archive keyed by member name.

    Args:
        tarfile_name: Path of the tar archive to read.
        passwd: Accepted but not used by this implementation.
        names: Member names to read; when ``None`` every name reported by
            the archive is considered.

    Returns:
        dict mapping member name to the bytes read from that member.
        Members for which the archive provides no data stream (for example
        directories) are left out.
    """
    results = {}
    with TarFile(tarfile_name) as tf:
        if names is None:
            names = tf.getnames()
        for n in names:
            member_file = tf.extractfile(n)
            if member_file is None:
                # extractfile() yields None for members without data,
                # e.g. directories listed by getnames().
                continue
            try:
                results[n] = member_file.read()
            finally:
                member_file.close()
    return results
class TarFileWrapper(ArchiveFileWrapper):
    """Archive wrapper whose ``archive`` attribute is a ``TarFile``."""

    def __init__(self, fh, *args, **kwargs):
        """Open ``fh`` as a tar archive, then initialise the base wrapper.

        ``fh`` is passed to ``TarFile`` as its ``fileobj``; the remaining
        arguments are forwarded unchanged to the parent constructor.
        """
        tar_archive = TarFile(fileobj=fh)
        self.archive = tar_archive
        super(TarFileWrapper, self).__init__(*args, **kwargs)

    def extract_file(self, *args, **kwarg):
        """Forward the call to ``TarFile.extractfile`` and return its result."""
        member_file = self.archive.extractfile(*args, **kwarg)
        return member_file

    def names(self):
        """Return the member names reported by the wrapped archive."""
        member_names = self.archive.getnames()
        return member_names
def tar_extract_callback(archive: tarfile.TarFile):
    """Yield every member of ``archive`` while driving a progress bar.

    A ``ProgressBar`` sized to the number of names in the archive is stored
    in the module-level ``pbar`` and started.  Before each member is yielded
    the bar is updated with the 1-based position of that member.  Once the
    archive is exhausted the bar is finished and ``pbar`` is reset to
    ``None``.
    """
    global pbar

    total = len(archive.getnames())
    pbar = ProgressBar(maxval=total)
    pbar.start()

    for position, member in enumerate(archive, start=1):
        pbar.update(position)
        yield member

    pbar.finish()
    pbar = None
class TarReader(Interface):
    """Reader that looks up and reads files inside a tar archive.

    ``chdir`` is a string prefix that is prepended to every path handed to
    the lookup methods.
    """

    def __init__(self, origin_path: str, chdir=''):
        """Open the tar archive at ``origin_path`` for reading."""
        self.path = origin_path
        self.reader = TarFile(self.path, 'r')
        self.chdir = chdir

    def file_exists(self, path: str):
        """Return True if any member name starts with ``chdir + path``.

        Trailing slashes of ``path`` are ignored.
        """
        prefix = "%s" % self.chdir + path.rstrip("/")
        return any(name.startswith(prefix) for name in self.reader.getnames())

    def fetch_file_contents(self, path: str):
        """Return the UTF-8 decoded contents of member ``chdir + path``.

        Raises:
            KeyError: raised by ``TarFile.extractfile`` when the member does
                not exist.
            IOError: when the member carries no data (e.g. a directory).
        """
        member_name = self.chdir + path
        # TarFile has no read()/rea() method; member data is obtained
        # through extractfile().
        member_file = self.reader.extractfile(member_name)
        if member_file is None:
            raise IOError("%s is not a regular file in %s"
                          % (member_name, self.path))
        try:
            return member_file.read().decode('utf-8')
        finally:
            member_file.close()
def extract_file(tarball: tarfile.TarFile, path: Path) -> None:
    """Extract one file from the tarball and write it to ``path``.

    Parameters
    ----------
    tarball
        An open tar archive.  The name of its first member is used as the
        root directory inside the archive.
    path
        Location of the file relative to that root.  The extracted bytes are
        written to this same path on disk.

    Raises
    ------
    KeyError
        Raised by ``TarFile.extractfile`` when the member does not exist.
    IOError
        When the member carries no data (e.g. a directory).
    """
    # Member names inside a tar archive always use forward slashes.
    source_path = Path(tarball.getnames()[0], path).as_posix()
    buffer = tarball.extractfile(source_path)
    if buffer is None:
        raise IOError(f"{source_path} is not a regular file in the tarball")
    with buffer, path.open("wb") as file:
        file.write(buffer.read())
def _check_tar_file(self, content, rootdir, datafiles, simpleNames=False, noTxt=False):
    """Write ``content`` to a temporary file and check the tar member names.

    When the written file is non-empty it must be recognised by
    ``is_tarfile``; its member names are then passed to ``_check_names``.
    For empty content ``_check_names`` is called with an empty name list.
    ``rootdir``, ``datafiles``, ``simpleNames`` and ``noTxt`` are forwarded
    to ``_check_names`` unchanged.
    """
    with NamedTemporaryFile('w') as tmp:
        tmp.write(content)
        tmp.flush()
        if getsize(tmp.name) > 0:
            expect(is_tarfile(tmp.name)).to_be_truthy()
            # Opened before the try block: if opening fails there is nothing
            # to close and the original error must not be masked.
            tf = TarFile(tmp.name, 'r')
            try:
                self._check_names(datafiles, tf.getnames(), rootdir,
                                  simpleNames, noTxt)
            finally:
                tf.close()
        else:
            self._check_names(datafiles, [], rootdir, simpleNames, noTxt)
def _check_tar_file(self, content, rootdir, datafiles, simpleNames=False, noTxt=False): with NamedTemporaryFile('w') as tempfile: for c in content: tempfile.write(c) tempfile.flush() if getsize(tempfile.name) > 0: self.assertTrue(is_tarfile(tempfile.name)) try: tf = TarFile(tempfile.name, 'r') self._check_names(datafiles, tf.getnames(), rootdir, simpleNames, noTxt) finally: tf.close() else: self._check_names(datafiles, [], rootdir, simpleNames, noTxt)
def _unpack_data(self, tar: TarFile, data_archive: TarFile): with io.BytesIO( str.encode("\n".join([ member.name.lstrip(".") for member in data_archive if member.name.lstrip(".") ]) + "\n")) as fileobj: info = TarInfo("list") info.size = fileobj.getbuffer().nbytes self._unpack_info_file(tar, info, fileobj) names = tar.getnames() for member in (member for member in data_archive if member.name not in names): if member.islnk() or member.issym() or member.isdir(): tar.addfile(member) else: with data_archive.extractfile(member) as fileobj: tar.addfile(member, fileobj)
def _check_tar_file(self, content, rootdir, datafiles, simpleNames=False, noTxt=False):
    """Write the chunks of ``content`` to a temp file and check the tar names.

    When the written file is non-empty it must be recognised by
    ``is_tarfile``; its member names are then passed to ``_check_names``.
    For empty content ``_check_names`` is called with an empty name list.
    ``rootdir``, ``datafiles``, ``simpleNames`` and ``noTxt`` are forwarded
    to ``_check_names`` unchanged.
    """
    with NamedTemporaryFile('w') as tmp:
        for chunk in content:
            tmp.write(chunk)
        tmp.flush()
        if getsize(tmp.name) > 0:
            expect(is_tarfile(tmp.name)).to_be_truthy()
            # Opened before the try block: if opening fails there is nothing
            # to close and the original error must not be masked.
            tf = TarFile(tmp.name, 'r')
            try:
                self._check_names(datafiles, tf.getnames(), rootdir,
                                  simpleNames, noTxt)
            finally:
                tf.close()
        else:
            self._check_names(datafiles, [], rootdir, simpleNames, noTxt)
def prepare_tarball(url, app):
    ''' Prepare a tarball with app.json from the source URL.

        The gzip-compressed tar archive fetched from ``url`` is unpacked
        into a temporary directory, ``app`` is dumped as JSON to
        ``app.json`` inside the directory named by the common prefix of the
        member names, and that directory is packed again as a ``gztar``
        archive.  The temporary directory is removed afterwards.

        Returns the path of the new archive as reported by ``make_archive``.
        Raises ``Exception`` when the common prefix is not a directory.
    '''
    got = get(url, allow_redirects=True)
    # NOTE(review): on Python 3 ``got.content`` is bytes and would need a
    # bytes buffer here -- confirm the targeted interpreter version.
    raw = GzipFile(fileobj=StringIO(got.content))
    tar = TarFile(fileobj=raw)

    # Created outside the try block so that the cleanup below never refers
    # to an unbound name when mkdtemp() itself fails.
    dirpath = mkdtemp(prefix='display-screen-')

    try:
        rootdir = join(dirpath, commonprefix(tar.getnames()))
        # NOTE(review): extractall() trusts the member paths of a downloaded
        # archive; consider validating them before unpacking.
        tar.extractall(dirpath)

        if not isdir(rootdir):
            raise Exception('"{0}" is not a directory'.format(rootdir))

        with open(join(rootdir, 'app.json'), 'w') as out:
            json.dump(app, out)

        tarpath = make_archive(dirpath, 'gztar', rootdir, '.')

    finally:
        tar.close()
        raw.close()
        rmtree(dirpath)

    return tarpath
def prepare_tarball(url, app):
    """ Prepare a tarball with app.json from the source URL.

        The gzip-compressed tar archive fetched from ``url`` is unpacked
        into a temporary directory, ``app`` is dumped as JSON to
        ``app.json`` inside the directory named by the common prefix of the
        member names, and that directory is packed again as a ``gztar``
        archive.  The temporary directory is removed afterwards.

        Returns the path of the new archive as reported by ``make_archive``.
        Raises ``Exception`` when the common prefix is not a directory.
    """
    got = get(url, allow_redirects=True)
    # NOTE(review): on Python 3 ``got.content`` is bytes and would need a
    # bytes buffer here -- confirm the targeted interpreter version.
    raw = GzipFile(fileobj=StringIO(got.content))
    tar = TarFile(fileobj=raw)

    # Created outside the try block so that the cleanup below never refers
    # to an unbound name when mkdtemp() itself fails.
    dirpath = mkdtemp(prefix="display-screen-")

    try:
        rootdir = join(dirpath, commonprefix(tar.getnames()))
        # NOTE(review): extractall() trusts the member paths of a downloaded
        # archive; consider validating them before unpacking.
        tar.extractall(dirpath)

        if not isdir(rootdir):
            raise Exception('"{0}" is not a directory'.format(rootdir))

        with open(join(rootdir, "app.json"), "w") as out:
            json.dump(app, out)

        tarpath = make_archive(dirpath, "gztar", rootdir, ".")

    finally:
        tar.close()
        raw.close()
        rmtree(dirpath)

    return tarpath
async def _db_file_member_as_model(
    db_file: tarfile.TarFile, regex: str = "(/desc|/files)$"
) -> AsyncIterator[models.RepoDbMemberData]:
    """Yield the selected members of a database file as RepoDbMemberData

    Member names of db_file are filtered with a regular expression.  The
    member type is chosen from the end of the member name: '/desc' maps to
    DESC, '/files' maps to FILES and anything else to UNKNOWN.

    Parameters
    ----------
    db_file: tarfile.TarFile
        An instance of TarFile representing a repository database
    regex: str
        A regular expression used to filter the names of the members contained in db_file
        (defaults to '(/desc|/files)$')
    """

    selected_names = [name for name in db_file.getnames() if re.search(regex, name)]

    for name in selected_names:
        if re.search("(/files)$", name):
            file_type = defaults.RepoDbMemberType.FILES
        elif re.search("(/desc)$", name):
            file_type = defaults.RepoDbMemberType.DESC
        else:
            file_type = defaults.RepoDbMemberType.UNKNOWN

        # The package name is resolved before the member data is read.
        package_name = await _extract_db_member_package_name(name=name)
        raw_bytes = db_file.extractfile(name).read()  # type: ignore

        yield models.RepoDbMemberData(
            member_type=file_type,
            name=package_name,
            data=io.StringIO(raw_bytes.decode("utf-8")),
        )
#!/usr/bin/python2.7
from tarfile import TarFile
from datetime import datetime

# Archive holding one member per timestamped project-count dump.
FILENAME = "projectcounts-2008.tar"
MAX_REQUESTS = 33056088 * 0.4

if __name__ == "__main__":
    tar = TarFile(FILENAME)
    inidate = datetime(year=2008, month=1, day=1)
    maxrequests = 0

    for filename in tar.getnames():
        # Member names are split into exactly three dash-separated parts:
        # a prefix, a YYYYMMDD date and an HHMMSS time.
        pre, date, time = filename.split("-")
        year, month, day = int(date[0:4]), int(date[4:6]), int(date[6:8])
        hour, minute, second = int(time[0:2]), int(time[2:4]), int(time[4:6])
        date = datetime(year=year, month=month, day=day,
                        hour=hour, minute=minute, second=second)

        # Whole seconds between the start of 2008 and this dump.
        td = date - inidate
        seconds = td.days * 24 * 60 * 60 + td.seconds

        f = tar.extractfile(filename)
        for line in f.readlines():
            if not line.startswith("en -"):
                continue
            line = line.replace("\n", "").replace("\r", "")
            lineSplit = line.split(" ")
            requests = int(lineSplit[2])
class ArchiveManager:
    """Page-by-page access to the files stored in a zip/cbz or tar/cbt archive.

    After ``open_zip`` the sorted member names are kept in ``listfile`` and
    ``listfile_index`` tracks the current page.
    """

    _ZIP_EXTENSIONS = (".zip", ".cbz")
    _TAR_EXTENSIONS = (".tar", ".cbt")

    def __init__(self):
        """Create a manager with no archive opened."""
        self.archive = None
        self.archive_type = None
        self.listfile = None
        self.listfile_index = 0
        self.archive_path = None
        self.archive_length = 0
        self.hit_next = 0

    def open_zip(self, archive_path):
        """Open ``archive_path`` and build the sorted list of its files.

        Despite the name both zip (``.zip``/``.cbz``) and tar
        (``.tar``/``.cbt``) archives are accepted; the extension is matched
        case-insensitively.  A previously opened archive is closed first.

        Raises:
            ValueError: when the extension is not supported.  The manager
                state is left untouched in that case.
        """
        filename, file_extension = os.path.splitext(archive_path)
        file_extension = file_extension.lower()
        if file_extension not in self._ZIP_EXTENSIONS + self._TAR_EXTENSIONS:
            raise ValueError("archive not supported")

        if self.archive:
            self.archive.close()
            self.archive = None
        self.archive_path = archive_path

        if file_extension in self._ZIP_EXTENSIONS:
            self.archive = ZipFile(archive_path, 'r')
            self.archive_type = "zip"
            namelist = self.archive.namelist()
        else:
            self.archive = TarFile(archive_path, 'r')
            self.archive_type = "tar"
            # Tar directory names carry no trailing '/', so directories are
            # excluded by member type instead.
            namelist = [member.name for member in self.archive.getmembers()
                        if not member.isdir()]

        # we sort the files by decimal found, excluding directories /
        self.listfile = sorted([x for x in namelist if not x.endswith('/')],
                               key=alphanum_key)
        self.archive_length = len(self.listfile)
        self.listfile_index = 0

    def delete_current_archive(self):
        """Close the current archive and delete its file from disk.

        Returns:
            ``None`` when no archive is open, ``True`` when the file was
            removed and ``False`` when removal failed (the error is printed).
        """
        if not self.archive or not self.archive_path:
            return
        self.archive.close()
        try:
            os.remove(self.archive_path)
            return True
        except Exception as e:
            print(e)
            return False

    def first_page(self):
        """Jump to the first file and return its contents (``None`` if empty)."""
        if not self.archive or not self.listfile:
            return None
        # Keep the page counter in step, mirroring last_page().
        self.listfile_index = 0
        return self.get_file(self.listfile[0])

    def last_page(self):
        """Jump to the last file and return its contents (``None`` if empty)."""
        if not self.archive or not self.listfile:
            return None
        self.listfile_index = len(self.listfile) - 1
        return self.get_file(self.listfile[self.listfile_index])

    def get_file(self, name):
        """Return a ``BytesIO`` filled with the bytes of member ``name``.

        The data is written into the buffer, so its position is at the end
        of the data.  ``None`` is returned when no supported archive type is
        set or when a tar member carries no data.
        """
        image = BytesIO()
        if self.archive_type == "zip":
            image.write(self.archive.read(name))
        elif self.archive_type == "tar":
            tarinfo = self.archive.getmember(name)
            image_file = self.archive.extractfile(tarinfo)
            if image_file is None:
                return None
            try:
                image.write(image_file.read())
            finally:
                image_file.close()
        else:
            return None
        return image

    def next(self):
        """Advance one page and return it, or ``None`` at the end."""
        if not self.archive:
            return None
        self.listfile_index = self.listfile_index + 1
        if self.listfile_index >= self.archive_length:
            self.listfile_index = self.archive_length - 1
            return None
        filename = self.listfile[self.listfile_index]
        return self.get_file(filename)

    def previous(self):
        """Go back one page and return it, or ``None`` at the beginning."""
        if not self.archive:
            return None
        self.listfile_index = self.listfile_index - 1
        if self.listfile_index < 0:
            self.listfile_index = 0
            return None
        filename = self.listfile[self.listfile_index]
        return self.get_file(filename)

    def get_current_archive_name(self):
        """Return the base name of the current archive path."""
        return os.path.basename(self.archive_path)

    def get_display_counter(self):
        """Return the 1-based page position as ``"<current>/<total>"``."""
        return "%i/%i" % (self.listfile_index + 1, self.archive_length)