def libarchive_getsize(path: Path, filename: str) -> int:
    """Return the uncompressed size of the member *filename* inside the
    archive at *path*.

    Raises FileNotFoundError if no member's pathname matches exactly.
    """
    with path.open('rb') as fp:
        with libarchive.Archive(fp, 'r') as archive:
            for member in archive:
                if member.pathname == filename:
                    return member.size
    raise FileNotFoundError(filename)
def debuginfo_parser(adebug_package, filename):
    """Return a cStringIO with the contents of *filename* extracted from the
    debuginfo package *adebug_package*, or None when the file is not found
    or the package cannot be read.

    A *filename* of "dwz" selects the first member under
    ./usr/lib/debug/.dwz/ instead of matching by suffix.
    """
    try:
        dfd = open(adebug_package, "rb")
        try:
            da = libarchive.Archive(dfd)
            try:
                for entry in da:
                    size = entry.size
                    # skip 0 byte files only, size can be 0 due to compression also!
                    if size == 0:
                        continue
                    # skip directories
                    if stat.S_ISDIR(entry.mode):
                        continue
                    # .dwz stuff is special
                    if filename == "dwz" and \
                            entry.pathname.startswith("./usr/lib/debug/.dwz/"):
                        return cStringIO(da.read(entry.size))
                    elif entry.pathname.endswith(filename):
                        return cStringIO(da.read(entry.size))
            finally:
                # fix: the archive and file handle used to leak when no entry
                # matched (the original only closed them on the return paths)
                da.close()
        finally:
            dfd.close()
    except Exception as exc:  # fix: "except Exception, exc" is Py2-only syntax
        print(adebug_package, str(exc))
        traceback.print_exc()
def __init__(self, name: str):
    """Open the archive *name* and index every non-empty entry by the
    package name parsed out of its UTF-8 contents."""
    self.name = name
    self.entries = {}
    with libarchive.Archive(self.name, 'r') as archive:
        for entry in archive:
            if entry.size == 0:
                continue
            parsed = self.parse_entry(str(archive.read(entry.size), 'utf-8'))
            self.entries[parsed.name] = parsed
def extractFiles(self):
    """Extract (optionally path-filtered) members of every RPM in self.rpms
    into per-package subdirectories of self.dest_dir.

    Three libarchive bindings are handled, selected by the module flags
    is_ctype / is_legacy (presumably set at import time depending on which
    binding loaded -- TODO confirm):
      * ctypes-based libarchive (file_reader / entry.get_blocks)
      * legacy libarchive (Archive / readpath)
      * pyEasyArchive, imported as `lap`
    """
    # TODO: globbing or regex on self.paths?
    # If we have yum, we can, TECHNICALLY, do this with:
    # http://yum.baseurl.org/api/yum/rpmUtils/miscutils.html#rpmUtils.miscutils.rpm2cpio
    # But nope. We can't selectively decompress members based on path with rpm2cpio-like funcs.
    # We keep getting extraction artefacts, at least with legacy libarchive_c, so we use a hammer.
    _curdir = os.getcwd()
    # work in a scratch dir: some bindings extract relative to the cwd
    _tempdir = tempfile.mkdtemp()
    os.chdir(_tempdir)
    for rpm_file in self.rpms:
        rf = self.rpms[rpm_file]
        if is_ctype:
            if not is_legacy:
                # ctype - extracts to pwd
                with libarchive.file_reader(rf) as reader:
                    for entry in reader:
                        # honour the optional path whitelist, skip directories
                        if self.paths and entry.path not in self.paths:
                            continue
                        if entry.isdir():
                            continue
                        fpath = os.path.join(self.dest_dir, rpm_file, entry.path)
                        if not os.path.isdir(os.path.dirname(fpath)):
                            os.makedirs(os.path.dirname(fpath))
                        # stream the member block-by-block to avoid buffering it whole
                        with open(fpath, 'wb') as f:
                            for b in entry.get_blocks():
                                f.write(b)
            else:
                # legacy binding: let libarchive write the file itself
                with libarchive.Archive(rf) as reader:
                    for entry in reader:
                        if (self.paths and entry.pathname not in self.paths) or (entry.isdir()):
                            continue
                        fpath = os.path.join(self.dest_dir, rpm_file, entry.pathname)
                        if not os.path.isdir(os.path.dirname(fpath)):
                            os.makedirs(os.path.dirname(fpath))
                        reader.readpath(fpath)
        else:
            # pyEasyArchive/"pypi/libarchive"
            with lap.file_reader(rf) as reader:
                for entry in reader:
                    # NOTE(review): `entry.filetype.IFDIR` is a bare attribute
                    # access used as the directory test -- verify against the
                    # pyEasyArchive API that this is truthy only for dirs.
                    if (self.paths and entry.pathname not in self.paths) or (entry.filetype.IFDIR):
                        continue
                    fpath = os.path.join(self.dest_dir, rpm_file, entry.pathname)
                    if not os.path.isdir(os.path.dirname(fpath)):
                        os.makedirs(os.path.dirname(fpath))
                    with open(fpath, 'wb') as f:
                        for b in entry.get_blocks():
                            f.write(b)
    # restore cwd and drop the scratch dir
    os.chdir(_curdir)
    shutil.rmtree(_tempdir)
    return()
def analyze(rpmfile, show_errors=False, opformat="json"):
    """Analyse single RPM file.

    Silently returns (after printing to stderr) when the file is missing,
    is not an .rpm, or cannot be opened as an archive.
    """
    if not os.path.exists(rpmfile):
        print >> sys.stderr, "%s doesn't exists!" % rpmfile
        return
    if not rpmfile.endswith(".rpm"):
        print >> sys.stderr, "skipping %s " % rpmfile
        return
    try:
        a = libarchive.Archive(rpmfile)
    except Exception as exc:
        # fix: was the Py2-only "except Exception, exc" comma syntax; the
        # sibling analyze() implementation already uses "as exc"
        print >> sys.stderr, rpmfile, str(exc)
        return
def create2(container, archivepath, checked):
    """Write every checked node of a FileSystem container into a new
    archive at *archivepath* using libarchive's writepath helper.

    Returns True on success, False when *container* is not a FileSystem.
    """
    if not isinstance(container, FileSystem):
        logging.info("container '{0}' is not FileSystem".format(container))
        return False
    archivepath = os.path.abspath(archivepath)
    with libarchive.Archive(archivepath, 'w') as archive:
        logging.info(u"START create selective '{0}'".format(archivepath))
        for node in checked:
            source = s2u(node.get_path())
            target = s2u(os.path.join(*node.get_data_array()[1:]))
            logging.info(u"from '{0}' to '{1}'".format(source, target))
            archive.writepath(source, target)
        logging.info(u"END create selective '{0}'".format(archivepath))
    return True
def create(container, archivepath, checked):
    """Write every checked non-directory node of a FileSystem container into
    a new archive at *archivepath*, driving the low-level _libarchive
    bindings directly (pathname, size, filetype, mtime and perms are copied
    from the source file's stat).

    Returns False when *container* is not a FileSystem; otherwise None.
    """
    if not isinstance(container, FileSystem):
        logging.info("container '{0}' is not FileSystem".format(container))
        return False
    archivepath = os.path.abspath(u2s(archivepath))
    BUFFER_SIZE = 2048
    with libarchive.Archive(archivepath, 'w') as larchive:
        a = larchive._a
        for node in checked:
            if node.is_dir():
                continue
            path = u2s(node.get_path())
            pathname = u2s(os.path.join(*node.get_data_array()[1:]))
            st = os.stat(path)
            entry = _libarchive.archive_entry_new()
            _libarchive.archive_entry_set_pathname(entry, pathname)
            _libarchive.archive_entry_set_size(entry, st.st_size)
            _libarchive.archive_entry_set_filetype(entry, stat.S_IFMT(st.st_mode))
            _libarchive.archive_entry_set_mtime(entry, st.st_mtime, 0)
            _libarchive.archive_entry_set_perm(entry, stat.S_IMODE(st.st_mode))
            _libarchive.archive_write_header(a, entry)
            # fix: was open(path, 'r') -- text mode corrupts binary content
            # (newline translation) and fails on non-decodable bytes on Py3;
            # 'with' also guarantees the handle is closed if a write raises.
            with open(path, 'rb') as f:
                data = f.read(BUFFER_SIZE)
                while len(data) > 0:
                    _libarchive.archive_write_data_from_str(a, data)
                    data = f.read(BUFFER_SIZE)
            _libarchive.archive_entry_free(entry)
        _libarchive.archive_write_close(a)
def checkArchive(filename, part):
    """Return True when the archive attached as MIME *part* contains a
    member whose pathname matches blockedPattern.

    The payload is staged in a throwaway temp directory so libarchive can
    sniff it by extension; the temp file and directory are always removed.
    Returns False immediately when libarchive is unavailable or the
    extension is not a known filter/format.
    """
    if not haveLibarchive:
        return False
    fparts = filename.split('.')
    # strip a trailing compression filter (e.g. .gz) before the format check
    if fparts[-1].lower() in libarchive.FILTERS:
        fparts.pop()
    if fparts[-1].lower() not in libarchive.FORMATS:
        return False
    d = tempfile.mkdtemp()
    f = '%s/%s' % (d, filename.replace('/', ''))
    # fix: get_payload(decode=True) yields bytes -- must write binary mode
    # ('w' breaks on Py3 and mangles bytes via newline translation)
    t = open(f, 'wb')
    t.write(part.get_payload(decode=True))
    t.close()
    a = libarchive.Archive(f)
    found = False
    for entry in a:
        if blockedPattern.match(entry.pathname):
            found = True
            break  # fix: no need to scan the remaining entries
    a.close()  # fix: the archive handle was never closed
    os.unlink(f)
    os.rmdir(d)
    return found
def libarchive_get(path: Path, filename: str, offset: int = 0, amount: int = -1) -> bytes:
    """Return (part of) the contents of *filename* inside the archive at *path*.

    offset: leading bytes of the member to skip.
    amount: number of bytes to return, or -1 for "everything from offset on".

    Raises FileNotFoundError when no member's pathname matches exactly.
    """
    with path.open('rb') as f:
        with libarchive.Archive(f, 'r') as a:
            for item in a:
                if item.pathname != filename:
                    continue
                # Stream returned by readstream() doesn't support seek() by the
                # looks of it, so we over-read and slice ourselves.
                if amount != -1:
                    with a.readstream(item.size) as stream:
                        if not offset:
                            return stream.read(amount)
                        data = stream.read(offset + amount)
                        return data[offset:offset + amount]
                data = a.read(item.size)
                # fix: the original fell through to data[offset:offset + amount]
                # with amount == -1, i.e. data[offset:offset - 1], silently
                # dropping the tail of the member when an offset was given.
                return data[offset:] if offset else data
    raise FileNotFoundError(filename)
return package = found.groups()[0] debug_package = package + "-debuginfo-" package = h[rpm.RPMTAG_NAME] group = h[rpm.RPMTAG_GROUP] output = {} output["package"] = package output["group"] = group output["build"] = os.path.basename(rpmfile) output["files"] = [] output["nvr"] = nvr try: fd = open(rpmfile, "rb") a = libarchive.Archive(fd) except Exception, exc: print(rpmfile, str(exc)) return # process the binary RPM ELFs = [] for entry in a: size = entry.size # skip 0 to 4 byte files only, size can be 0 due to compression also! if size < 4 and not stat.S_ISDIR(entry.mode): continue # skip directories if stat.S_ISDIR(entry.mode):
def run(self):
    """Create an archive from self.file_items read over SFTP, then upload
    the finished archive next to self.path on the remote side.

    Progress callbacks (on_running) are throttled to one per REQUEST_DELAY;
    on_success/on_error report the final outcome.
    """
    try:
        self.preload()
        sftp = self.get_sftp_connection(self.session)
        # the archive is staged locally under TMP_DIR before upload
        abs_archive_path = os.path.join(TMP_DIR, self.login, self.random_hash())
        archive_dir = os.path.dirname(abs_archive_path)
        if not os.path.exists(archive_dir):
            os.makedirs(archive_dir)
        dir_name = os.path.dirname(self.path)
        if not sftp.exists(dir_name):
            sftp.makedirs(dir_name)
        if not sftp.isdir(dir_name):
            raise Exception("Destination path is not a directory")
        archive_type = self.get_archive_type(self.type)
        if not archive_type:
            raise Exception("Unknown archive type")
        archive_path = abs_archive_path + "." + archive_type
        if os.path.exists(archive_path):
            raise Exception("Archive file already exist")
        self.on_running(self.status_id, pid=self.pid, pname=self.name)
        archive = libarchive.Archive(archive_path, "w")
        next_tick = time.time() + REQUEST_DELAY
        i = 0
        for file_item in self.file_items:
            try:
                abs_path = file_item.get("path")
                file_basename = os.path.basename(abs_path)
                if sftp.isfile(abs_path):
                    self.logger.info("Packing file: %s" % (abs_path,))
                    f = sftp.open(abs_path, 'rb')
                    archive.write(self.make_entry(abs_path, file_basename), data=f.read())
                    f.close()
                elif sftp.isdir(abs_path):
                    self.logger.info("Packing dir: %s" % (abs_path,))
                    # walk the remote tree, storing paths relative to the dir
                    for current, dirs, files in sftp.walk(abs_path):
                        for f in files:
                            file_path = os.path.join(current, f)
                            file_obj = sftp.open(file_path, 'rb')
                            rel_path = os.path.relpath(file_path, abs_path)
                            base_path = os.path.join(file_basename, rel_path)
                            archive.write(self.make_entry(file_path, base_path), data=file_obj.read())
                            file_obj.close()
                i += 1
                if time.time() > next_tick:
                    progress = {
                        'percent': round(float(i) / float(len(self.file_items)), 2),
                        'text': str(int(round(float(i) / float(len(self.file_items)), 2) * 100)) + '%'
                    }
                    self.on_running(self.status_id, progress=progress, pid=self.pid, pname=self.name)
                    next_tick = time.time() + REQUEST_DELAY
            except Exception as e:
                self.logger.error(
                    "Error archive file %s , error %s , %s" % (str(file_item), str(e), traceback.format_exc()))
                raise e
        # fix: the archive was never closed before upload, so libarchive's
        # buffered data/trailer could be missing from the uploaded file
        archive.close()
        # fix: format string had two args but only one placeholder
        self.logger.info("Uploading created archive {} to remote path {}".format(abs_archive_path, self.path))
        remote_path = self.path + '.' + archive_type
        r = sftp.sftp.put(archive_path, remote_path)
        self.logger.info("sftp put result local_path {} remote_path {}, sftp_results {}".format(
            archive_path, remote_path, r))
        progress = {
            'percent': round(float(i) / float(len(self.file_items)), 2),
            'text': str(int(round(float(i) / float(len(self.file_items)), 2) * 100)) + '%'
        }
        result = {
            "archive": self._make_file_info(archive_path)
        }
        self.on_success(self.status_id, data=result, progress=progress, pid=self.pid, pname=self.name)
    except Exception as e:
        result = {
            "error": True,
            "message": str(e),
            "traceback": traceback.format_exc()
        }
        self.logger.error("SFTP createArchive error = {}".format(result))
        self.on_error(self.status_id, result, pid=self.pid, pname=self.name)
def analyze(rpmfile, show_errors=False, opformat="json"):
    """Analyse single RPM file.

    Walks every member of the RPM payload, running the checksec ELF
    analysis on regular files and collecting setuid/setgid files and
    directories, polkit-related paths, file capabilities and daemon hints
    into one result.

    Returns a JSON string when opformat == "json", otherwise CSV lines;
    returns None (printing to stderr) when the file is missing, not an
    .rpm, or unreadable.
    """
    if not os.path.exists(rpmfile):
        print >> sys.stderr, "%s doesn't exists!" % rpmfile
        return
    if not rpmfile.endswith(".rpm"):
        # print >> sys.stderr, "skipping %s" % os.path.basename(rpmfile)
        return
    try:
        a = libarchive.Archive(rpmfile)
    except Exception as exc:
        print >> sys.stderr, rpmfile, str(exc)
        return
    try:
        # read the RPM header without verifying signatures
        ts = rpm.TransactionSet()
        ts.setVSFlags(rpm._RPMVSF_NOSIGNATURES)
        fd = os.open(rpmfile, os.O_RDONLY)
        h = ts.hdrFromFdno(fd)
        os.close(fd)
    except Exception as exc:
        print >> sys.stderr, rpmfile, str(exc)
        return
    # create lookup dictionary
    # print dir(h)
    # print dir(rpm)
    nvr = h[rpm.RPMTAG_NVR]
    package = h[rpm.RPMTAG_NAME]
    group = h[rpm.RPMTAG_GROUP]
    caps = h[rpm.RPMTAG_FILECAPS]
    names = h['FILENAMES']
    groups = h[rpm.RPMTAG_FILEGROUPNAME]
    users = h[rpm.RPMTAG_FILEUSERNAME]
    # map each payload filename to its (user, group) owner pairs
    lookup = defaultdict(list)
    for n, u, g in zip(names, users, groups):
        lookup[n].append((u, g))
    # collect [filename, capability] pairs for files with file capabilities
    filecaps = []
    for i, cap in enumerate(caps):
        if cap:
            filecaps.append([names[i], cap])
    pols = []
    lines = ""
    output = {}
    output["package"] = package
    output["group"] = group
    output["build"] = os.path.basename(rpmfile)
    output["files"] = []
    output["daemon"] = False
    output["nvr"] = nvr
    output["filecaps"] = filecaps
    output["polkit"] = False
    output["caps"] = False
    output["pols"] = pols
    if filecaps:
        output["caps"] = True
    # flag: set once any setuid/setgid file or directory is seen
    flag = False
    for entry in a:
        directory = False
        size = entry.size
        # polkit checks, "startswith" is better but ...
        if "/etc/polkit" in entry.pathname or \
           "/usr/share/PolicyKit" in entry.pathname or \
           "/usr/share/polkit-1" in entry.pathname:
            pols.append(entry.pathname)
            output["polkit"] = True
        # check if package is a daemon
        if "/etc/rc.d/init.d" in entry.pathname or \
           "/lib/systemd" in entry.pathname:
            output["daemon"] = True
        # skip 0 byte files only
        # NOTE: size can be 0 due to compression also!
        if size == 0 and not stat.S_ISDIR(entry.mode):
            continue
        # we are only interested in particular kind of directories
        if stat.S_ISDIR(entry.mode):
            if not ((entry.mode & stat.S_ISUID) or (stat.S_ISGID & entry.mode)):
                continue
            else:
                flag = True
                directory = True
        # check for executable flag
        # if not (entry.mode & 0111):
        #     continue
        # always report setxid files
        if ((entry.mode & stat.S_ISUID) or (stat.S_ISGID & entry.mode)):
            flag = True
        # skip library files
        filename = entry.pathname.lstrip(".")
        # if not flag and (("lib" in filename and ".so" in filename) or \
        #     filename.endswith(".so")):
        #     continue
        try:
            contents = a.read(size)
        except Exception:
            continue
        # invoke checksec only on files
        returncode = -1
        if not directory:
            try:
                fh = cStringIO(contents)
                elf = Elf(fh)
                if opformat == "json":
                    out = process_file(elf, deps=True)
                    # polkit check 2
                    if "polkit" in out:
                        output["polkit"] = True
                else:
                    out = process_file(elf)
                dataline = "%s,%s,%s,mode=%s,%s" % (package, os.path.basename(rpmfile), filename, oct(entry.mode), out)
                returncode = 0
            except ELFError as exc:
                if show_errors:
                    print >> sys.stderr, "%s,%s,Not an ELF binary" % \
                        (filename, str(exc))
                continue
            except IOError as exc:
                if show_errors:
                    print >> sys.stderr, "%s,%s,Not an ELF binary" % \
                        (filename, str(exc))
                continue
        if flag or returncode == 0:
            # populate fileinfo object
            fileinfo = {}
            fileinfo["name"] = filename
            fileinfo["size"] = entry.size
            fileinfo["mode"] = entry.mode
            fileinfo["user"], fileinfo["group"] = lookup[filename][0]
            if directory:
                fileinfo["directory"] = directory
            output["files"].append(fileinfo)
            if returncode == 0 and opformat == "csv":
                lines = lines + dataline + "\n"
            else:
                # print >> sys.stderr, dataline
                pass
            if returncode == 0 and opformat == "json":
                # fold the "key=value" checksec output into the fileinfo dict
                try:
                    for kvp in out.split(","):
                        key, value = kvp.split("=")
                        fileinfo[key] = value
                except Exception:
                    pass
    a.close()
    if opformat == "json":
        return json.dumps(output)
    else:
        return lines.rstrip()
def run(self):
    """Download an archive over SFTP, extract it locally into self.tmp_dir
    using whichever backend matches its format (zipfile / rarfile /
    py7zlib / libarchive / gzip), then rsync the extracted tree back to
    the remote extract path.

    Progress is reported via on_running; on_success/on_error carry the
    final outcome; local staging dirs are always removed in finally.
    """
    try:
        self.preload()
        # prepare download dir strictly after dropping privileges
        if not os.path.exists(self.folder_for_archive):
            os.makedirs(self.folder_for_archive)
        if not os.path.exists(self.tmp_dir):
            os.makedirs(self.tmp_dir)
        sftp = self.get_sftp_connection(self.session)
        abs_extract_path = self.extract_path
        if not sftp.exists(abs_extract_path):
            try:
                sftp.makedirs(abs_extract_path)
            except Exception as e:
                self.logger.error("Cannot create extract path %s. %s" % (str(e), traceback.format_exc()))
                raise Exception("Cannot create extract path")
        elif sftp.isfile(abs_extract_path):
            raise Exception("Extract path incorrect - file exists")
        abs_archive_path = self.file.get("path")
        archive_name = os.path.basename(abs_archive_path)
        # copy archive to local fs
        synced_archive_filename = os.path.join(self.folder_for_archive, archive_name)
        sftp.rsync_from(abs_archive_path, self.folder_for_archive)
        if not os.path.exists(synced_archive_filename):
            raise Exception("Archive file is not exist")
        self.on_running(self.status_id, pid=self.pid, pname=self.name)
        self.logger.debug("Start extracting %s", abs_archive_path)
        # for rar and zip same algorithm
        if is_zipfile(synced_archive_filename) or\
                rarfile.is_rarfile(synced_archive_filename) or\
                SevenZFile.is_7zfile(synced_archive_filename):
            if is_zipfile(synced_archive_filename):
                self.logger.info("Archive ZIP type, using zipfile (beget)")
                a = ZipFile(synced_archive_filename)
            elif rarfile.is_rarfile(synced_archive_filename):
                self.logger.info("Archive RAR type, using rarfile")
                a = rarfile.RarFile(synced_archive_filename)
            else:
                self.logger.info("Archive 7Zip type, using py7zlib")
                a = SevenZFile(synced_archive_filename)
                # extract Empty Files first (py7zlib skips empty-stream
                # members, so they are materialised by hand)
                for fileinfo in a.archive.header.files.files:
                    if not fileinfo['emptystream']:
                        continue
                    name = fileinfo['filename']
                    try:
                        unicode_name = name.encode('UTF-8').decode('UTF-8')
                    except UnicodeDecodeError:
                        # fall back to cp866 for legacy DOS/Windows names
                        unicode_name = name.encode('cp866').decode('UTF-8')
                    unicode_name = unicode_name.replace(
                        '\\',
                        '/')  # For windows name in rar etc.
                    file_name = os.path.join(self.tmp_dir, unicode_name)
                    dir_name = os.path.dirname(file_name)
                    if not os.path.exists(dir_name):
                        os.makedirs(dir_name)
                    if os.path.exists(
                            dir_name) and not os.path.isdir(dir_name):
                        os.remove(dir_name)
                        os.makedirs(dir_name)
                    if os.path.isdir(file_name):
                        continue
                    # create the empty file
                    f = open(file_name, 'w')
                    f.close()
            infolist = a.infolist()
            not_ascii = False
            # checking ascii names
            try:
                self.tmp_dir.encode('utf-8').decode('ascii')
                for name in a.namelist():
                    name.encode('utf-8').decode('ascii')
            except UnicodeDecodeError:
                not_ascii = True
            except UnicodeEncodeError:
                not_ascii = True
            # background progress reporter, joined in the finally below
            t = threading.Thread(target=self.progress,
                                 args=(infolist, self.extracted_files, abs_extract_path))
            t.daemon = True
            t.start()
            try:
                if not_ascii:
                    # per-member extraction with manual name transcoding
                    for name in a.namelist():
                        try:
                            unicode_name = name.encode('UTF-8').decode(
                                'UTF-8')
                        except UnicodeDecodeError:
                            unicode_name = name.encode('cp866').decode(
                                'UTF-8')
                        unicode_name = unicode_name.replace(
                            '\\', '/')  # For windows name in rar etc.
                        file_name = os.path.join(self.tmp_dir, unicode_name)
                        dir_name = os.path.dirname(file_name)
                        if not os.path.exists(dir_name):
                            os.makedirs(dir_name)
                        if os.path.exists(
                                dir_name) and not os.path.isdir(dir_name):
                            os.remove(dir_name)
                            os.makedirs(dir_name)
                        if os.path.isdir(file_name):
                            continue
                        f = open(file_name, 'wb')
                        try:
                            data = a.read(name)
                            f.write(data)
                            f.close()
                        except TypeError:
                            # pass for directories its make recursively for files
                            f.close()
                            os.remove(file_name)
                else:
                    self.logger.info("EXTRACT ALL to %s , encoded = %s" %
                                     (pprint.pformat(self.tmp_dir),
                                      pprint.pformat(self.tmp_dir)))
                    a.extractall(
                        self.tmp_dir
                    )  # Not working with non-ascii windows folders
            except Exception as e:
                self.logger.error("Error extract path %s. %s" % (str(e), traceback.format_exc()))
                raise e
            finally:
                self.extracted_files["done"] = True
                t.join()
        elif libarchive.is_archive(synced_archive_filename):
            self.logger.info("Archive other type, using libarchive")
            next_tick = time.time() + REQUEST_DELAY
            print(
                pprint.pformat("Clock = %s , tick = %s" %
                               (str(time.time()), str(next_tick))))
            # first pass only counts entries for progress reporting
            infolist = []
            with libarchive.Archive(synced_archive_filename,
                                    entry_class=Entry) as a:
                for entry in a:
                    infolist.append(entry)
            with libarchive.Archive(synced_archive_filename,
                                    entry_class=Entry) as a:
                for entry in a:
                    entry_path = os.path.join(self.tmp_dir, entry.pathname)
                    self.logger.debug("Entry pathname %s - %s",
                                      entry.pathname, entry.size)
                    if time.time() > next_tick:
                        progress = {
                            'percent':
                            round(
                                float(self.extracted_files["count"]) /
                                float(len(infolist)), 2),
                            'text':
                            str(
                                int(
                                    round(
                                        float(self.extracted_files["count"]
                                              ) / float(len(infolist)),
                                        2) * 100)) + '%'
                        }
                        self.on_running(self.status_id,
                                        progress=progress,
                                        pid=self.pid,
                                        pname=self.name)
                        next_tick = time.time() + REQUEST_DELAY
                    self.extracted_files["count"] += 1
                    dir_name = os.path.dirname(entry_path)
                    if not os.path.exists(dir_name):
                        os.makedirs(dir_name)
                    if os.path.exists(
                            dir_name) and not os.path.isdir(dir_name):
                        os.remove(dir_name)
                        os.makedirs(dir_name)
                    if os.path.isdir(entry_path):
                        continue
                    # NOTE(review): text-mode 'w' handle passed to readpath()
                    # and never closed -- looks wrong for binary members;
                    # confirm against the legacy libarchive API before fixing.
                    f = open(entry_path, 'w')
                    a.readpath(f)
        elif abs_archive_path[-3:] == ".gz":
            self.logger.info("gz file type, using gzip")
            try:
                # if its just a gz file
                a = gzip.open(synced_archive_filename)
                file_content = a.read()
                a.close()
                file_name = os.path.splitext(
                    os.path.basename(synced_archive_filename))[0]
                file_path = os.path.join(self.tmp_dir, file_name)
                infolist = [file_name]
                dir_name = os.path.dirname(file_path)
                if not os.path.exists(dir_name):
                    os.makedirs(dir_name)
                extracted = open(file_path, 'wb')
                extracted.write(file_content)
                extracted.close()
            except Exception as e:
                raise e
            finally:
                self.extracted_files["done"] = True
        else:
            raise Exception("Archive file has unknown format")
        # push the extracted tree to the remote side and clean local staging
        sftp.rsync_to(self.tmp_dir + '/.', abs_extract_path)
        os.remove(synced_archive_filename)
        shutil.rmtree(self.tmp_dir)
        progress = {
            'percent':
            round(
                float(self.extracted_files["count"]) / float(len(infolist)),
                2),
            'text':
            str(
                int(
                    round(
                        float(self.extracted_files["count"]) /
                        float(len(infolist)), 2) * 100)) + '%'
        }
        result = {}
        time.sleep(REQUEST_DELAY)
        self.on_success(self.status_id,
                        progress=progress,
                        data=result,
                        pid=self.pid,
                        pname=self.name)
    except Exception as e:
        self.extracted_files["done"] = True
        result = {
            "error": True,
            "message": str(e),
            "traceback": traceback.format_exc()
        }
        self.logger.error("SFTP ExtractArchive Error = {}".format(result))
        self.on_error(self.status_id, result, pid=self.pid, pname=self.name)
    finally:
        if os.path.exists(self.tmp_dir):
            shutil.rmtree(self.tmp_dir)
        if os.path.exists(self.folder_for_archive):
            shutil.rmtree(self.folder_for_archive)
def libarchive_list(path: Path) -> Iterator[FilenameWithMaybeSizeAndCRC]:
    """Yield (pathname, size, crc) for every member of the archive at *path*.

    The CRC element is always None (libarchive does not expose it here).
    We open the file ourselves rather than passing a string path: libarchive
    does not close string-path archives properly even under a context
    manager (observed via ResourceWarnings), and it does not accept Path
    objects anyway.
    """
    with path.open('rb') as handle:
        with libarchive.Archive(handle, 'r') as archive:
            for entry in archive:
                yield entry.pathname, entry.size, None
def run(self):
    """Create an archive from the local files/dirs in self.file_items at
    self.path + "." + archive_type.

    Progress callbacks (on_running) are throttled to one per REQUEST_DELAY;
    on_success/on_error report the final outcome.
    """
    try:
        self.preload()
        abs_archive_path = self.get_abs_path(self.path)
        dir_name = os.path.dirname(abs_archive_path)
        if not os.path.exists(dir_name):
            os.makedirs(dir_name)
        if not os.path.isdir(dir_name):
            raise Exception("Destination path is not a directory")
        archive_type = self.get_archive_type(self.type)
        if not archive_type:
            raise Exception("Unknown archive type")
        archive_path = abs_archive_path + "." + archive_type
        if os.path.exists(archive_path):
            raise Exception("Archive file already exist")
        self.on_running(self.status_id, pid=self.pid, pname=self.name)
        archive = libarchive.Archive(archive_path, "w")
        next_tick = time.time() + REQUEST_DELAY
        i = 0
        for file_item in self.file_items:
            try:
                abs_path = self.get_abs_path(file_item.get("path"))
                file_basename = os.path.basename(abs_path)
                if os.path.isfile(abs_path):
                    self.logger.info("Packing file: %s" % (abs_path, ))
                    f = open(abs_path, 'rb')
                    archive.write(self.make_entry(abs_path, file_basename), data=f.read())
                    f.close()
                elif os.path.isdir(abs_path):
                    self.logger.info("Packing dir: %s" % (abs_path, ))
                    # walk the tree, storing paths relative to the packed dir
                    for current, dirs, files in os.walk(abs_path):
                        for f in files:
                            file_path = os.path.join(current, f)
                            file_obj = open(file_path, 'rb')
                            rel_path = os.path.relpath(file_path, abs_path)
                            base_path = os.path.join(file_basename, rel_path)
                            archive.write(self.make_entry(file_path, base_path), data=file_obj.read())
                            file_obj.close()
                i += 1
                if time.time() > next_tick:
                    progress = {
                        'percent': round(float(i) / float(len(self.file_items)), 2),
                        'text': str(int(round(float(i) / float(len(self.file_items)), 2) * 100)) + '%'
                    }
                    self.on_running(self.status_id, progress=progress, pid=self.pid, pname=self.name)
                    next_tick = time.time() + REQUEST_DELAY
            except Exception as e:
                self.logger.error(
                    "Error archive file %s , error %s , %s" % (str(file_item), str(e), traceback.format_exc()))
                raise e
        # fix: the archive was never closed, so libarchive's buffered data
        # and trailer could be missing from the file reported as the result
        archive.close()
        progress = {
            'percent': round(float(i) / float(len(self.file_items)), 2),
            'text': str(int(round(float(i) / float(len(self.file_items)), 2) * 100)) + '%'
        }
        result = {"archive": self._make_file_info(archive_path)}
        self.on_success(self.status_id, data=result, progress=progress, pid=self.pid, pname=self.name)
    except Exception as e:
        result = {
            "error": True,
            "message": str(e),
            "traceback": traceback.format_exc()
        }
        self.on_error(self.status_id, result, pid=self.pid, pname=self.name)
def open(path):
    """Open the archive at *path* for reading (converted to a native
    string via u2s).

    NOTE: intentionally shadows the builtin open() within this module.
    """
    native_path = u2s(path)
    return libarchive.Archive(native_path)