def test_multiblock_mem():
    """Read ``mblock_1.7z`` into memory and verify one member's SHA-256.

    Both the underlying file object and the archive are managed by ``with``
    so they are released even when the assertion fails.
    """
    with testdata_path.joinpath('mblock_1.7z').open(mode='rb') as fp:
        with py7zr.SevenZipFile(fp) as archive:
            _dict = archive.readall()
            m = hashlib.sha256()
            m.update(_dict["bin/7zdec.exe"].read())
            assert m.digest() == binascii.unhexlify(
                'e14d8201c5c0d1049e717a63898a3b1c7ce4054a24871daebaa717da64dcaff5')
def test_bugzilla_16(tmp_path):
    """Open ``bugzilla_16.7z`` and hand it to ``decode_all`` with expected digests.

    ``decode_all`` is defined elsewhere; presumably it extracts into
    ``tmp_path`` and compares each member with the listed SHA-256 digest.
    """
    expected = [
        {'filename': 'mame4all_2.5.ini',
         'digest': 'aaebca5e140e0099a757903fc9f194f9e6da388eed22d37bfd1625c80aa25903'},
        {'filename': 'mame4all_2.5/mame',
         'digest': '6bc23b11fbb9a64096408623d476ad16083ef71c5e7919335e8696036034987d'},
    ]
    # The test opens the file itself, so it is also responsible for closing it.
    with open(os.path.join(testdata_path, 'bugzilla_16.7z'), 'rb') as fp:
        archive = py7zr.SevenZipFile(fp)
        decode_all(archive, expected, tmp_path)
def test_multiblock(tmp_path):
    """Extract ``mblock_1.7z`` to disk and verify one member's SHA-256."""
    with open(os.path.join(testdata_path, 'mblock_1.7z'), 'rb') as fp:
        with py7zr.SevenZipFile(fp) as archive:
            archive.extractall(path=tmp_path)
    # read_bytes() opens and closes the extracted file for us.
    m = hashlib.sha256()
    m.update(tmp_path.joinpath('bin/7zdec.exe').read_bytes())
    assert m.digest() == binascii.unhexlify(
        'e14d8201c5c0d1049e717a63898a3b1c7ce4054a24871daebaa717da64dcaff5')
def retrieve_archive(self, package, path=None, command=None):
    """Download ``package.url`` to ``package.archive``, extract it, then delete it.

    Args:
        package: Object providing ``archive`` (local file name) and ``url``.
        path: Extraction directory; passed to py7zr or as ``-o<path>`` to
            the external command.
        command: External 7z executable. When ``None`` the archive is
            extracted with py7zr instead.

    Returns:
        ``False`` when the download raises a connection error, ``True``
        after the archive was extracted and removed.

    Raises:
        BadPackageFile: If the downloaded file is not a 7z archive.
    """
    archive = package.archive
    url = package.url
    self.logger.info("-Downloading {}...".format(url))
    try:
        r = requests.get(url, allow_redirects=False, stream=True)
        if r.status_code == 302:
            newurl = altlink(r.url)
            # newurl = r.headers['Location']
            self.logger.info('Redirected to new URL: {}'.format(newurl))
            r = requests.get(newurl, stream=True)
    except requests.exceptions.ConnectionError as e:
        # Lazy logging args: "%s" % e.args raised TypeError whenever the
        # exception carried more than one argument.
        self.logger.warning("Caught download error: %s", e)
        return False
    else:
        with open(archive, 'wb') as fd:
            for chunk in r.iter_content(chunk_size=8196):
                fd.write(chunk)
    self.logger.info("-Extracting {}...".format(archive))

    if sys.version_info > (3, 5):
        if not py7zr.is_7zfile(archive):
            raise BadPackageFile
    if command is None:
        # Close the archive before unlinking it; an open handle blocks
        # deletion on Windows.
        with py7zr.SevenZipFile(archive) as szf:
            szf.extractall(path=path)
    elif path is not None:
        run([command, 'x', '-aoa', '-bd', '-y', '-o{}'.format(path), archive])
    else:
        run([command, 'x', '-aoa', '-bd', '-y', archive])
    os.unlink(archive)
    return True
async def inflate_archive(self, file_ids, fileformat, name, password=None):
    """Bundle stored files into a zip or 7z archive and upload the result.

    Args:
        file_ids: Whitespace-separated string of file ids to fetch with
            ``self.get_file``.
        fileformat: ``"zip"`` or ``"7zip"``.
        name: File name for the uploaded archive; a default per format is
            used when falsy.
        password: Optional archive password. ``None`` or an empty string
            means no password for the 7z writer.

    Returns:
        A plain message string when there is nothing to do or the format is
        unsupported, otherwise a dict with ``success`` and either ``id`` or
        ``message``.
    """
    try:
        # TODO: will in future support multiple files instead of string ids?
        file_ids = file_ids.split()
        print("picking {}".format(file_ids))

        # GET all items from shuffle
        items = [self.get_file(file_id) for file_id in file_ids]
        if not items:
            return "No file to inflate"

        # Dump files on disk, because libs want path :(
        with tempfile.TemporaryDirectory() as tmpdir:
            paths = []
            print("Number 1")
            for item in items:
                item_path = os.path.join(tmpdir, item["filename"])
                with open(item_path, "wb") as f:
                    f.write(item["data"])
                paths.append(item_path)

            # Create archive temporary
            print("{} items to inflate".format(len(items)))
            with tempfile.NamedTemporaryFile() as archive:
                if fileformat == "zip":
                    archive_name = "archive.zip" if not name else name
                    pyminizip.compress_multiple(
                        paths, [], archive.name, password, 5
                    )
                elif fileformat == "7zip":
                    archive_name = "archive.7z" if not name else name
                    # `password or None` also covers the default None, on
                    # which len(password) used to raise TypeError.
                    with py7zr.SevenZipFile(
                        archive.name, "w", password=password or None,
                    ) as sz_archive:
                        for path in paths:
                            sz_archive.write(path)
                else:
                    return "Format {} not supported".format(fileformat)

                # Close the upload handle once set_files() has returned.
                with open(archive.name, "rb") as archive_data:
                    return_id = self.set_files(
                        [{"filename": archive_name, "data": archive_data}]
                    )

                if len(return_id) == 1:
                    # Returns the first file's ID
                    return {"success": True, "id": return_id[0]}
                return {
                    "success": False,
                    "message": "Upload archive returned {}".format(return_id),
                }
    except Exception as excp:
        return {"success": False, "message": excp}
def test_symlink():
    """List and extract ``symlink.7z``, checking the member names.

    The file handle, the archive and the scratch directory are all released
    even when the assertion or the extraction fails.
    """
    with open(os.path.join(testdata_path, 'symlink.7z'), 'rb') as fp:
        with py7zr.SevenZipFile(fp) as archive:
            assert sorted(archive.getnames()) == [
                'lib', 'lib/libabc.so', 'lib/libabc.so.1', 'lib/libabc.so.1.2',
                'lib/libabc.so.1.2.3', 'lib64']
            with tempfile.TemporaryDirectory() as tmpdir:
                archive.extractall(path=tmpdir)
def test_extract_callback(tmp_path):
    """Extract ``test_1.7z`` with a callback that logs each event to stdout."""

    class ECB(py7zr.callbacks.ExtractCallback):
        """Write one line per reported extraction event to ``ofd``."""

        def __init__(self, ofd):
            self.ofd = ofd

        def report_start_preparation(self):
            self.ofd.write('preparation.\n')

        def report_start(self, processing_file_path, processing_bytes):
            self.ofd.write('start "{}" (compressed in {} bytes)\n'.format(
                processing_file_path, processing_bytes))

        def report_end(self, processing_file_path, wrote_bytes):
            self.ofd.write('end "{}" extracted to {} bytes\n'.format(
                processing_file_path, wrote_bytes))

        def report_postprocess(self):
            self.ofd.write('post processing.\n')

        def report_warning(self, message):
            self.ofd.write('warning: {:s}\n'.format(message))

    cb = ECB(sys.stdout)
    # Manage the raw file handle too; previously only the archive was closed.
    with open(os.path.join(testdata_path, 'test_1.7z'), 'rb') as fp:
        with py7zr.SevenZipFile(fp) as archive:
            archive.extractall(path=tmp_path, callback=cb)
def test_extract_symlink_with_relative_target_path(tmp_path):
    """Extract ``symlink.7z`` into a relative path and check a symlink target.

    The working directory is switched to ``tmp_path`` for the extraction and
    restored afterwards so later tests are not affected.
    """
    original_cwd = os.getcwd()
    with open(os.path.join(testdata_path, 'symlink.7z'), 'rb') as fp:
        with py7zr.SevenZipFile(fp) as archive:
            os.chdir(str(tmp_path))
            try:
                # py35 need str() against pathlib.Path
                os.makedirs(str(tmp_path.joinpath('target')))
                archive.extractall(path='target')
                assert os.readlink(
                    str(tmp_path.joinpath('target/lib/libabc.so.1.2'))) == 'libabc.so.1.2.3'
            finally:
                os.chdir(original_cwd)
def test_extract_emptystream_mix(tmp_path, return_dict: bool):
    """Extract ``test_6.7z`` to disk or to a dict, depending on ``return_dict``.

    The test only checks that extraction completes without raising.
    """
    # `with` closes the archive even when extractall() raises.
    with py7zr.SevenZipFile(os.path.join(testdata_path, 'test_6.7z'), 'r') as archive:
        if not return_dict:
            archive.extractall(path=tmp_path, return_dict=return_dict)
        else:
            archive.extractall(return_dict=return_dict)
def test_extract_encrypted_2(tmp_path, return_dict: bool):
    """Extract the password-protected ``encrypted_2.7z`` to disk or to a dict.

    The test only checks that extraction completes without raising.
    """
    with open(os.path.join(testdata_path, 'encrypted_2.7z'), 'rb') as fp:
        with py7zr.SevenZipFile(fp, password='******') as archive:
            if not return_dict:
                archive.extractall(path=tmp_path, return_dict=return_dict)
            else:
                archive.extractall(return_dict=return_dict)
def test_extract_bzip2_2(tmp_path, return_dict: bool):
    """Extract ``bzip2_2.7z`` to disk or to a dict, depending on ``return_dict``.

    The test only checks that extraction completes without raising.
    """
    with open(os.path.join(testdata_path, 'bzip2_2.7z'), 'rb') as fp:
        with py7zr.SevenZipFile(fp) as archive:
            if not return_dict:
                archive.extractall(path=tmp_path, return_dict=return_dict)
            else:
                archive.extractall(return_dict=return_dict)
def test_no_main_streams(tmp_path, return_dict: bool):
    """Extract ``test_folder.7z`` to disk or to a dict, depending on ``return_dict``.

    The test only checks that extraction completes without raising.
    """
    with open(os.path.join(testdata_path, 'test_folder.7z'), 'rb') as fp:
        with py7zr.SevenZipFile(fp) as archive:
            if not return_dict:
                archive.extractall(path=tmp_path, return_dict=return_dict)
            else:
                archive.extractall(return_dict=return_dict)
def test_multiblock_zerosize(tmp_path, return_dict: bool):
    """Extract ``mblock_2.7z`` to disk or to a dict, depending on ``return_dict``.

    The test only checks that extraction completes without raising.
    """
    with open(os.path.join(testdata_path, 'mblock_2.7z'), 'rb') as fp:
        with py7zr.SevenZipFile(fp) as archive:
            if not return_dict:
                archive.extractall(path=tmp_path, return_dict=return_dict)
            else:
                archive.extractall(return_dict=return_dict)
def test_close_unlink(tmp_path):
    """Check that an archive file can be deleted once the archive is closed.

    A copy of ``test_1.7z`` is placed in ``tmp_path``, extracted there,
    closed explicitly and then unlinked.
    """
    copied_archive = tmp_path.joinpath('test_1.7z')
    source_archive = os.path.join(testdata_path, 'test_1.7z')
    shutil.copyfile(source_archive, str(copied_archive))

    szf = py7zr.SevenZipFile(copied_archive)
    szf.extractall(path=tmp_path)
    # Explicit close (not `with`): releasing the file is what is under test.
    szf.close()

    copied_archive.unlink()
def test_github_14(tmp_path):
    """Extract ``github_14.7z`` and check its single member's name and content."""
    with open(os.path.join(testdata_path, 'github_14.7z'), 'rb') as fp:
        with py7zr.SevenZipFile(fp) as archive:
            assert archive.getnames() == ['github_14']
            archive.extractall(path=tmp_path)
    with open(tmp_path.joinpath('github_14'), 'rb') as f:
        assert f.read() == bytes('Hello GitHub issue #14.\n', 'ascii')
def test_compress_deflate64(tmp_path):
    """Expect UnsupportedCompressionMethodError when writing with Deflate64.

    The error may come from opening the archive for writing or from the
    write itself; both happen inside the ``pytest.raises`` block.
    """
    deflate64_filters = [{"id": FILTER_DEFLATE64}]
    target = tmp_path.joinpath("target.7z")
    source = testdata_path.joinpath("src")
    with pytest.raises(UnsupportedCompressionMethodError):
        with py7zr.SevenZipFile(target, "w", filters=deflate64_filters) as szf:
            szf.write(source, "src")
def _7z_from_kaggle(self):
    """Download the KKBox ``.csv.7z`` files from Kaggle and extract them.

    Files already present in ``self._path_dir`` are not downloaded again.
    Each archive is then extracted into ``self._path_dir`` with py7zr.

    Raises:
        OSError: If importing ``kaggle`` fails with ``OSError``; the
            message points at the Kaggle credential setup guide.
    """
    try:
        import kaggle
    except OSError as e:
        raise OSError(
            """"
            Need to provide Kaggle credentials to download this data set. See guide at
            https://github.com/Kaggle/kaggle-api#api-credentials.
            """
        ) from e

    files = ['train', 'transactions', 'members_v3']
    print('Downloading from Kaggle...')
    import os
    for file in files:
        file_name = os.path.join(self._path_dir, file + '.csv.7z')
        if not os.path.exists(file_name):
            kaggle.api.competition_download_file(
                'kkbox-churn-prediction-challenge', file + '.csv.7z',
                path=self._path_dir, force=True)

    import py7zr
    for file in files:
        file_name = str(self._path_dir / (file + '.csv.7z'))
        # Extract only archives that are present. The previous check was
        # inverted: it skipped every downloaded archive and tried to open
        # the missing ones.
        if os.path.exists(file_name):
            print(f"Extracting '{file}'...")
            with py7zr.SevenZipFile(file_name, mode='r') as archive:
                archive.extractall(path=self._path_dir)
            print(f"Finished extracting '{file}'.")
def run_test(self, args):
    """Test the integrity of the archive named by ``args.arcfile``.

    Prints the archive info and the verdict to stdout.

    Returns:
        ``0`` when ``testzip()`` reports no bad member, ``1`` when the file
        is not a 7z archive, a member is bad, the header is corrupted, or a
        password is required but missing.
    """
    target = args.arcfile
    if not py7zr.is_7zfile(target):
        print("not a 7z file")
        return 1

    out = sys.stdout
    with open(target, "rb") as f:
        try:
            szf = py7zr.SevenZipFile(f)
            out.write("Testing archive: {}\n".format(szf.filename))
            self.print_archiveinfo(archive=szf, file=out)
            out.write("\n")
            if szf.testzip() is not None:
                out.write("Bad 7zip file\n")
                return 1
            out.write("Everything is Ok\n")
            return 0
        except py7zr.exceptions.Bad7zFile:
            print("Header is corrupted. Cannot read as 7z file.")
            return 1
        except py7zr.exceptions.PasswordRequired:
            print(
                "The archive is encrypted but password is not given. FAILED."
            )
            return 1
def test_basic_extract_1(tmp_path):
    """Open ``test_1.7z`` and hand it to ``decode_all`` with expected metadata.

    ``decode_all`` is defined elsewhere; presumably it extracts into
    ``tmp_path`` and compares each member with the listed mode, mtime and
    SHA-256 digest.
    """
    expected = [
        {
            'filename': 'setup.cfg',
            'mode': 33188,
            'mtime': 1552522033,
            'digest': 'ff77878e070c4ba52732b0c847b5a055a7c454731939c3217db4a7fb4a1e7240',
        },
        {
            'filename': 'setup.py',
            'mode': 33188,
            'mtime': 1552522141,
            'digest': 'b916eed2a4ee4e48c51a2b51d07d450de0be4dbb83d20e67f6fd166ff7921e49',
        },
        {
            'filename': 'scripts/py7zr',
            'mode': 33261,
            'mtime': 1552522208,
            'digest': 'b0385e71d6a07eb692f5fb9798e9d33aaf87be7dfff936fd2473eab2a593d4fd',
        },
    ]
    # The test opens the file itself, so it is also responsible for closing it.
    with open(os.path.join(testdata_path, 'test_1.7z'), 'rb') as fp:
        archive = py7zr.SevenZipFile(fp)
        decode_all(archive, expected, tmp_path)
def run_create(self, args):
    """Create a 7z archive from ``args.filenames``, optionally split into volumes.

    ``.7z`` is appended to ``args.arcfile`` when missing. Directories are
    added recursively, other paths as single files. When a volume size is
    given, the finished archive is split and the unsplit file removed.

    Returns:
        ``0`` on success.

    Raises:
        SystemExit: With code 1 when the volume size is invalid or the
            target archive already exists.
    """
    sztarget = args.arcfile  # type: str
    filenames = args.filenames  # type: List[str]
    volume_size = args.volume[0] if getattr(args, "volume", None) is not None else None
    if volume_size is not None and not self._check_volumesize_valid(volume_size):
        sys.stderr.write("Error: Specified volume size is invalid.\n")
        self.show_help(args)
        # sys.exit rather than the builtin exit(): the latter is injected by
        # the `site` module and is missing under `python -S` or frozen apps.
        sys.exit(1)
    if not sztarget.endswith(".7z"):
        sztarget += ".7z"
    target = pathlib.Path(sztarget)
    if target.exists():
        sys.stderr.write("Archive file exists!\n")
        self.show_help(args)
        sys.exit(1)
    with py7zr.SevenZipFile(target, "w") as szf:
        for path in filenames:
            src = pathlib.Path(path)
            if src.is_dir():
                szf.writeall(src)
            else:
                szf.write(src)
    if volume_size is None:
        return 0
    size = self._volumesize_unitconv(volume_size)
    self._split_file(target, size)
    target.unlink()
    return 0
def playerid2player_idx_map_worker(game_7zs, queue):
    """Extract tracking archives and collect player names keyed by player id.

    Each ``.7z`` in ``game_7zs`` is extracted under ``TRACKING_DIR``. The
    first event of the extracted JSON supplies the home and visitor players.
    Games whose archive fails to extract or contains no file are skipped and
    their extraction directory removed.

    Args:
        game_7zs: Iterable of archive file names located in ``TRACKING_DIR``.
        queue: Object with ``put``; receives one dict mapping ``playerid``
            to ``{"name": "<first> <last>"}``.
    """
    playerid2props = {}
    for game_7z in game_7zs:
        game_name = game_7z.split(".7z")[0]
        game_dir = f"{TRACKING_DIR}/{game_name}"
        try:
            # `with` closes the archive even when extraction fails.
            with py7zr.SevenZipFile(f"{TRACKING_DIR}/{game_7z}", mode="r") as archive:
                archive.extractall(path=game_dir)
        except AttributeError:
            print(f"{game_name}\nBusted.", flush=True)
            # The directory may not exist if extraction failed early.
            shutil.rmtree(game_dir, ignore_errors=True)
            continue

        try:
            gameid = os.listdir(game_dir)[0].split(".")[0]
        except IndexError:
            print(f"No tracking data for {game_name}.", flush=True)
            shutil.rmtree(game_dir)
            continue

        df_tracking = pd.read_json(f"{game_dir}/{gameid}.json")
        event = df_tracking["events"].iloc[0]
        players = event["home"]["players"] + event["visitor"]["players"]
        for player in players:
            playerid = player["playerid"]
            playerid2props[playerid] = {
                "name": " ".join([player["firstname"], player["lastname"]]),
            }

    queue.put(playerid2props)
def sevenzip_get_crc32(path: Path, filename: str) -> int:
    """Return the CRC32 recorded for ``filename`` inside the 7z archive at ``path``.

    When several entries share the name, the first one listed wins.

    Raises:
        FileNotFoundError: If no entry in the archive has that file name.
    """
    with py7zr.SevenZipFile(path, mode='r') as archive:
        matching = [entry for entry in archive.list() if entry.filename == filename]
        if matching:
            return matching[0].crc32
    raise FileNotFoundError(f'{filename} is not in {path}')
def create_splits():
    """Distribute encrypted parts into per-split folders and archive each one.

    For every split index below ``DISTRIBUTION`` a folder is created under
    ``TMPPATH``. The cipher/tag/nonce files of the parts chosen by
    ``calculate_distribution`` are moved into it and the spec file copied
    alongside. The folder is then packed into ``<index>.ss`` two levels
    above it.
    """
    copied = {}
    log("i", "Split secret!")
    for i in range(DISTRIBUTION):
        log("i", 'Generating secret split [' + str(i) + '.ss]!')
        dist_path = path.join(TMPPATH, str(i))
        os.mkdir(dist_path)

        # Distribute parts. `copied` counts how often a part was already
        # handed out; the count is part of the source file name.
        for part in calculate_distribution(i):
            copied[part] = copied.get(part, -1) + 1
            # TMPPATH is concatenated directly, so it presumably ends with a
            # path separator -- TODO confirm.
            ident = "P" + str(part) + str(copied[part]) + "."
            for suffix in ("cipher", "tag", "nonce"):
                shutil.move(TMPPATH + ident + suffix, dist_path)

        # Copy spec config
        shutil.copy(path.join(TMPPATH, SPECFILE), dist_path)

        # Generate archive. Archiving runs from inside dist_path so members
        # are stored relative to it; always restore the previous cwd.
        root_path = os.getcwd()
        os.chdir(dist_path)
        try:
            with py7zr.SevenZipFile(path.join("..", "..", str(i) + '.ss'), 'w') as archive:
                archive.writeall(".")
        finally:
            os.chdir(root_path)
def extract_7z(self):
    """Extract every ``.7z`` in ``self.src_folder`` into a fresh dated folder.

    The target folder is ``self.des_folder/TODAY``, with ``_<n>`` appended
    when that name is already taken. Archives that extract successfully are
    sent to the trash afterwards; failures are reported and skipped.
    """
    count = 0
    targe_folder = os.path.join(self.des_folder, TODAY)
    while os.path.exists(targe_folder):
        count += 1
        targe_folder = os.path.join(self.des_folder, TODAY) + '_' + str(count)
    os.mkdir(targe_folder)

    print(f'Extract all .7z files in:\n {self.src_folder}\nTo:\n {targe_folder}\n-------------------------------------------------------------------\n', flush=True)

    extracted_files = []
    enum = list(enumerate(os.listdir(self.src_folder)))
    for idx, f_name in enum:
        if not f_name.endswith('.7z'):
            continue
        abs_f_name = os.path.join(self.src_folder, f_name)
        print(f'Extracing file {idx} of {enum[-1][0]}:', flush=True)
        try:
            print(f' {abs_f_name}\n', flush=True)
            with py7zr.SevenZipFile(abs_f_name, mode='r', password='******') as z:
                z.extractall(targe_folder)
            extracted_files.append(abs_f_name)
        except Exception:
            # Best effort per archive, but let KeyboardInterrupt and
            # SystemExit through (a bare `except:` swallowed them).
            print(f'ERROR: error occured when extracting:\n {abs_f_name}', flush=True)

    for f_to_del in extracted_files:
        if os.path.exists(f_to_del):
            send2trash(f_to_del)
        else:
            print(f'WARNING: The file to be delete not exist:\n {f_to_del}', flush=True)
    print('DONE')
def make_7z_backup() -> None:
    """Clear the terminal and pack ``BACKUP_DIR`` into ``BACKUP_7Z_NAME``.

    The previous ``tqdm(archive.writeall(...))`` wrapped the return value of
    an already-completed call. It showed no real progress and left an
    unclosed bar behind, so the wrapper is dropped.
    """
    os.system('clear')
    print('Making 7z file')
    with py7zr.SevenZipFile(BACKUP_7Z_NAME, 'w') as archive:
        archive.writeall(BACKUP_DIR)
    print('done')
    # Short pause so the message stays visible.
    time.sleep(1)
def extractcars():
    """Extract the first entry of ``modcars`` into ``accars`` and delete it.

    Supports ``.7z``, ``.zip`` and ``.rar``. Any other extension only prints
    an error and leaves the file in place. Does nothing when ``modcars`` is
    empty.
    """
    arr = os.listdir(modcars)
    if not arr:
        return
    firstfile = arr[0]
    # `modcars` is concatenated directly, so it presumably ends with a path
    # separator -- TODO confirm.
    pathtozip = modcars + firstfile

    if firstfile.endswith('.7z'):
        with py7zr.SevenZipFile(pathtozip, mode='r') as archive:
            archive.extractall(path=accars)
    elif firstfile.endswith('.zip'):
        with ZipFile(pathtozip, 'r') as zf:
            zf.extractall(accars)
    elif firstfile.endswith('.rar'):
        pyunpack.Archive(pathtozip).extractall(accars)
    else:
        print("Extract Car Function Error")
        return

    # Shared cleanup for all supported formats.
    if os.path.exists(pathtozip):
        os.remove(pathtozip)
    else:
        print("The file doesn't exist")
def test_skip():
    """Run the extraction worker with every member registered to ``None``.

    Presumably a ``None`` file-like means "skip this member"; the test only
    checks that the worker runs without raising.
    """
    with open(os.path.join(testdata_path, 'test_1.7z'), 'rb') as fp:
        with py7zr.SevenZipFile(fp) as archive:
            for cf in archive.files:
                assert cf is not None
                archive.worker.register_filelike(cf.id, None)
            archive.worker.extract(archive.fp, parallel=True)
def extrae_datos(rootDir):
    """Recursively extract every zip, rar and 7z archive found under ``rootDir``.

    Each archive is extracted into the directory that contains it.
    Subdirectories are processed recursively. Files with other extensions
    are ignored.

    Args:
        rootDir: Directory to scan.
    """
    logging.debug("Directorio actual " + rootDir)
    for file in os.listdir(rootDir):
        if os.path.isdir(os.path.join(rootDir, file)):
            logging.debug("Carpeta: " + rootDir + file)
            # Build the child path from rootDir on every iteration. The old
            # code appended to an accumulating variable, so the second
            # sibling directory was looked up inside the first one.
            extrae_datos(rootDir + "/" + file)
            continue

        lowered = file.lower()
        archive_path = rootDir + '/' + file
        if lowered.endswith(".zip"):
            logging.debug("Es un zip: " + archive_path)
            with zipfile.ZipFile(archive_path, 'r') as zip_ref_zip:
                zip_ref_zip.extractall(rootDir)
        elif lowered.endswith(".rar"):
            logging.debug("Es un rar: " + archive_path)
            r = rarfile.RarFile(archive_path)
            try:
                r.extractall(rootDir)
            finally:
                r.close()
        elif lowered.endswith(".7z"):
            logging.debug("Es un 7z: " + archive_path)
            print(rootDir)
            with py7zr.SevenZipFile(archive_path, mode='r') as z:
                z.extractall(rootDir)
def test_multiblock_lzma_bug(tmp_path):
    """Extract ``mblock_3.7z`` to disk and verify one member's SHA-256."""
    with open(os.path.join(testdata_path, 'mblock_3.7z'), 'rb') as fp:
        with py7zr.SevenZipFile(fp) as archive:
            archive.extractall(path=tmp_path)
    # read_bytes() opens and closes the extracted file for us.
    m = hashlib.sha256()
    m.update(tmp_path.joinpath(
        '5.13.0/mingw73_64/plugins/canbus/qtvirtualcanbusd.dll').read_bytes())
    assert m.digest() == binascii.unhexlify(
        '98985de41ddba789d039bb10d86ea3015bf0d8d9fa86b25a0490044c247233d3')
def _load_secrets_from_vault(
        password: str,
        vault_file: str) -> Dict[str, Union[str, Dict[str, str]]]:
    """Read and parse ``secrets.json`` from a password-protected 7z vault.

    The vault's ``secrets.json`` is expected to follow this schema::

        {
            "SECRET_KEY": str,
            "HOBOLINK_AUTH": {
                "password": str,
                "user": str,
                "token": str
            },
            "TWITTER_AUTH": {
                "api_key": str,
                "api_key_secret": str,
                "access_token": str,
                "access_token_secret": str,
                "bearer_token": str
            }
        }

    Args:
        password: Password used to open ``vault_file``.
        vault_file: Path of the 7z archive that contains ``secrets.json``.

    Returns:
        The parsed contents of ``secrets.json``.
    """
    with py7zr.SevenZipFile(vault_file, mode='r', password=password) as vault:
        members = vault.readall()
        secrets_stream = members['secrets.json']
        return json.loads(secrets_stream.read())