def _add_path(self, zip_file: zipfile.ZipFile, parent: NodeDirectory, path: zipfile.Path):
    """Mirror one zip entry (and, for a directory, its whole subtree) into the
    in-memory node tree beneath *parent*.

    Args:
        zip_file: Open archive the entry belongs to; used only for getinfo().
        parent: Directory node that receives the new child node.
        path: zipfile.Path of the entry (path.at is the archive-internal name).
    """
    info: zipfile.ZipInfo = zip_file.getinfo(path.at)
    # Keyword arguments shared by both file and directory nodes.
    common_kwargs = dict(
        fs=parent.fs,
        # specifically the file portion
        name=path.name.encode("utf8"),
        # Not writeable!
        # The upper 16 bits of external_attr carry the Unix mode bits;
        # when the fs is read-only, the write bits (0o222) are masked off.
        mode=propagate_owner_perms((info.external_attr >> 16) & (~0o222 if self.read_only else ~0)),
        # size=info.file_size,
        # Block count derived from the uncompressed size — assumes
        # self.block_size is a positive int set by the fs (TODO confirm).
        blocks=math.ceil(info.file_size / self.block_size),
        # Zip stores a single timestamp, so mtime and ctime are identical.
        mtime=dt.datetime(*info.date_time),
        ctime=dt.datetime(*info.date_time),
    )
    if info.is_dir():
        entry = SimpleDirectory(
            **common_kwargs,
        )
    else:
        # Note: the whole member is read eagerly into memory here.
        entry = SimpleFile(
            contents=bytearray(path.read_bytes()),
            **common_kwargs
        )
    parent.link_child(entry)
    if info.is_dir():
        # Recurse so the directory's children land beneath the new node.
        for child_path in path.iterdir():
            self._add_path(zip_file, parent=entry, path=child_path)
def extract_csvs(self, zipf: ZipFile) -> MovieLensDataSet:
    """Open each known .csv member of *zipf* onto the matching dataset field.

    Members whose stem is not an annotated field of MovieLensDataSet are
    ignored. The opened text streams are left open; closing them is the
    responsibility of the dataset's consumer.

    Args:
        zipf: Open zip archive containing the MovieLens csv files.

    Returns:
        MovieLensDataSet: The dataset with its csv fields set to open
        text streams.
    """
    dataset = MovieLensDataSet()
    # Field names declared on the dataset class are the only valid stems.
    valid_stems = set(dataset.__annotations__)
    for info in zipf.filelist:
        path = Path(zipf, at=info.filename)
        if path.name.endswith('.csv') and (stem := path.name[:-len('.csv')]) in valid_stems:
            setattr(dataset, stem, path.open('r', encoding='utf-8'))
    # Bug fix: the function was annotated to return a MovieLensDataSet but
    # fell off the end, returning None — the populated dataset is now returned.
    return dataset
def test_1(act: Action):
    """Run the SQL script bundled in core_3097.zip and verify both streams."""
    sql = Path(act.files_dir / 'core_3097.zip', at='core_3097_script.sql')
    act.script = sql.read_text()
    act.expected_stdout = expected_stdout
    act.expected_stderr = expected_stderr
    act.execute()
    # stderr is checked first, matching the original short-circuit order.
    assert (act.clean_stderr == act.clean_expected_stderr and
            act.clean_stdout == act.clean_expected_stdout)
def main(location: list[str], target_time: int):
    """Run every stage of the puzzle in order and print the emoji password.

    Args:
        location (list[str]): Successive keys used to navigate down through
            the security JSON structure.
        target_time (int): The arrival time whose password entry we want.

    Prints:
        The final, single emoji password
    """
    # Open the mysterious drive as a traversable zip path.
    ZIP_FILE_NAME = 'mysterious_drive.zip'
    archive_root = Path(ZIP_FILE_NAME)
    # Take the first .json file found anywhere in the archive.
    security_file = search_zip(archive_root, ".json")[0]
    # Decode its JSON payload.
    security_data = json.loads(security_file.read_bytes())
    # Follow the location keys down to the list of password records.
    password_records = access_path(security_data, location)
    # Binary-search the records for the entry matching target_time.
    matching_entry = binary_search_time(
        password_records, 0, len(password_records) - 1, target_time)
    maze = matching_entry['password']
    # Solving the 1-D maze yields the final answer, which is printed.
    print(solve(maze, 0, set()))
def zipfile_to_documents(
        corpus_zipfile: ZipFile,
        prefix: Optional[str]) -> List[Tuple[str, str, str, str]]:
    """Collect (doc_id, doceid, lang_id, raw_text) tuples for every .ltf
    member found under data/ltf/ in the corpus archive.

    Args:
        corpus_zipfile: The corpus archive; data/ltf/ contains nested zips
            whose members are the actual .ltf documents.
        prefix: Archive-internal root directory (with trailing slash), or
            None to derive it from the archive itself.

    Returns:
        One tuple per child document, mapping it to its parent doc id and
        language via docs/parent_children.tab.

    Raises:
        RuntimeError: If the archive has no parent_children.tab.
        FileNotFoundError: If a member cannot be read.
    """
    print(f"Reading .ltf documents in {corpus_zipfile.filename}")
    if prefix is None:
        prefix = get_root_dir_name(corpus_zipfile) or ""
    parent_children_path = _find_name_in_zip(
        corpus_zipfile, re.compile(f"{prefix}docs/parent_children.tab"))
    if not parent_children_path:
        raise RuntimeError("Archive lacks parent_children.tab")
    # The tab file links each child doceid to its parent doc and language.
    parent_children_tab = _read_tab_file(
        CharSource.from_file_in_zip(corpus_zipfile, parent_children_path))
    child_to_parent_map = _create_child_to_parent_map(parent_children_tab)
    child_to_lang_map = _create_child_to_lang_map(parent_children_tab)
    documents = []
    text_dir = ZipPath(corpus_zipfile, at=f"{prefix}data/ltf/")
    for source_doc_path in text_dir.iterdir():
        # Each entry under data/ltf/ is itself a zip archive of documents.
        source_doc_zip = ZipFile(io.BytesIO(source_doc_path.read_bytes()))
        for source_info in tqdm(
                source_doc_zip.infolist(),
                desc=f"Extracting {source_doc_path.name}",
                bar_format="{l_bar}{bar:20}{r_bar}",
        ):
            doceid_path = ZipPath(source_doc_zip, at=source_info.filename)
            try:
                # doceid is the member's file stem (text before the first dot).
                doceid = doceid_path.name.split(".")[0]
                doc_id = child_to_parent_map[doceid]
                lang_id = child_to_lang_map[doceid]
                raw_text = convert_ltf_to_raw_text(
                    doceid_path.read_text(encoding="utf-8"))
                documents.append((doc_id, doceid, lang_id, raw_text))
            except AttributeError:
                # NOTE(review): AttributeError is presumably what read_text
                # raises for an unreadable member here — confirm before
                # relying on this mapping to FileNotFoundError.
                raise FileNotFoundError(f"Could not read from {doceid_path}.")
    return documents
def extract_folder(zipfile: ZipFile, rootfolder: ZipPath, folderpath: ZipPath, target: Path):
    """Recursively extract *folderpath* (inside *zipfile*) to *target*,
    keeping paths relative to *rootfolder*.

    Args:
        zipfile: The open archive to read member bytes from.
        rootfolder: Archive-internal root; its prefix is stripped from
            every extracted member's path.
        folderpath: The directory currently being walked (starts at root).
        target: Filesystem directory receiving the extracted files.
    """
    root_prefix = rootfolder.at
    for entry in folderpath.iterdir():
        if entry.is_dir():
            extract_folder(zipfile, rootfolder, entry, target)
        elif entry.is_file():
            # Bug fix: str.replace() removed EVERY occurrence of the root
            # prefix, corrupting paths in which it repeats (e.g. root "a/"
            # and member "a/x/a/y"). Strip only the leading prefix instead.
            if entry.at.startswith(root_prefix):
                relative = entry.at[len(root_prefix):]
            else:
                relative = entry.at
            file_target = Path(target, relative)
            file_target.parent.mkdir(parents=True, exist_ok=True)
            with zipfile.open(entry.at, 'r') as zf, open(file_target, 'wb') as f:
                f.write(zf.read())
def load(root):
    """Build the package at *root* into a zipped distribution and return an
    importlib.metadata.Distribution with metadata read from it.

    Args:
        root: Source directory of the package (``~`` is expanded).
    """
    source_dir = os.path.expanduser(root)
    build_cfg = compat_system(source_dir)
    # Defer the actual build so build_as_zip controls when/where it runs.
    make_dist = functools.partial(build, source_dir=source_dir, system=build_cfg)
    return imp_meta.PathDistribution(Path(build_as_zip(make_dist)))
def _copy_file_from_zip(zfp: zipfile.Path, p: Path) -> None:
    """Copy the bytes of the zip member *zfp* into the filesystem file *p*.

    The member is opened unbuffered-binary in a version-appropriate way:
    zipfile.Path.open() defaults to text mode from 3.9 on (so "rb" must be
    requested), while earlier versions only accept "r".
    """
    binary_mode = "rb" if sys.version_info >= (3, 9) else "r"
    with p.open("wb") as writer, zfp.open(binary_mode) as reader:
        _io_copy(reader, writer)
def copy_tree(self, package: Path, path: zipfile.Path, skip_list: List[str] = []):
    """Recursively copy the zip directory *path* into the directory *package*.

    Args:
        package: Filesystem directory to create (its parent must exist).
        path: zipfile.Path directory whose children are copied.
        skip_list: File names to omit — applied at every level of the tree.
    """
    # NOTE: the [] default is safe here because it is never mutated; it is
    # kept as-is so the signature stays byte-compatible for callers.
    package.mkdir(exist_ok=True)
    for item in path.iterdir():
        if item.is_dir():
            # Bug fix: propagate skip_list into the recursion — previously it
            # silently reset to [] below the top level, so skipped names were
            # still copied out of subdirectories.
            self.copy_tree(package / item.name, item, skip_list)
        elif item.name not in skip_list:
            target = package / item.name
            target.write_bytes(item.read_bytes())
def get_text_docs(corpus_zipfile: ZipFile) -> ImmutableDict[str, str]:
    """Map each parent doc id to the raw text of its .ltf child document.

    Args:
        corpus_zipfile: The corpus archive; data/ltf/ contains nested zips
            whose members are the actual .ltf documents.

    Returns:
        Immutable mapping from parent doc id (per parent_children.tab) to
        the converted raw text of the corresponding child document.

    Raises:
        RuntimeError: If the archive has no parent_children.tab.
        FileNotFoundError: If a member cannot be read.
    """
    print(f"Reading .ltf documents in {corpus_zipfile.filename}")
    prefix = get_root_dir_name(corpus_zipfile) or ""
    parent_children_path = _find_name_in_zip(
        corpus_zipfile, re.compile(f"{prefix}docs/parent_children.tab"))
    if not parent_children_path:
        raise RuntimeError("Archive lacks parent_children.tab")
    parent_children_tab = _read_tab_file(
        CharSource.from_file_in_zip(corpus_zipfile, parent_children_path))
    child_to_parent_map = _create_child_to_parent_map(parent_children_tab)
    text_docs = {}
    # Bug fix (consistency with zipfile_to_documents): honor the archive's
    # root-dir prefix here too — previously only the parent_children.tab
    # lookup used it, so prefixed archives found the tab but not data/ltf/.
    text_dir = ZipPath(corpus_zipfile, at=f"{prefix}data/ltf/")
    for source_doc_path in text_dir.iterdir():
        # Each entry under data/ltf/ is itself a zip archive of documents.
        source_doc_zip = ZipFile(io.BytesIO(source_doc_path.read_bytes()))
        for source_info in tqdm(
                source_doc_zip.infolist(),
                desc=f"Extracting {source_doc_path.name}",
                bar_format="{l_bar}{bar:20}{r_bar}",
        ):
            doc = ZipPath(source_doc_zip, at=source_info.filename)
            try:
                doceid = doc.name.split(".")[0]
                doc_id = child_to_parent_map[doceid]
                text_docs[doc_id] = convert_ltf_to_raw_text(
                    doc.read_text(encoding="utf-8"))
            except AttributeError:
                raise FileNotFoundError(f"Could not read from {doc}.")
    return immutabledict(text_docs)
def test_1(act: Action):
    """Run the long-plans query script from core_2115.zip and compare stdout."""
    # Both the script and its expected output live in the same archive.
    archive = act.files_dir / 'core_2115.zip'
    act.script = Path(archive, at='tmp_core_2115_queries_with_long_plans.sql').read_text()
    act.expected_stdout = Path(archive, at='tmp_core_2115_check_txt_of_long_plans.log').read_text()
    act.execute()
    assert act.clean_stdout == act.clean_expected_stdout
def search_zip(folder: Path, file_type: str) -> list[Path]:
    """Recursively collect every file beneath *folder* whose name ends
    with *file_type*.

    Args:
        folder (Path): A ZipFile Path that represents a folder.
        file_type (str): The extension to check against individual files.

    Returns:
        list[Path]: All matching file Paths, in traversal order.
    """
    matches: list[Path] = []
    for entry in folder.iterdir():
        if entry.is_dir():
            # Descend into the subfolder and keep whatever it yields.
            matches.extend(search_zip(entry, file_type))
        elif entry.name.endswith(file_type):
            matches.append(entry)
    return matches
def cmp_zipfile(archive, CUTOFF_DATE=date(2021, 3, 1)):
    """Return a set of all the files in the zip archive which are older
    than CUTOFF_DATE.

    - archive should be a zip file of a directory of data files.
    - The directory and zip file's basenames should be the same.
    - CUTOFF_DATE is an optional parameter with a default.
    - File names follow the convention YYYYMMDD_<id>.dat; names that do
      not parse as a date are logged and skipped.
    """
    import logging
    import os
    old_files = set()
    # Bug fix: derive the inner directory from the archive's *basename*,
    # so "/tmp/data.zip" maps to "data/". Splitting the whole path on "."
    # kept the directory prefix (and broke on dotted directories), making
    # the lookup match nothing.
    inner_dir = os.path.basename(archive).rsplit('.', 1)[0] + '/'
    with ZipFile(archive, 'r') as our_zip:
        for file in Path(our_zip, at=inner_dir).iterdir():
            # extract date info from filename convention. ex: 20210430_158732.dat
            try:
                # int() handles leading zeros fine in Python 3, so no
                # lstrip('0') gymnastics are needed.
                file_date = date(int(file.name[0:4]),
                                 int(file.name[4:6]),
                                 int(file.name[6:8]))
            except ValueError:
                # Per the TODO: one malformed name must not abort the scan.
                logging.warning("skipping %s: name does not match the "
                                "YYYYMMDD_* convention", file.name)
                continue
            if file_date < CUTOFF_DATE:
                old_files.add(file.name)
    return old_files
from zipfile import ZipFile, Path
from glob import glob

# Collect every .txt file below the working directory.
files = glob("**/*.txt", recursive=True)
print(files)

# Create the archive ("w" truncates any existing archive.zip).
# Fix: the handle is named `archive`, not `zip`, so the builtin zip() is
# no longer shadowed.
with ZipFile("archive.zip", "w") as archive:
    for file in files:
        archive.write(file)
    archive.printdir()

# Append one more file ("a" keeps the existing members).
with ZipFile("archive.zip", "a") as archive:
    archive.write("update.txt")
    archive.printdir()

with ZipFile("archive.zip", "a") as archive:
    archive.extractall("archive")
    # Assumes test.txt was among the globbed files above.
    archive.extract("test.txt", "output")
    # Path() here re-opens the archive by filename; update.txt was already
    # committed by the previous with-block, so it is visible.
    if Path(archive.filename, "update.txt").exists():
        with archive.open("update.txt", "r") as file:
            print(file.read())
    # Writing the same member name again appends a duplicate entry.
    with archive.open("update.txt", "w") as file:
        file.write(b"Another update")
def _recursive_object_registration(base_path: ZipPath):
    """Walk *base_path* depth-first and register every file, except any
    file named meta.json."""
    for entry in base_path.iterdir():
        if entry.is_dir():
            _recursive_object_registration(entry)
        elif entry.is_file() and entry.name != "meta.json":
            register_object(entry)
def register_object(zip_path: ZipPath):
    """Parse the JSON payload at *zip_path* and dispatch it to the handler
    registered for its "type" field."""
    payload = loads(zip_path.read_text())
    handler = register_map[payload["type"]]
    handler(payload)
def test_write_adds_file_to_archive(self, wf, tmp_file):
    """After wf.write(), the file's text must be readable back out of the
    archive under its basename."""
    tmp_file.write_text("contents")
    wf.write(tmp_file)
    archived = ZipPath(wf.zipfile, str(tmp_file.name).lstrip('/'))
    assert archived.read_text() == tmp_file.read_text()
''' if depth == k: # potential candidate ans.append(curr[::]) return for i in range(n): curr.append(TEXT[i]) backtrack(n, k, depth + 1, curr, ans) curr.pop() return if __name__ == '__main__': # Load the Zip File ZIP_FILE_NAME = 'GS4/mysterious_drive.zip' root = Path(ZIP_FILE_NAME) # Search the Zip File to find password_hash.py & dictionary.txt pass_hash = search_zip(root, ".py")[0] # print(pass_hash) # mysterious_drive.zip/c/Home/atb/lib/usr/password_hash.py with open('GS4/dictionary.txt') as data_file: TEXT = data_file.readlines() i = 0 for line in TEXT: TEXT[i] = line.rstrip() i += 1 n = len(TEXT) ans = [] for i in range(len(TEXT)):
def test_1(act: Action):
    """Run the UTF-8 SQL script bundled in core_4881.zip and compare stdout."""
    sql = Path(act.files_dir / 'core_4881.zip', at='core_4881_script.sql')
    act.script = sql.read_text(encoding='utf-8')
    act.expected_stdout = expected_stdout
    act.execute()
    assert act.clean_stdout == act.clean_expected_stdout