Example #1
    def _add_path(self, zip_file: zipfile.ZipFile, parent: NodeDirectory, path: zipfile.Path):
        info: zipfile.ZipInfo = zip_file.getinfo(path.at)

        common_kwargs = dict(
            fs=parent.fs,
            # specifically the file portion
            name=path.name.encode("utf8"),
            # Strip the write bits when the filesystem is read-only
            mode=propagate_owner_perms((info.external_attr >> 16) & (~0o222 if self.read_only else ~0)),
            # size=info.file_size,
            blocks=math.ceil(info.file_size / self.block_size),
            mtime=dt.datetime(*info.date_time),
            ctime=dt.datetime(*info.date_time),
        )

        if info.is_dir():
            entry = SimpleDirectory(
                **common_kwargs,
            )
        else:
            entry = SimpleFile(
                contents=bytearray(path.read_bytes()),
                **common_kwargs
            )

        parent.link_child(entry)

        if info.is_dir():
            for child_path in path.iterdir():
                self._add_path(zip_file, parent=entry, path=child_path)
Example #2
    def extract_csvs(self, zipf: ZipFile) -> MovieLensDataSet:
        dataset = MovieLensDataSet()
        valid_stems = set(dataset.__annotations__)

        for info in zipf.filelist:
            path = Path(zipf, at=info.filename)

            if path.name.endswith('.csv') and (stem := path.name[:-len('.csv')]) in valid_stems:
                setattr(dataset, stem, path.open('r', encoding='utf-8'))

        return dataset
Example #3
def test_1(act: Action):
    script_file = Path(act.files_dir / 'core_3097.zip',
                       at='core_3097_script.sql')
    act.script = script_file.read_text()
    act.expected_stdout = expected_stdout
    act.expected_stderr = expected_stderr
    act.execute()
    assert (act.clean_stderr == act.clean_expected_stderr
            and act.clean_stdout == act.clean_expected_stdout)
Example #4
def main(location: list[str], target_time: int):
    """
    The main function that runs all the other parts of the program, in the proper sequence.
    This part of the program is fine - no fixes needed!

    Args:
        location (list[str]): A sequence of locations and sublocations that will navigate
            us through the Security JSON file.
        target_time (int): The current time we are expecting to arrive at the location.
    Prints:
        The final, single emoji password
    """
    # Load the Zip File
    ZIP_FILE_NAME = 'mysterious_drive.zip'
    root = Path(ZIP_FILE_NAME)
    # Search the Zip File
    security_file = search_zip(root, ".json")[0]
    # Read the JSON file
    security = json.loads(security_file.read_bytes())
    # Access the JSON path
    passwords = access_path(security, location)
    # Binary Search the Times
    time_data = binary_search_time(passwords, 0,
                                   len(passwords) - 1, target_time)
    maze = time_data['password']
    # Solve the 1d Maze
    answer = solve(maze, 0, set())
    # Print the Answer
    print(answer)
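
A minimal invocation sketch for the pipeline above, assuming search_zip, access_path, binary_search_time and solve are defined in the same module; the location chain and arrival time below are made-up illustration values, not data from the real archive:

if __name__ == '__main__':
    # Hypothetical arguments: the actual layout of the security JSON is not shown here.
    main(["building", "floor_3", "vault"], 1430)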
Example #5
def zipfile_to_documents(
        corpus_zipfile: ZipFile,
        prefix: Optional[str]) -> List[Tuple[str, str, str, str]]:
    print(f"Reading .ltf documents in {corpus_zipfile.filename}")

    if prefix is None:
        prefix = get_root_dir_name(corpus_zipfile) or ""

    parent_children_path = _find_name_in_zip(
        corpus_zipfile, re.compile(f"{prefix}docs/parent_children.tab"))

    if not parent_children_path:
        raise RuntimeError("Archive lacks parent_children.tab")

    parent_children_tab = _read_tab_file(
        CharSource.from_file_in_zip(corpus_zipfile, parent_children_path))

    child_to_parent_map = _create_child_to_parent_map(parent_children_tab)
    child_to_lang_map = _create_child_to_lang_map(parent_children_tab)

    documents = []
    text_dir = ZipPath(corpus_zipfile, at=f"{prefix}data/ltf/")

    for source_doc_path in text_dir.iterdir():
        source_doc_zip = ZipFile(io.BytesIO(source_doc_path.read_bytes()))

        for source_info in tqdm(
                source_doc_zip.infolist(),
                desc=f"Extracting {source_doc_path.name}",
                bar_format="{l_bar}{bar:20}{r_bar}",
        ):

            doceid_path = ZipPath(source_doc_zip, at=source_info.filename)
            try:
                doceid = doceid_path.name.split(".")[0]
                doc_id = child_to_parent_map[doceid]
                lang_id = child_to_lang_map[doceid]
                raw_text = convert_ltf_to_raw_text(
                    doceid_path.read_text(encoding="utf-8"))

                documents.append((doc_id, doceid, lang_id, raw_text))

            except AttributeError:
                raise FileNotFoundError(f"Could not read from {doceid_path}.")

    return documents
Example #6
def extract_folder(zipfile: ZipFile, rootfolder: ZipPath, folderpath: ZipPath, target: Path):
	for entry in folderpath.iterdir():
		if entry.is_dir(): extract_folder(zipfile, rootfolder, entry, target)
		elif entry.is_file():
			file_target = Path(target, entry.at.replace(rootfolder.at, ''))
			file_target.parent.mkdir(parents=True, exist_ok=True)
			with zipfile.open(entry.at, 'r') as zf, open(file_target, 'wb') as f:
				f.write(zf.read())
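
A small usage sketch for extract_folder, assuming a hypothetical archive example.zip that contains a docs/ folder; passing the same folder as rootfolder and folderpath strips the prefix so the files land directly under the target directory:

from pathlib import Path
from zipfile import ZipFile, Path as ZipPath

with ZipFile("example.zip") as archive:               # hypothetical archive name
    docs = ZipPath(archive, at="docs/")               # hypothetical folder inside it
    extract_folder(archive, docs, docs, Path("out"))  # writes docs/* into out/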
Example #7
def load(root):
    """
    Given a source directory (root) of a package,
    return an importlib.metadata.Distribution object
    with metadata built from that package.
    """
    root = os.path.expanduser(root)
    system = compat_system(root)
    builder = functools.partial(build, source_dir=root, system=system)
    path = Path(build_as_zip(builder))
    return imp_meta.PathDistribution(path)
Example #8
def _copy_file_from_zip(zfp: zipfile.Path, p: Path) -> None:
    with p.open("wb") as writer:
        if sys.version_info >= (3, 9):
            # 3.9 now opens paths in zip files in text mode by default
            mode = "rb"
        else:
            # but before 3.9 it's not possible to specify binary "b" mode
            mode = "r"

        with zfp.open(mode) as reader:
            _io_copy(reader, writer)
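
For context, a sketch of how the helper above might be called, assuming a hypothetical archive and member name; _io_copy is expected to come from the surrounding module:

import zipfile
from pathlib import Path

with zipfile.ZipFile("dist/pkg.zip") as zf:                # hypothetical archive
    member = zipfile.Path(zf, at="pkg/metadata.json")      # hypothetical member
    _copy_file_from_zip(member, Path("metadata.json"))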
Example #9
    def copy_tree(self,
                  package: Path,
                  path: zipfile.Path,
                  skip_list: List[str] = []):
        """Recursively copy the zip directory tree at *path* into *package*, skipping names in *skip_list*."""
        package.mkdir(exist_ok=True)
        for item in path.iterdir():
            if item.is_dir():
                self.copy_tree(package / item.name, item)
            elif item.name not in skip_list:
                target = package / item.name
                target.write_bytes(item.read_bytes())
Example #10
def get_text_docs(corpus_zipfile: ZipFile) -> ImmutableDict[str, str]:
    print(f"Reading .ltf documents in {corpus_zipfile.filename}")

    prefix = get_root_dir_name(corpus_zipfile) or ""

    parent_children_path = _find_name_in_zip(
        corpus_zipfile, re.compile(f"{prefix}docs/parent_children.tab"))

    if not parent_children_path:
        raise RuntimeError("Archive lacks parent_children.tab")

    parent_children_tab = _read_tab_file(
        CharSource.from_file_in_zip(corpus_zipfile, parent_children_path))

    child_to_parent_map = _create_child_to_parent_map(parent_children_tab)

    text_docs = {}
    text_dir = ZipPath(corpus_zipfile, at="data/ltf/")

    for source_doc_path in text_dir.iterdir():
        source_doc_zip = ZipFile(io.BytesIO(source_doc_path.read_bytes()))

        for source_info in tqdm(
                source_doc_zip.infolist(),
                desc=f"Extracting {source_doc_path.name}",
                bar_format="{l_bar}{bar:20}{r_bar}",
        ):

            doc = ZipPath(source_doc_zip, at=source_info.filename)
            try:
                doceid = doc.name.split(".")[0]
                doc_id = child_to_parent_map[doceid]
                text_docs[doc_id] = convert_ltf_to_raw_text(
                    doc.read_text(encoding="utf-8"))
            except AttributeError:
                raise FileNotFoundError(f"Could not read from {doc}.")

    return immutabledict(text_docs)
Example #11
def test_1(act: Action):
    # Read script and expected stdout from zip file
    datafile = Path(act.files_dir / 'core_2115.zip',
                    at='tmp_core_2115_queries_with_long_plans.sql')
    act.script = datafile.read_text()
    datafile = Path(act.files_dir / 'core_2115.zip',
                    at='tmp_core_2115_check_txt_of_long_plans.log')
    act.expected_stdout = datafile.read_text()
    act.execute()
    assert act.clean_stdout == act.clean_expected_stdout
Example #12
def search_zip(folder: Path, file_type: str) -> list[Path]:
    """
    Recursively search a zip file by iterating through its files and subfolders.
    zipfile.Path objects behave much like filesystem paths, with helpers for
    checking whether an entry is a directory or a file.
    Args:
        folder (Path): A ZipFile Path that represents a folder.
        file_type (str): The extension to check against the individual files.
    Returns:
        list[Path]: All the matched Paths
    """
    result = []
    for folder_item in folder.iterdir():
        # Is it a directory?
        if folder_item.is_dir():
            result.extend(search_zip(folder_item, file_type))
        # Is it a file?
        if folder_item.is_file():
            if folder_item.name.endswith(file_type):
                result.append(folder_item)
    return result
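
A brief usage sketch, assuming an archive named mysterious_drive.zip in the working directory (the name mirrors the other examples above; any zip file works):

from zipfile import Path

root = Path("mysterious_drive.zip")
for match in search_zip(root, ".json"):
    print(match)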
Example #13
def cmp_zipfile(archive, CUTOFF_DATE=date(2021, 3, 1)):
    """
	return a set of all the files in the zip archive which are older than CUTOFF_DATE
		- archive should be a zip file of a directory of data files. 
		- The directory and zip file's basenames should be the same
		- CUTOFF_DATE is an optional parameter with a default
	"""

    old_files = set()
    # TODO wrap this in an error handling 'try' block
    # TODO add logging of failures and successes
    with ZipFile(archive, 'r') as our_zip:
        for file in Path(our_zip, at=archive.split('.')[0] + '/').iterdir():
            # extract date info from filename convention. ex: 20210430_158732.dat
            year, month, day = file.name[0:4], file.name[4:6], file.name[6:8]
            # int() handles leading zeroes, e.g. int("05") == 5
            file_date = date(int(year), int(month), int(day))
            if file_date < CUTOFF_DATE:
                old_files.add(file.name)
    return old_files
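
A quick call sketch, assuming a hypothetical archive data.zip whose top-level directory is also named data and whose members follow the YYYYMMDD_*.dat naming convention used above; ZipFile, Path and date are imported as in the surrounding module:

from datetime import date

stale = cmp_zipfile("data.zip", CUTOFF_DATE=date(2021, 6, 1))
print(sorted(stale))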
from zipfile import ZipFile, Path
from glob import glob

files = glob("**/*.txt", recursive=True)
print(files)

with ZipFile("archive.zip", "w") as zip:
    for file in files:
        zip.write(file)
    zip.printdir()

with ZipFile("archive.zip", "a") as zip:
    zip.write("update.txt")
    zip.printdir()

with ZipFile("archive.zip", "a") as zip:
    zip.extractall("archive")
    zip.extract("test.txt", "output")

    if Path(zip.filename, "update.txt").exists():
        with zip.open("update.txt", "r") as file:
            print(file.read())
        with zip.open("update.txt", "w") as file:
            file.write(b"Another update")
def _recursive_object_registration(base_path: ZipPath):
    for path in base_path.iterdir():
        if path.is_file() and path.name != "meta.json":
            register_object(path)
        elif path.is_dir():
            _recursive_object_registration(path)
def register_object(zip_path: ZipPath):
    data = loads(zip_path.read_text())
    register_map[data["type"]](data)
Example #17
    def test_write_adds_file_to_archive(self, wf, tmp_file):
        tmp_file.write_text("contents")
        wf.write(tmp_file)
        arc_file = ZipPath(wf.zipfile, str(tmp_file.name).lstrip('/'))

        assert arc_file.read_text() == tmp_file.read_text()
Example #18
    '''
    if depth == k:  # potential candidate
        ans.append(curr[::])
        return

    for i in range(n):
        curr.append(TEXT[i])
        backtrack(n, k, depth + 1, curr, ans)
        curr.pop()
    return


if __name__ == '__main__':
    # Load the Zip File
    ZIP_FILE_NAME = 'GS4/mysterious_drive.zip'
    root = Path(ZIP_FILE_NAME)
    # Search the Zip File to find password_hash.py & dictionary.txt
    pass_hash = search_zip(root, ".py")[0]
    # print(pass_hash)
    # mysterious_drive.zip/c/Home/atb/lib/usr/password_hash.py
    with open('GS4/dictionary.txt') as data_file:
        TEXT = data_file.readlines()
    # Strip trailing newlines from each dictionary word
    TEXT = [line.rstrip() for line in TEXT]

    n = len(TEXT)
    ans = []

    for i in range(len(TEXT)):
Example #19
def test_1(act: Action):
    script_file = Path(act.files_dir / 'core_4881.zip', at='core_4881_script.sql')
    act.script = script_file.read_text(encoding='utf-8')
    act.expected_stdout = expected_stdout
    act.execute()
    assert act.clean_stdout == act.clean_expected_stdout