def install_gear(zip_name):
    """Unarchive initial gear to simulate running inside a real gear.

    This will delete and then install: config.json input/ output/ work/

    Args:
        zip_name (str): name of zip file that holds simulated gear.
    """

    gear_tests = "/src/tests/data/gear_tests/"
    gear = "/flywheel/v0/"
    os.chdir(gear)  # Make sure we're in the right place (gear works in "work/" dir)

    print("\nRemoving previous gear...")

    if Path(gear + "config.json").exists():
        Path(gear + "config.json").unlink()

    for dir_name in ["input", "output", "work"]:
        path = Path(gear + dir_name)
        if path.exists():
            shutil.rmtree(path)

    print(f'\ninstalling new gear, "{zip_name}"...')
    unzip_archive(gear_tests + zip_name, gear)

    # Swap in the user's api-key if there is a (fake) one in the config
    config_json = Path("./config.json")
    if config_json.exists():
        print(f"Found {str(config_json)}")
        api_dict = None
        with open(config_json) as cjf:
            config_dict = json.load(cjf)
            pprint(config_dict["inputs"])
            if "api_key" in config_dict["inputs"]:
                print('Found "api_key" in config_dict["inputs"]')
                user_json = Path(Path.home() / ".config/flywheel/user.json")
                if user_json.exists():
                    with open(user_json) as ujf:
                        api_dict = json.load(ujf)
                    config_dict["inputs"]["api_key"]["key"] = api_dict["key"]
                    print("installing api-key...")
                else:
                    print(f"{str(user_json)} not found. Can't get api key.")
            else:
                print('No "api_key" in config_dict["inputs"]')
        if api_dict:
            with open(config_json, "w") as cjf:
                json.dump(config_dict, cjf)
    else:
        print(f"{str(config_json)} does not exist. Can't set api key.")
def _method(zip_name):
    """Unarchive initial gear to simulate running inside a real gear.

    This will delete and then install: config.json input/ output/ work/ freesurfer/

    Args:
        zip_name (str): name of zip file that holds simulated gear.
    """

    gear_tests = "/src/tests/data/gear_tests/"
    if not Path(gear_tests).exists():  # fix for running in circleci
        gear_tests = "/flywheel/v0/tests/data/gear_tests/"
    gear = "/flywheel/v0/"
    os.chdir(gear)  # Make sure we're in the right place (gear works in "work/" dir)

    print("\nRemoving previous gear...")

    if Path(gear + "config.json").exists():
        Path(gear + "config.json").unlink()

    for dir_name in ["input", "output", "work", "freesurfer"]:
        path = Path(gear + dir_name)
        if path.exists():
            shutil.rmtree(path)

    print(f'\ninstalling new gear, "{zip_name}"...')
    unzip_archive(gear_tests + zip_name, gear)

    # Move FreeSurfer license and subject directories to their proper place
    gear_freesurfer = Path("freesurfer")
    if gear_freesurfer.exists():
        if not SUBJECTS_DIR.exists():
            print(f"WARNING: had to create {SUBJECTS_DIR}")
            SUBJECTS_DIR.mkdir(parents=True)
        gear_license = Path(gear_freesurfer / "license.txt")
        if gear_license.exists():
            if Path(FS_DIR / "license.txt").exists():
                Path(FS_DIR / "license.txt").unlink()
            shutil.move(str(gear_license), str(FS_DIR))
        subjects = gear_freesurfer.glob("subjects/*")
        for subj in subjects:
            subj_name = subj.name
            if (SUBJECTS_DIR / subj_name).exists():
                shutil.rmtree(SUBJECTS_DIR / subj_name)
            print(f"moving subject to {str(SUBJECTS_DIR / subj_name)}")
            shutil.move(str(subj), str(SUBJECTS_DIR))
def _method(zip_name):
    """Unarchive initial gear to simulate running inside a real gear.

    This will delete and then install: config.json input/ output/ work/ freesurfer/

    Args:
        zip_name (str): name of zip file that holds simulated gear.
    """

    gear_tests = Path("/src/tests/data/gear_tests/")
    if not gear_tests.exists():  # fix for running in circleci
        gear_tests = FWV0 / "tests/data/gear_tests/"

    print("\nRemoving previous gear...")

    for file_name in [
        "config.json",
        "time_output.txt",
    ]:
        if Path(FWV0 / file_name).exists():
            Path(FWV0 / file_name).unlink()

    for dir_name in [
        "input",
        "output",
        "work",
        "freesurfer",
        "templateflow",
        "unzip-work-dir",
        "unzip-fs-subjects-dir",
        "unzip-previous-results",
    ]:
        path = Path(FWV0 / dir_name)
        if path.exists():
            print(f"shutil.rmtree({str(path)})")
            shutil.rmtree(path)

    print(f'\ninstalling new gear, "{zip_name}"...')
    unzip_archive(gear_tests / zip_name, str(FWV0))
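# Note: the `_method` variants above are normally exposed to the tests as a
# pytest fixture named `install_gear` (which is how test_dry_run_works below
# requests it).  A minimal sketch of that wrapper, assuming it lives in
# conftest.py next to `_method`; the real fixture may differ:
import pytest


@pytest.fixture
def install_gear():
    """Return a function that installs a simulated gear from a named zip file."""

    def _method(zip_name):
        ...  # body as defined above

    return _method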
def test_zip_selected_works(create_test_files, caplog, search_caplog):

    caplog.set_level(logging.DEBUG)

    work_dir = create_test_files
    work_path = work_dir.parents[0]
    dest_zip = work_path / "destination_zip.zip"

    files = ["two/hee", "hey", "missing_file"]
    folders = ["one/three", "else", "missing_dir"]

    zip_selected(work_path, work_dir.name, dest_zip, files, folders)

    unzip_dir = work_path / "unzip"
    unzip_archive(dest_zip, unzip_dir)

    assert dest_zip.exists()
    assert (unzip_dir / "test/four/hey").exists()
    assert (unzip_dir / "test/one/hey").exists()
    assert (unzip_dir / "test/one/three/now").exists()
    assert (unzip_dir / "test/something/else/altogether").exists()
    assert (unzip_dir / "test/two/hee").exists()
    assert search_caplog(caplog, "Zipping test/two/hee")
    assert search_caplog(caplog, "Looked for missing_file but")
    assert search_caplog(caplog, "Looked for missing_dir but")
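# The log-searching helpers used above are assumed to be conftest.py fixtures.
# A minimal sketch under that assumption (a hypothetical reconstruction; the
# real helpers may match records differently):
import pytest


@pytest.fixture
def search_caplog():
    """Return a function that is True if any log record contains `find_me`."""

    def _method(caplog, find_me):
        return any(find_me in rec.getMessage() for rec in caplog.records)

    return _method


@pytest.fixture
def search_caplog_contains():
    """Return a function that is True if a log record contains both
    `find_me` and `contains_me`."""

    def _method(caplog, find_me, contains_me):
        return any(
            find_me in rec.getMessage() and contains_me in rec.getMessage()
            for rec in caplog.records
        )

    return _method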
def main(gtk_context):

    FWV0 = Path.cwd()
    log.info("Running gear in %s", FWV0)

    gtk_context.log_config()

    # Errors and warnings will always be logged when they are detected.
    # Keep a list of errors and warnings to print all in one place at the end
    # of the log.  Any errors will prevent the command from running and will
    # cause exit(1).
    errors = []
    warnings = []

    output_dir = gtk_context.output_dir
    log.info("output_dir is %s", output_dir)
    work_dir = gtk_context.work_dir
    log.info("work_dir is %s", work_dir)
    gear_name = gtk_context.manifest["name"]

    # Run-time configuration options from the gear's context.json
    config = gtk_context.config

    dry_run = config.get("gear-dry-run")

    # Given the destination container, figure out if running at the project,
    # subject, or session level.
    destination_id = gtk_context.destination["id"]
    hierarchy = get_analysis_run_level_and_hierarchy(gtk_context.client, destination_id)

    # This is the label of the project, subject or session and is used
    # as part of the name of the output files.
    run_label = make_file_name_safe(hierarchy["run_label"])

    # Output will be put into a directory named as the destination id.
    # This allows the raw output to be deleted so that a zipped archive
    # can be returned.
    output_analysis_id_dir = output_dir / destination_id
    log.info("Creating output directory %s", output_analysis_id_dir)
    if Path(output_analysis_id_dir).exists():
        log.info(
            "Not actually creating output directory %s because it exists. "
            "This must be a test",
            output_analysis_id_dir,
        )
    else:
        Path(output_analysis_id_dir).mkdir()

    environ = get_and_log_environment()

    # Set the number of threads and max memory to use
    config["n_cpus"], config["omp-nthreads"] = set_n_cpus(
        config.get("n_cpus"), config.get("omp-nthreads")
    )
    config["mem"] = set_mem_mb(config.get("mem_mb"))
    environ["OMP_NUM_THREADS"] = str(config["omp-nthreads"])

    # All writeable directories need to be set up in the current working directory
    orig_subject_dir = Path(environ["SUBJECTS_DIR"])
    subjects_dir = FWV0 / "freesurfer/subjects"
    environ["SUBJECTS_DIR"] = str(subjects_dir)
    if not subjects_dir.exists():  # needs to be created unless testing
        subjects_dir.mkdir(parents=True)
        (subjects_dir / "fsaverage").symlink_to(orig_subject_dir / "fsaverage")
        (subjects_dir / "fsaverage5").symlink_to(orig_subject_dir / "fsaverage5")
        (subjects_dir / "fsaverage6").symlink_to(orig_subject_dir / "fsaverage6")

    bids_filter_file_path = gtk_context.get_input_path("bids-filter-file")
    if bids_filter_file_path:
        paths = list(Path("input/bids-filter-file").glob("*"))
        log.info("Using provided PyBIDS filter file %s", str(paths[0]))
        config["bids-filter-file"] = str(paths[0])

    previous_work_zip_file_path = gtk_context.get_input_path("work-dir")
    if previous_work_zip_file_path:
        paths = list(Path("input/work-dir").glob("*"))
        log.info("Using provided fMRIPrep intermediate work file %s", str(paths[0]))
        unzip_dir = FWV0 / "unzip-work-dir"
        unzip_dir.mkdir(parents=True)
        unzip_archive(paths[0], unzip_dir)
        for a_dir in unzip_dir.glob("*/*"):
            if a_dir.name == "bids":
                # Skip previous bids directory so current bids data will be used
                log.info("Found %s, but ignoring it to use current bids data", a_dir.name)
            else:
                log.info("Found %s", a_dir.name)
                a_dir.rename(FWV0 / "work" / a_dir.name)

        hash_files = list(Path("work/fmriprep_wf/").glob("fsdir_run_*/_0x*.json"))
        if hash_files:
            hash_file = hash_files[0]
            with open(hash_file) as json_file:
                data = json.load(json_file)
                old_tmp_path = data[0][1]
                old_tmp_name = old_tmp_path.split("/")[2]
                log.info("Found old tmp name: %s", old_tmp_name)
                cur_tmp_name = str(FWV0).split("/")[2]

                # Rename the directory to the old name
                Path("/tmp/" + cur_tmp_name).replace(Path("/tmp/" + old_tmp_name))

                # Create a symbolic link using the new name to the old name just in case
                Path("/tmp/" + cur_tmp_name).symlink_to(
                    Path("/tmp/" + old_tmp_name), target_is_directory=True
                )

                # Update all variables to have the old directory name in them
                FWV0 = Path("/tmp/" + old_tmp_name + "/flywheel/v0")
                output_dir = str(output_dir).replace(cur_tmp_name, old_tmp_name)
                output_analysis_id_dir = Path(
                    str(output_analysis_id_dir).replace(cur_tmp_name, old_tmp_name)
                )
                log.info("new output directory is: %s", output_dir)
                work_dir = Path(str(work_dir).replace(cur_tmp_name, old_tmp_name))
                log.info("new work directory is: %s", work_dir)
                subjects_dir = Path(str(subjects_dir).replace(cur_tmp_name, old_tmp_name))
                config["fs-subjects-dir"] = subjects_dir
                log.info("new FreeSurfer subjects directory is: %s", subjects_dir)

                # For old work to be recognized, switch to running from the old path
                os.chdir(FWV0)
                log.info("cd %s", FWV0)
        else:
            log.info("Could not find hash file")

    config["work-dir"] = str(FWV0 / "work")

    subject_zip_file_path = gtk_context.get_input_path("fs-subjects-dir")
    if subject_zip_file_path:
        paths = list(Path("input/fs-subjects-dir").glob("*"))
        log.info("Using provided FreeSurfer subject file %s", str(paths[0]))
        unzip_dir = FWV0 / "unzip-fs-subjects-dir"
        unzip_dir.mkdir(parents=True)
        unzip_archive(paths[0], unzip_dir)
        for a_subject in unzip_dir.glob("*/*"):
            if (subjects_dir / a_subject.name).exists():
                log.info("Found %s but using existing", a_subject.name)
            else:
                log.info("Found %s", a_subject.name)
                a_subject.rename(subjects_dir / a_subject.name)
        config["fs-subjects-dir"] = subjects_dir

    previous_results_zip_file_path = gtk_context.get_input_path("previous-results")
    if previous_results_zip_file_path:
        paths = list(Path("input/previous-results").glob("*"))
        log.info("Using provided fMRIPrep previous results file %s", str(paths[0]))
        unzip_dir = FWV0 / "unzip-previous-results"
        unzip_dir.mkdir(parents=True)
        unzip_archive(paths[0], unzip_dir)
        for a_dir in unzip_dir.glob("*/*"):
            log.info("Found %s", a_dir.name)
            a_dir.rename(output_analysis_id_dir / a_dir.name)

    environ["FS_LICENSE"] = str(FWV0 / "freesurfer/license.txt")
    license_list = list(Path("input/freesurfer_license").glob("*"))
    if len(license_list) > 0:
        fs_license_path = license_list[0]
    else:
        fs_license_path = ""
    install_freesurfer_license(
        str(fs_license_path),
        config.get("gear-FREESURFER_LICENSE"),
        gtk_context.client,
        destination_id,
        FREESURFER_LICENSE,
    )

    # TemplateFlow seems to be baked in to the container since 2021-10-07 16:25:12
    # so this is not needed:
    # templateflow_dir = FWV0 / "templateflow"
    # templateflow_dir.mkdir()
    # environ["SINGULARITYENV_TEMPLATEFLOW_HOME"] = str(templateflow_dir)
    # environ["TEMPLATEFLOW_HOME"] = str(templateflow_dir)

    command = generate_command(config, work_dir, output_analysis_id_dir, errors, warnings)

    # Download BIDS Formatted data
    if len(errors) == 0:
        # Create HTML file that shows BIDS "Tree" like output
        tree = True
        tree_title = f"{gear_name} BIDS Tree"

        error_code = download_bids_for_runlevel(
            gtk_context,
            hierarchy,
            tree=tree,
            tree_title=tree_title,
            src_data=DOWNLOAD_SOURCE,
            folders=DOWNLOAD_MODALITIES,
            dry_run=dry_run,
            do_validate_bids=config.get("gear-run-bids-validation"),
        )
        if error_code > 0 and not config.get("gear-ignore-bids-errors"):
            errors.append(f"BIDS Error(s) detected. Did not run {CONTAINER}")
    else:
        log.info("Did not download BIDS because of previous errors")
        print(errors)

    # Don't run if there were errors or if this is a dry run
    return_code = 0

    try:
        if len(errors) > 0:
            return_code = 1
            log.info("Command was NOT run because of previous errors.")
        elif dry_run:
            e = "gear-dry-run is set: Command was NOT run."
            log.warning(e)
            warnings.append(e)
            pretend_it_ran(destination_id)
        else:
            if config["gear-log-level"] != "INFO":
                # Show what's in the current working directory just before running
                os.system("tree -al .")

            if "gear-timeout" in config:
                command = [f"timeout {config['gear-timeout']}"] + command

            # This is what it is all about
            exec_command(
                command,
                environ=environ,
                dry_run=dry_run,
                shell=True,
                cont_output=True,
            )

    except RuntimeError as exc:
        return_code = 1
        errors.append(exc)
        log.critical(exc)
        log.exception("Unable to execute command.")

    finally:
        # Save time, etc. resources used in metadata on analysis
        if Path("time_output.txt").exists():  # some tests won't have this file
            metadata = {
                "analysis": {
                    "info": {
                        "resources used": {},
                    },
                },
            }
            with open("time_output.txt") as file:
                for line in file:
                    if ":" in line:
                        if "Elapsed" in line:
                            # Special case:
                            # "Elapsed (wall clock) time (h:mm:ss or m:ss): 0:08.11"
                            sline = re.split(r"\):", line)
                            sline[0] += ")"
                        else:
                            sline = line.split(":")
                        key = sline[0].strip()
                        val = sline[1].strip(' "\n')
                        metadata["analysis"]["info"]["resources used"][key] = val
            with open(f"{output_dir}/.metadata.json", "w") as fff:
                json.dump(metadata, fff)
            log.info("Wrote %s/.metadata.json", output_dir)

        # Cleanup, move all results to the output directory

        # Remove all fsaverage* directories
        if not config.get("gear-keep-fsaverage"):
            path = output_analysis_id_dir / "freesurfer"
            fsavg_dirs = path.glob("fsaverage*")
            for fsavg in fsavg_dirs:
                log.info("deleting %s", str(fsavg))
                shutil.rmtree(fsavg)
        else:
            log.info("Keeping fsaverage directories")

        # Zip entire output/<analysis_id> folder into
        # <gear_name>_<project|subject|session label>_<analysis.id>.zip
        zip_file_name = gear_name + f"_{run_label}_{destination_id}.zip"
        zip_output(
            str(output_dir),
            destination_id,
            zip_file_name,
            dry_run=False,
            exclude_files=None,
        )

        # Make archives for result *.html files for easy display on platform
        zip_htmls(output_dir, destination_id, output_analysis_id_dir / BIDS_APP)

        # Possibly save ALL intermediate output
        if config.get("gear-save-intermediate-output"):
            zip_all_intermediate_output(
                destination_id, gear_name, output_dir, work_dir, run_label
            )

        # Possibly save intermediate files and folders
        zip_intermediate_selected(
            config.get("gear-intermediate-files"),
            config.get("gear-intermediate-folders"),
            destination_id,
            gear_name,
            output_dir,
            work_dir,
            run_label,
        )

        # Clean up: remove output that was zipped
        if Path(output_analysis_id_dir).exists():
            if not config.get("gear-keep-output"):
                log.debug('removing output directory "%s"', str(output_analysis_id_dir))
                shutil.rmtree(output_analysis_id_dir)
            else:
                log.info('NOT removing output directory "%s"', str(output_analysis_id_dir))
        else:
            log.info("Output directory does not exist so it cannot be removed")

    # Report errors and warnings at the end of the log so they can be easily seen.
    if len(warnings) > 0:
        msg = "Previous warnings:\n"
        for warn in warnings:
            msg += "  Warning: " + str(warn) + "\n"
        log.info(msg)

    if len(errors) > 0:
        msg = "Previous errors:\n"
        for err in errors:
            if isinstance(err, str):  # show string
                msg += "  Error msg: " + str(err) + "\n"
            else:  # show type (of error) and error message
                err_type = str(type(err)).split("'")[1]
                msg += f"  {err_type}: {str(err)}\n"
        log.info(msg)
        return_code = 1

    log.info("%s Gear is done. Returning %s", CONTAINER, return_code)

    return return_code
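# In the gear itself, main() is invoked from the run.py entrypoint, which
# builds the same GearToolkitContext that test_dry_run_works constructs
# manually below.  A minimal entrypoint sketch, assuming the conventional
# Flywheel layout; the real gear may do extra setup first:
if __name__ == "__main__":
    import sys

    import flywheel_gear_toolkit

    with flywheel_gear_toolkit.GearToolkitContext() as gtk_context:
        gtk_context.init_logging()
        sys.exit(main(gtk_context))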
def test_dry_run_works(
    tmp_path, caplog, install_gear, search_caplog_contains, search_caplog
):

    caplog.set_level(logging.DEBUG)

    user_json = Path(Path.home() / ".config/flywheel/user.json")
    if not user_json.exists():
        TestCase.skipTest("", f"No API key available in {str(user_json)}")
    with open(user_json) as json_file:
        data = json.load(json_file)
        if "ga" not in data["key"]:
            TestCase.skipTest("", "Not logged in to ga.")

    FWV0 = Path.cwd()

    install_gear("dry_run.zip")

    with flywheel_gear_toolkit.GearToolkitContext(input_args=[]) as gtk_context:
        status = run.main(gtk_context)

    assert status == 0
    assert (FWV0 / "work/bids/.bidsignore").exists()
    assert (FWV0 / "freesurfer/subjects/sub-42/label/empty").exists()
    assert search_caplog_contains(
        caplog,
        "command is",
        "--bids-filter-file=input/bids-filter-file/PyBIDS_filter.json",
    )
    assert search_caplog_contains(
        caplog, "--work-dir", "flywheel/v0/output/61608fc7dbf5f9487f231006"
    )

    # Since the "work-dir" option was used, the gear created the old data's /tmp/
    # path and ran in it, and a symbolic link was created from the current /tmp/
    # path to the old one.  The "current" path was created just before these
    # tests started (in conftest.py).  E.g.:
    #   % ls /tmp
    #   gear-temp-dir-2cde80oy -> /tmp/gear-temp-dir-yhv7hq29
    #   gear-temp-dir-yhv7hq29
    # where "yhv7hq29" is the old random part found in the provided "work-dir"
    # data and "2cde80oy" is the new random part of the /tmp/ path created to
    # run these tests.  The new part is generated randomly so it will change,
    # but the old one is in the gear test data provided as an input to the gear.
    old_tmp_path = Path(*(list(Path.cwd().parts)[:3]))
    assert "yhv7hq29" in list(old_tmp_path.parts)[2]
    current_tmp_path = Path(*(list(FWV0.parts)[:3]))
    assert current_tmp_path.is_symlink()

    # Now change back to the original /tmp/ path so following tests are not affected
    current_tmp_path.unlink()
    old_tmp_path.replace(current_tmp_path)

    assert search_caplog_contains(caplog, "command is", "participant")
    assert search_caplog_contains(caplog, "command is", "'arg1', 'arg2'")
    assert search_caplog(caplog, "No BIDS errors detected.")
    assert search_caplog(caplog, "including sub-TOME3024/figures")
    assert search_caplog(caplog, "Zipping work directory")
    assert search_caplog(caplog, "Zipping work/bids/dataset_description.json")
    assert search_caplog(caplog, "Zipping work/bids/sub-TOME3024/ses-Session2/anat")
    assert search_caplog(caplog, "Looked for anatsub-TOME3024_desc-about_T1w.html")
    assert search_caplog(caplog, "Warning: gear-dry-run is set")

    # Make sure the platform-viewable archive includes figures
    html_zip_files = list(FWV0.glob("output/sub-TOME3024_*.html.zip"))
    html_zip_file = html_zip_files[0]
    assert html_zip_file.exists()
    unzip_archive(html_zip_file, tmp_path)
    assert Path(tmp_path / "index.html").exists()
    assert Path(
        tmp_path / "sub-TOME3024/figures/sub-TOME3024_ses-Session2_acq-MPRHA_dseg.svg"
    ).exists()
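# The comment in test_dry_run_works mentions that conftest.py creates the
# current /tmp/gear-temp-dir-* path just before the tests start.  A
# hypothetical sketch of that setup (fixture name and scope are assumptions,
# not the gear's actual conftest code):
import os
import tempfile
from pathlib import Path

import pytest


@pytest.fixture(scope="session", autouse=True)
def _gear_temp_dir():
    """Create a fresh /tmp/gear-temp-dir-<random>/flywheel/v0 and cd into it."""
    tmp = tempfile.mkdtemp(prefix="gear-temp-dir-", dir="/tmp")
    fwv0 = Path(tmp) / "flywheel" / "v0"
    fwv0.mkdir(parents=True)
    os.chdir(fwv0)
    yield fwv0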