def install_gear(zip_name):
    """Unarchive initial gear to simulate running inside a real gear.

    This will delete and then install: config.json input/ output/ work/

    Args:
        zip_name (str): name of zip file that holds simulated gear.
    """

    gear_tests = "/src/tests/data/gear_tests/"
    gear = "/flywheel/v0/"
    os.chdir(gear)  # Make sure we're in the right place (gear works in "work/" dir)

    print("\nRemoving previous gear...")

    if Path(gear + "config.json").exists():
        Path(gear + "config.json").unlink()

    for dir_name in ["input", "output", "work"]:
        path = Path(gear + dir_name)
        if path.exists():
            shutil.rmtree(path)

    print(f'\ninstalling new gear, "{zip_name}"...')
    unzip_archive(gear_tests + zip_name, gear)

    # Swap in the user's api-key if there is a (fake) one in the config
    config_json = Path("./config.json")
    if config_json.exists():
        print(f"Found {str(config_json)}")
        api_dict = None
        with open(config_json) as cjf:
            config_dict = json.load(cjf)
            pprint(config_dict["inputs"])
            if "api_key" in config_dict["inputs"]:
                print('Found "api_key" in config_dict["inputs"]')
                user_json = Path(Path.home() / ".config/flywheel/user.json")
                if user_json.exists():
                    with open(user_json) as ujf:
                        api_dict = json.load(ujf)
                    config_dict["inputs"]["api_key"]["key"] = api_dict["key"]
                    print("installing api-key...")
                else:
                    print(f"{str(user_json)} not found. Can't get api key.")
            else:
                print('No "api_key" in config_dict["inputs"]')
        if api_dict:
            with open(config_json, "w") as cjf:
                json.dump(config_dict, cjf)
    else:
        print(f"{str(config_json)} does not exist. Can't set api key.")
def _method(zip_name):
    """Unarchive initial gear to simulate running inside a real gear.

    This will delete and then install: config.json input/ output/ work/ freesurfer/

    Args:
        zip_name (str): name of zip file that holds simulated gear.
    """

    gear_tests = "/src/tests/data/gear_tests/"
    if not Path(gear_tests).exists():  # fix for running in circleci
        gear_tests = "/flywheel/v0/tests/data/gear_tests/"
    gear = "/flywheel/v0/"
    os.chdir(gear)  # Make sure we're in the right place (gear works in "work/" dir)

    print("\nRemoving previous gear...")

    if Path(gear + "config.json").exists():
        Path(gear + "config.json").unlink()

    for dir_name in ["input", "output", "work", "freesurfer"]:
        path = Path(gear + dir_name)
        if path.exists():
            shutil.rmtree(path)

    print(f'\ninstalling new gear, "{zip_name}"...')
    unzip_archive(gear_tests + zip_name, gear)

    # Move FreeSurfer license and subject directories to their proper place
    gear_freesurfer = Path("freesurfer")
    if gear_freesurfer.exists():
        if not SUBJECTS_DIR.exists():
            print(f"WARNING: had to create {SUBJECTS_DIR}")
            SUBJECTS_DIR.mkdir(parents=True)
        gear_license = Path(gear_freesurfer / "license.txt")
        if gear_license.exists():
            if Path(FS_DIR / "license.txt").exists():
                Path(FS_DIR / "license.txt").unlink()
            shutil.move(str(gear_license), str(FS_DIR))
        subjects = gear_freesurfer.glob("subjects/*")
        for subj in subjects:
            subj_name = subj.name
            if (SUBJECTS_DIR / subj_name).exists():
                shutil.rmtree(SUBJECTS_DIR / subj_name)
            print(f"moving subject to {str(SUBJECTS_DIR / subj_name)}")
            shutil.move(str(subj), str(SUBJECTS_DIR))
def _method(zip_name):
    """Unarchive initial gear to simulate running inside a real gear.

    This will delete and then install: config.json input/ output/ work/ freesurfer/

    Args:
        zip_name (str): name of zip file that holds simulated gear.
    """

    gear_tests = Path("/src/tests/data/gear_tests/")
    if not gear_tests.exists():  # fix for running in circleci
        gear_tests = FWV0 / "tests/data/gear_tests/"

    print("\nRemoving previous gear...")

    for file_name in [
        "config.json",
        "time_output.txt",
    ]:
        if Path(FWV0 / file_name).exists():
            Path(FWV0 / file_name).unlink()

    for dir_name in [
        "input",
        "output",
        "work",
        "freesurfer",
        "templateflow",
        "unzip-work-dir",
        "unzip-fs-subjects-dir",
        "unzip-previous-results",
    ]:
        path = Path(FWV0 / dir_name)
        if path.exists():
            print(f"shutil.rmtree({str(path)})")
            shutil.rmtree(path)

    print(f'\ninstalling new gear, "{zip_name}"...')
    unzip_archive(gear_tests / zip_name, str(FWV0))
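# Note: the `_method` variants above are normally exposed to the tests as a
# pytest fixture named `install_gear` (which is how test_dry_run_works below
# requests it).  A minimal sketch of that wrapper, assuming it lives in
# conftest.py next to `_method`; the real fixture may differ:
import pytest


@pytest.fixture
def install_gear():
    """Return a function that installs a simulated gear from a named zip file."""

    def _method(zip_name):
        ...  # body as defined above

    return _method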
def test_zip_selected_works(create_test_files, caplog, search_caplog):

    caplog.set_level(logging.DEBUG)

    work_dir = create_test_files
    work_path = work_dir.parents[0]
    dest_zip = work_path / "destination_zip.zip"

    files = ["two/hee", "hey", "missing_file"]
    folders = ["one/three", "else", "missing_dir"]

    zip_selected(work_path, work_dir.name, dest_zip, files, folders)

    unzip_dir = work_path / "unzip"
    unzip_archive(dest_zip, unzip_dir)

    assert dest_zip.exists()
    assert (unzip_dir / "test/four/hey").exists()
    assert (unzip_dir / "test/one/hey").exists()
    assert (unzip_dir / "test/one/three/now").exists()
    assert (unzip_dir / "test/something/else/altogether").exists()
    assert (unzip_dir / "test/two/hee").exists()
    assert search_caplog(caplog, "Zipping test/two/hee")
    assert search_caplog(caplog, "Looked for missing_file but")
    assert search_caplog(caplog, "Looked for missing_dir but")
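# The log-searching helpers used above are assumed to be conftest.py fixtures.
# A minimal sketch under that assumption (a hypothetical reconstruction; the
# real helpers may match records differently):
import pytest


@pytest.fixture
def search_caplog():
    """Return a function that is True if any log record contains `find_me`."""

    def _method(caplog, find_me):
        return any(find_me in rec.getMessage() for rec in caplog.records)

    return _method


@pytest.fixture
def search_caplog_contains():
    """Return a function that is True if a log record contains both
    `find_me` and `contains_me`."""

    def _method(caplog, find_me, contains_me):
        return any(
            find_me in rec.getMessage() and contains_me in rec.getMessage()
            for rec in caplog.records
        )

    return _method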
def main(gtk_context):

    FWV0 = Path.cwd()
    log.info("Running gear in %s", FWV0)

    gtk_context.log_config()

    # Errors and warnings will always be logged when they are detected.
    # Keep a list of errors and warnings to print all in one place at the end
    # of the log.  Any errors will prevent the command from running and will
    # cause exit(1).
    errors = []
    warnings = []

    output_dir = gtk_context.output_dir
    log.info("output_dir is %s", output_dir)
    work_dir = gtk_context.work_dir
    log.info("work_dir is %s", work_dir)
    gear_name = gtk_context.manifest["name"]

    # Run-time configuration options from the gear's context.json
    config = gtk_context.config

    dry_run = config.get("gear-dry-run")

    # Given the destination container, figure out if running at the project,
    # subject, or session level.
    destination_id = gtk_context.destination["id"]
    hierarchy = get_analysis_run_level_and_hierarchy(gtk_context.client, destination_id)

    # This is the label of the project, subject or session and is used
    # as part of the name of the output files.
    run_label = make_file_name_safe(hierarchy["run_label"])

    # Output will be put into a directory named as the destination id.
    # This allows the raw output to be deleted so that a zipped archive
    # can be returned.
    output_analysis_id_dir = output_dir / destination_id
    log.info("Creating output directory %s", output_analysis_id_dir)
    if Path(output_analysis_id_dir).exists():
        log.info(
            "Not actually creating output directory %s because it exists. "
            "This must be a test",
            output_analysis_id_dir,
        )
    else:
        Path(output_analysis_id_dir).mkdir()

    environ = get_and_log_environment()

    # Set the number of threads and max memory to use
    config["n_cpus"], config["omp-nthreads"] = set_n_cpus(
        config.get("n_cpus"), config.get("omp-nthreads")
    )
    config["mem"] = set_mem_mb(config.get("mem_mb"))
    environ["OMP_NUM_THREADS"] = str(config["omp-nthreads"])

    # All writeable directories need to be set up in the current working directory
    orig_subject_dir = Path(environ["SUBJECTS_DIR"])
    subjects_dir = FWV0 / "freesurfer/subjects"
    environ["SUBJECTS_DIR"] = str(subjects_dir)
    if not subjects_dir.exists():  # needs to be created unless testing
        subjects_dir.mkdir(parents=True)
        (subjects_dir / "fsaverage").symlink_to(orig_subject_dir / "fsaverage")
        (subjects_dir / "fsaverage5").symlink_to(orig_subject_dir / "fsaverage5")
        (subjects_dir / "fsaverage6").symlink_to(orig_subject_dir / "fsaverage6")

    bids_filter_file_path = gtk_context.get_input_path("bids-filter-file")
    if bids_filter_file_path:
        paths = list(Path("input/bids-filter-file").glob("*"))
        log.info("Using provided PyBIDS filter file %s", str(paths[0]))
        config["bids-filter-file"] = str(paths[0])

    previous_work_zip_file_path = gtk_context.get_input_path("work-dir")
    if previous_work_zip_file_path:
        paths = list(Path("input/work-dir").glob("*"))
        log.info("Using provided fMRIPrep intermediate work file %s", str(paths[0]))
        unzip_dir = FWV0 / "unzip-work-dir"
        unzip_dir.mkdir(parents=True)
        unzip_archive(paths[0], unzip_dir)
        for a_dir in unzip_dir.glob("*/*"):
            if a_dir.name == "bids":
                # Skip previous bids directory so current bids data will be used
                log.info("Found %s, but ignoring it to use current bids data", a_dir.name)
            else:
                log.info("Found %s", a_dir.name)
                a_dir.rename(FWV0 / "work" / a_dir.name)

        hash_files = list(Path("work/fmriprep_wf/").glob("fsdir_run_*/_0x*.json"))
        if hash_files:
            hash_file = hash_files[0]
            with open(hash_file) as json_file:
                data = json.load(json_file)
                old_tmp_path = data[0][1]
                old_tmp_name = old_tmp_path.split("/")[2]
                log.info("Found old tmp name: %s", old_tmp_name)
                cur_tmp_name = str(FWV0).split("/")[2]

                # Rename the directory to the old name
                Path("/tmp/" + cur_tmp_name).replace(Path("/tmp/" + old_tmp_name))

                # Create a symbolic link using the new name to the old name just in case
                Path("/tmp/" + cur_tmp_name).symlink_to(
                    Path("/tmp/" + old_tmp_name), target_is_directory=True
                )

                # Update all variables to have the old directory name in them
                FWV0 = Path("/tmp/" + old_tmp_name + "/flywheel/v0")
                output_dir = str(output_dir).replace(cur_tmp_name, old_tmp_name)
                output_analysis_id_dir = Path(
                    str(output_analysis_id_dir).replace(cur_tmp_name, old_tmp_name)
                )
                log.info("new output directory is: %s", output_dir)
                work_dir = Path(str(work_dir).replace(cur_tmp_name, old_tmp_name))
                log.info("new work directory is: %s", work_dir)
                subjects_dir = Path(str(subjects_dir).replace(cur_tmp_name, old_tmp_name))
                config["fs-subjects-dir"] = subjects_dir
                log.info("new FreeSurfer subjects directory is: %s", subjects_dir)

                # For old work to be recognized, switch to running from the old path
                os.chdir(FWV0)
                log.info("cd %s", FWV0)
        else:
            log.info("Could not find hash file")

    config["work-dir"] = str(FWV0 / "work")

    subject_zip_file_path = gtk_context.get_input_path("fs-subjects-dir")
    if subject_zip_file_path:
        paths = list(Path("input/fs-subjects-dir").glob("*"))
        log.info("Using provided FreeSurfer subject file %s", str(paths[0]))
        unzip_dir = FWV0 / "unzip-fs-subjects-dir"
        unzip_dir.mkdir(parents=True)
        unzip_archive(paths[0], unzip_dir)
        for a_subject in unzip_dir.glob("*/*"):
            if (subjects_dir / a_subject.name).exists():
                log.info("Found %s but using existing", a_subject.name)
            else:
                log.info("Found %s", a_subject.name)
                a_subject.rename(subjects_dir / a_subject.name)
        config["fs-subjects-dir"] = subjects_dir

    previous_results_zip_file_path = gtk_context.get_input_path("previous-results")
    if previous_results_zip_file_path:
        paths = list(Path("input/previous-results").glob("*"))
        log.info("Using provided fMRIPrep previous results file %s", str(paths[0]))
        unzip_dir = FWV0 / "unzip-previous-results"
        unzip_dir.mkdir(parents=True)
        unzip_archive(paths[0], unzip_dir)
        for a_dir in unzip_dir.glob("*/*"):
            log.info("Found %s", a_dir.name)
            a_dir.rename(output_analysis_id_dir / a_dir.name)

    environ["FS_LICENSE"] = str(FWV0 / "freesurfer/license.txt")
    license_list = list(Path("input/freesurfer_license").glob("*"))
    if len(license_list) > 0:
        fs_license_path = license_list[0]
    else:
        fs_license_path = ""
    install_freesurfer_license(
        str(fs_license_path),
        config.get("gear-FREESURFER_LICENSE"),
        gtk_context.client,
        destination_id,
        FREESURFER_LICENSE,
    )

    # TemplateFlow seems to be baked in to the container since 2021-10-07 16:25:12
    # so this is not needed:
    # templateflow_dir = FWV0 / "templateflow"
    # templateflow_dir.mkdir()
    # environ["SINGULARITYENV_TEMPLATEFLOW_HOME"] = str(templateflow_dir)
    # environ["TEMPLATEFLOW_HOME"] = str(templateflow_dir)

    command = generate_command(config, work_dir, output_analysis_id_dir, errors, warnings)

    # Download BIDS Formatted data
    if len(errors) == 0:
        # Create HTML file that shows BIDS "Tree" like output
        tree = True
        tree_title = f"{gear_name} BIDS Tree"

        error_code = download_bids_for_runlevel(
            gtk_context,
            hierarchy,
            tree=tree,
            tree_title=tree_title,
            src_data=DOWNLOAD_SOURCE,
            folders=DOWNLOAD_MODALITIES,
            dry_run=dry_run,
            do_validate_bids=config.get("gear-run-bids-validation"),
        )
        if error_code > 0 and not config.get("gear-ignore-bids-errors"):
            errors.append(f"BIDS Error(s) detected. Did not run {CONTAINER}")
    else:
        log.info("Did not download BIDS because of previous errors")
        print(errors)

    # Don't run if there were errors or if this is a dry run
    return_code = 0

    try:
        if len(errors) > 0:
            return_code = 1
            log.info("Command was NOT run because of previous errors.")
        elif dry_run:
            e = "gear-dry-run is set: Command was NOT run."
            log.warning(e)
            warnings.append(e)
            pretend_it_ran(destination_id)
        else:
            if config["gear-log-level"] != "INFO":
                # Show what's in the current working directory just before running
                os.system("tree -al .")

            if "gear-timeout" in config:
                command = [f"timeout {config['gear-timeout']}"] + command

            # This is what it is all about
            exec_command(
                command,
                environ=environ,
                dry_run=dry_run,
                shell=True,
                cont_output=True,
            )

    except RuntimeError as exc:
        return_code = 1
        errors.append(exc)
        log.critical(exc)
        log.exception("Unable to execute command.")

    finally:
        # Save time, etc. resources used in metadata on analysis
        if Path("time_output.txt").exists():  # some tests won't have this file
            metadata = {
                "analysis": {
                    "info": {
                        "resources used": {},
                    },
                },
            }
            with open("time_output.txt") as file:
                for line in file:
                    if ":" in line:
                        if "Elapsed" in line:
                            # Special case:
                            # "Elapsed (wall clock) time (h:mm:ss or m:ss): 0:08.11"
                            sline = re.split(r"\):", line)
                            sline[0] += ")"
                        else:
                            sline = line.split(":")
                        key = sline[0].strip()
                        val = sline[1].strip(' "\n')
                        metadata["analysis"]["info"]["resources used"][key] = val
            with open(f"{output_dir}/.metadata.json", "w") as fff:
                json.dump(metadata, fff)
            log.info("Wrote %s/.metadata.json", output_dir)

        # Cleanup, move all results to the output directory

        # Remove all fsaverage* directories
        if not config.get("gear-keep-fsaverage"):
            path = output_analysis_id_dir / "freesurfer"
            fsavg_dirs = path.glob("fsaverage*")
            for fsavg in fsavg_dirs:
                log.info("deleting %s", str(fsavg))
                shutil.rmtree(fsavg)
        else:
            log.info("Keeping fsaverage directories")

        # Zip entire output/<analysis_id> folder into
        # <gear_name>_<project|subject|session label>_<analysis.id>.zip
        zip_file_name = gear_name + f"_{run_label}_{destination_id}.zip"
        zip_output(
            str(output_dir),
            destination_id,
            zip_file_name,
            dry_run=False,
            exclude_files=None,
        )

        # Make archives for result *.html files for easy display on platform
        zip_htmls(output_dir, destination_id, output_analysis_id_dir / BIDS_APP)

        # Possibly save ALL intermediate output
        if config.get("gear-save-intermediate-output"):
            zip_all_intermediate_output(
                destination_id, gear_name, output_dir, work_dir, run_label
            )

        # Possibly save intermediate files and folders
        zip_intermediate_selected(
            config.get("gear-intermediate-files"),
            config.get("gear-intermediate-folders"),
            destination_id,
            gear_name,
            output_dir,
            work_dir,
            run_label,
        )

        # Clean up: remove output that was zipped
        if Path(output_analysis_id_dir).exists():
            if not config.get("gear-keep-output"):
                log.debug('removing output directory "%s"', str(output_analysis_id_dir))
                shutil.rmtree(output_analysis_id_dir)
            else:
                log.info('NOT removing output directory "%s"', str(output_analysis_id_dir))
        else:
            log.info("Output directory does not exist so it cannot be removed")

    # Report errors and warnings at the end of the log so they can be easily seen.
    if len(warnings) > 0:
        msg = "Previous warnings:\n"
        for warn in warnings:
            msg += "  Warning: " + str(warn) + "\n"
        log.info(msg)

    if len(errors) > 0:
        msg = "Previous errors:\n"
        for err in errors:
            if isinstance(err, str):  # show string
                msg += "  Error msg: " + str(err) + "\n"
            else:  # show type (of error) and error message
                err_type = str(type(err)).split("'")[1]
                msg += f"  {err_type}: {str(err)}\n"
        log.info(msg)
        return_code = 1

    log.info("%s Gear is done. Returning %s", CONTAINER, return_code)

    return return_code
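# In the gear itself, main() is invoked from the run.py entrypoint, which
# builds the same GearToolkitContext that test_dry_run_works constructs
# manually below.  A minimal entrypoint sketch, assuming the conventional
# Flywheel layout; the real gear may do extra setup first:
if __name__ == "__main__":
    import sys

    import flywheel_gear_toolkit

    with flywheel_gear_toolkit.GearToolkitContext() as gtk_context:
        gtk_context.init_logging()
        sys.exit(main(gtk_context))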
def test_dry_run_works(
    tmp_path, caplog, install_gear, search_caplog_contains, search_caplog
):

    caplog.set_level(logging.DEBUG)

    user_json = Path(Path.home() / ".config/flywheel/user.json")
    if not user_json.exists():
        TestCase.skipTest("", f"No API key available in {str(user_json)}")
    with open(user_json) as json_file:
        data = json.load(json_file)
        if "ga" not in data["key"]:
            TestCase.skipTest("", "Not logged in to ga.")

    FWV0 = Path.cwd()

    install_gear("dry_run.zip")

    with flywheel_gear_toolkit.GearToolkitContext(input_args=[]) as gtk_context:
        status = run.main(gtk_context)

    assert status == 0
    assert (FWV0 / "work/bids/.bidsignore").exists()
    assert (FWV0 / "freesurfer/subjects/sub-42/label/empty").exists()
    assert search_caplog_contains(
        caplog,
        "command is",
        "--bids-filter-file=input/bids-filter-file/PyBIDS_filter.json",
    )
    assert search_caplog_contains(
        caplog, "--work-dir", "flywheel/v0/output/61608fc7dbf5f9487f231006"
    )

    # Since the "work-dir" option was used, the gear created the old data's /tmp/
    # path and ran in it, and a symbolic link was created from the current /tmp/
    # path to the old one.  The "current" path was created just before these
    # tests started (in conftest.py).  E.g.:
    #   % ls /tmp
    #   gear-temp-dir-2cde80oy -> /tmp/gear-temp-dir-yhv7hq29
    #   gear-temp-dir-yhv7hq29
    # where "yhv7hq29" is the old random part found in the provided "work-dir"
    # data and "2cde80oy" is the new random part of the /tmp/ path created to
    # run these tests.  The new part is generated randomly so it will change,
    # but the old one is in the gear test data provided as an input to the gear.
    old_tmp_path = Path(*(list(Path.cwd().parts)[:3]))
    assert "yhv7hq29" in list(old_tmp_path.parts)[2]
    current_tmp_path = Path(*(list(FWV0.parts)[:3]))
    assert current_tmp_path.is_symlink()

    # Now change back to the original /tmp/ path so following tests are not affected
    current_tmp_path.unlink()
    old_tmp_path.replace(current_tmp_path)

    assert search_caplog_contains(caplog, "command is", "participant")
    assert search_caplog_contains(caplog, "command is", "'arg1', 'arg2'")
    assert search_caplog(caplog, "No BIDS errors detected.")
    assert search_caplog(caplog, "including sub-TOME3024/figures")
    assert search_caplog(caplog, "Zipping work directory")
    assert search_caplog(caplog, "Zipping work/bids/dataset_description.json")
    assert search_caplog(caplog, "Zipping work/bids/sub-TOME3024/ses-Session2/anat")
    assert search_caplog(caplog, "Looked for anatsub-TOME3024_desc-about_T1w.html")
    assert search_caplog(caplog, "Warning: gear-dry-run is set")

    # Make sure the platform-viewable archive includes figures
    html_zip_files = list(FWV0.glob("output/sub-TOME3024_*.html.zip"))
    html_zip_file = html_zip_files[0]
    assert html_zip_file.exists()
    unzip_archive(html_zip_file, tmp_path)
    assert Path(tmp_path / "index.html").exists()
    assert Path(
        tmp_path / "sub-TOME3024/figures/sub-TOME3024_ses-Session2_acq-MPRHA_dseg.svg"
    ).exists()
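# The comment in test_dry_run_works mentions that conftest.py creates the
# current /tmp/gear-temp-dir-* path just before the tests start.  A
# hypothetical sketch of that setup (fixture name and scope are assumptions,
# not the gear's actual conftest code):
import os
import tempfile
from pathlib import Path

import pytest


@pytest.fixture(scope="session", autouse=True)
def _gear_temp_dir():
    """Create a fresh /tmp/gear-temp-dir-<random>/flywheel/v0 and cd into it."""
    tmp = tempfile.mkdtemp(prefix="gear-temp-dir-", dir="/tmp")
    fwv0 = Path(tmp) / "flywheel" / "v0"
    fwv0.mkdir(parents=True)
    os.chdir(fwv0)
    yield fwv0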