def update_species(parser, args):
    """Update or remove species data in the panels directory.

    Opens the data directory at ``args.panels_dir`` and then:

    * if ``args.remove`` is falsy, calls ``update_all_species()`` when
      ``args.species`` is ``"all"``, otherwise ``update_species()`` for
      that one species;
    * if ``args.remove`` is truthy, calls ``remove_species()`` for
      ``args.species``.

    ``parser`` is not used by this function.

    Raises:
        NotImplementedError: if removal of ``"all"`` species is requested.
    """
    panels = DataDir(args.panels_dir)
    logger.info(f"Loaded panels metdata from {panels.root_dir}")

    # Update path first, so the removal logic below reads straight through.
    if not args.remove:
        if args.species == "all":
            panels.update_all_species()
        else:
            panels.update_species(args.species)
        return

    # Removal only works on a single named species.
    if args.species == "all":
        raise NotImplementedError("Can only delete individual species")
    panels.remove_species(args.species)
    logger.info(f"Removed species {args.species}")
def ref_data_from_args(args):
    """Build the reference-data dictionary selected by the parsed arguments.

    For ``args.species == "custom"`` the values come straight from the
    ``custom_*`` / ``kmer`` attributes of ``args``.  For any other species
    they are read from the species directory obtained from
    ``DataDir(args.panels_dir)``, after optionally selecting ``args.panel``.

    Returns:
        dict with keys ``fasta_files``, ``var_to_res_json``,
        ``hierarchy_json``, ``lineage_json``, ``kmer``, ``version``,
        ``species_phylo_group`` and ``lineage_dict``.  ``lineage_dict`` is
        ``None`` when ``lineage_json`` is ``None``; otherwise it is the
        result of ``load_json(lineage_json)``.

    Raises:
        ValueError: if the species is ``"custom"`` but no custom probe set
            path was given, or if the data directory has no species
            directory for the requested species.
    """
    if args.species == "custom":
        if args.custom_probe_set_path is None:
            raise ValueError(
                "Must use --custom_probe_set_path option if the species is 'custom'"
            )
        ref_data = {
            "fasta_files": [args.custom_probe_set_path],
            "var_to_res_json": args.custom_variant_to_resistance_json,
            "hierarchy_json": None,
            "lineage_json": args.custom_lineage_json,
            "kmer": args.kmer,
            "version": "custom",
            "species_phylo_group": None,
        }
    else:
        data_dir = DataDir(args.panels_dir)
        species_dir = data_dir.get_species_dir(args.species)
        # get_species_dir() can return None (a species that is listed but not
        # installed); fail with a clear message instead of an AttributeError
        # on None further down.
        if species_dir is None:
            raise ValueError(
                f"No data found for species '{args.species}' in panels "
                f"directory {data_dir.root_dir}. Is the species installed?"
            )
        if args.panel is not None:
            species_dir.set_panel(args.panel)
        ref_data = {
            "fasta_files": species_dir.fasta_files(),
            "var_to_res_json": species_dir.json_file("amr"),
            "hierarchy_json": species_dir.json_file("hierarchy"),
            "lineage_json": species_dir.json_file("lineage"),
            "kmer": species_dir.kmer(),
            "version": species_dir.version(),
            "species_phylo_group": species_dir.species_phylo_group(),
        }

    lineage_json = ref_data["lineage_json"]
    ref_data["lineage_dict"] = (
        None if lineage_json is None else load_json(lineage_json)
    )
    return ref_data
def test_data_dir():
    """Run through the whole reference-data workflow a user might follow.

    Steps: update metadata, install a species, update a species, and remove
    a species, checking along the way that the manifest and the installed
    state are as expected.
    """

    # Create an empty data dir. Remove any leftovers from a previous run
    # first; ignore_errors covers the case where the directory is absent.
    temp_dir = "tmp.test_data_dir"
    shutil.rmtree(temp_dir, ignore_errors=True)
    ddir = DataDir(temp_dir)
    assert ddir.manifest == {}
    ddir.create_root()
    assert os.path.exists(temp_dir)
    assert not ddir.is_locked()
    ddir.start_lock()
    assert ddir.is_locked()
    ddir.stop_lock()
    assert not ddir.is_locked()

    def load_manifest(manifest):
        # Write the manifest to a temporary JSON file, load it into the data
        # dir, and always remove the file again, even if loading fails.
        manifest_json = "tmp.species_data_test.json"
        with open(manifest_json, "w") as f:
            json.dump(manifest, f)
        try:
            ddir.update_manifest(filename=manifest_json)
        finally:
            os.unlink(manifest_json)

    # Update the manifest, which has species available for installation
    species1_tarball = os.path.join(data_dir, "species1_data.20200101.tar.gz")
    manifest_data = {
        "species1": {"version": "20200101", "url": species1_tarball},
        "species2": {"version": "20190211", "url": "species2_url"},
    }
    load_manifest(manifest_data)
    expect_manifest = {
        "species1": {"installed": None, "latest": copy.copy(manifest_data["species1"])},
        "species2": {"installed": None, "latest": copy.copy(manifest_data["species2"])},
    }
    assert ddir.manifest == expect_manifest
    assert not ddir.is_locked()
    assert ddir.all_species_list() == ["species1", "species2"]
    assert ddir.installed_species() == []
    assert not ddir.species_is_installed("species1")
    assert not ddir.species_is_installed("species2")
    assert ddir.get_species_dir("species1") is None
    assert ddir.get_species_dir("species2") is None
    with pytest.raises(ValueError):
        ddir.get_species_dir("unknown species")

    # Add species1 from a tarball file on disk, and check everything looks
    # correctly updated.
    ddir.add_or_replace_species_data(species1_tarball)
    assert not ddir.is_locked()
    assert ddir.species_is_installed("species1")
    assert ddir.installed_species() == ["species1"]
    expect_manifest_with_species1 = copy.deepcopy(expect_manifest)
    expect_manifest_with_species1["species1"]["installed"] = copy.copy(
        manifest_data["species1"]
    )
    assert ddir.manifest == expect_manifest_with_species1

    # Want to test we can get species1 from the data dir.
    # But getting filesystem delay issues with the species1 manifest.json file,
    # which is loaded by ddir.get_species_dir("species1") below. Python
    # thinks it's there according to os.path.exists(), but then opening the
    # file throws a FileNotFoundError?! Hence the retry loop.
    # species1 starts as None so that a failure on the first attempt leads to
    # a retry rather than an unbound-variable error.
    species1 = None
    for _ in range(3):
        time.sleep(0.5)
        try:
            species1 = ddir.get_species_dir("species1")
        except OSError:
            # Covers FileNotFoundError; try again after the next sleep.
            continue
        if species1 is not None:
            break
    assert species1 is not None

    # Make a new metadata manifest, which has a newer version of species1,
    # and use it to update the data directory.
    species1_tarball = os.path.join(data_dir, "species1_data.20200801.tar.gz")
    manifest_data = {
        "species1": {"version": "20200801", "url": species1_tarball},
        "species2": {"version": "20190211", "url": "species2_url"},
    }
    load_manifest(manifest_data)

    # If we try to update species1 again, should fail because it's already
    # installed and we didn't force it. But should not have removed the existing
    # install of species1.
    with pytest.raises(RuntimeError):
        ddir.add_or_replace_species_data(species1_tarball)
    assert ddir.is_locked()
    ddir.stop_lock()
    assert ddir.species_is_installed("species1")

    # Now update species1 with the force option and check it worked.
    ddir.add_or_replace_species_data(species1_tarball, force=True)
    expect_manifest_with_species1["species1"]["latest"] = copy.copy(
        manifest_data["species1"]
    )
    expect_manifest_with_species1["species1"]["installed"] = copy.copy(
        manifest_data["species1"]
    )
    assert ddir.manifest == expect_manifest_with_species1
    assert ddir.species_is_installed("species1")
    assert ddir.installed_species() == ["species1"]

    # Test removing species. Unknown species should fail, species1 should work
    with pytest.raises(ValueError):
        ddir.remove_species("unknown species")
    ddir.remove_species("species1")
    assert not ddir.species_is_installed("species1")
    assert ddir.installed_species() == []
    expect_manifest["species1"]["latest"] = copy.copy(manifest_data["species1"])
    assert ddir.manifest == expect_manifest
    shutil.rmtree(temp_dir)
def describe(parser, args):
    """Print a summary of the panels in the directory ``args.panels_dir``.

    Announces the root directory being read, then delegates to the data
    directory's ``print_panels_summary()``.  ``parser`` is not used.
    """
    panels = DataDir(args.panels_dir)
    root = panels.root_dir
    print(f"Gathering data from {root}")
    panels.print_panels_summary()
def update_metadata(parser, args):
    """Update the manifest of the data directory at ``args.panels_dir``.

    ``args.filename`` is passed through as the ``filename`` keyword of
    ``update_manifest``.  ``parser`` is not used.
    """
    DataDir(args.panels_dir).update_manifest(filename=args.filename)