def update_manifest(self, filename=None, url=None):
    """Refresh the "latest" entry of every species in ``self.manifest``.

    The new manifest is read from ``filename`` when one is given; otherwise
    it is downloaded from ``url`` (``MANIFEST_URL`` when ``url`` is None).
    Each species found there gets its ``"latest"`` value replaced, and a
    species not yet in ``self.manifest`` is first registered with
    ``{"installed": None}``. The result is persisted via ``save_manifest``.

    The whole update runs between ``start_lock`` and ``stop_lock``. The lock
    is released even when the update fails, so a failed download or save
    does not leave a stale lock behind that blocks the next attempt.

    Args:
        filename: Optional path of a JSON manifest to load instead of
            downloading one.
        url: Optional URL to download the manifest from.

    Raises:
        RuntimeError: If the manifest cannot be downloaded or parsed from
            ``url``. The underlying exception is chained as ``__cause__``.
    """
    self.create_root()
    error_message = f"Cannot update panel list. Looks like another process is already trying to modify the mykrobe data directory {self.root_dir}. If this is not the case, then you can delete this file and try again: {self.lock_file}"
    # Acquire outside the try block: if acquiring fails we must not release
    # a lock that this call never obtained.
    self.start_lock(error_message)
    try:
        if filename is not None:
            logger.info(
                f"Getting latest panel information from file {filename}")
            new_manifest = load_json(filename)
        else:
            if url is None:
                url = MANIFEST_URL
            try:
                logger.info(f"Getting panels information from {url}")
                new_manifest = json.loads(requests.get(url).text)
            except Exception as err:
                # ``Exception`` rather than a bare except, so Ctrl-C and
                # SystemExit are not rewritten as a download error.
                raise RuntimeError(
                    f"Error getting latest panel information from {url}"
                ) from err

        for species, species_dict in new_manifest.items():
            logger.info(f"Updating metadata for species {species}")
            if species not in self.manifest:
                self.manifest[species] = {"installed": None}
            self.manifest[species]["latest"] = species_dict

        self.save_manifest()
    finally:
        self.stop_lock()

    logger.info(
        f"Finished updating metadata in panels directory {self.root_dir}")
def __init__(self, root_dir):
    """Open an existing species directory and select its default panel.

    Args:
        root_dir: Path of the species directory. Its absolute form is
            stored as ``self.root_dir``.

    Raises:
        FileNotFoundError: If the directory does not exist, or if it does
            not contain a ``manifest.json`` file.
    """
    absolute_root = os.path.abspath(root_dir)
    self.root_dir = absolute_root
    if not os.path.exists(absolute_root):
        raise FileNotFoundError(f"Species directory {self.root_dir} not found.")

    # The manifest path is joined onto ``root_dir`` exactly as the caller
    # passed it (not onto the absolute form stored above).
    manifest_path = os.path.join(root_dir, "manifest.json")
    self.manifest_json = manifest_path
    if not os.path.exists(manifest_path):
        raise FileNotFoundError(f"Manifest file not found in species directory {self.root_dir}. Expected to find {self.manifest_json}.")

    # ``panel`` is reset before the manifest is read; ``set_panel`` then
    # receives whatever ``default_panel()`` returns.
    self.panel = None
    self.manifest = load_json(manifest_path)
    self.set_panel(self.default_panel())
def file_paths_to_combined_dict(l):
    """Merge the JSON documents stored at the given paths into one dict.

    Each path is read with ``load_json``. A file that raises ``ValueError``
    while loading is reported on stderr and skipped; the remaining files
    are still merged.

    Args:
        l: Iterable of file paths.

    Returns:
        A dict holding the top-level entries of every document that loaded.

    Raises:
        AssertionError: If the first top-level key of a document is already
            present in the combined result.
    """
    ana = {}
    for f in l:
        try:
            data = load_json(f)
        except ValueError as e:
            sys.stderr.write(str(e) + " %s \n" % f)
        else:
            if data:
                # ``dict.keys()`` returns a view on Python 3 and cannot be
                # indexed, so take the first key through an iterator. The
                # ``if data`` guard keeps an empty document from failing.
                first_key = next(iter(data))
                assert first_key not in ana, (
                    "Duplicate top-level key %r found in %s" % (first_key, f)
                )
            ana.update(data)
    return ana
def get_drugs(drug_list):
    """Return the entries of ``drug_list`` without duplicates, in order.

    The de-duplication pass runs once per entry of the module-level
    ``args.files``, so the result is an empty list when ``args.files`` is
    empty.

    NOTE(review): every file is loaded with ``load_json`` but the parsed
    content is never used; only a ``ValueError`` from loading is ignored.
    Confirm whether the drug names were meant to come from the files.

    Args:
        drug_list: Iterable of drug names.

    Returns:
        List of the distinct drug names, in first-seen order.
    """
    unique_drugs = []
    for path in args.files:
        try:
            load_json(path)
        except ValueError:
            pass
        for candidate in drug_list:
            if candidate in unique_drugs:
                continue
            unique_drugs.append(candidate)
    return unique_drugs
def __init__(
    self,
    variant_calls,
    called_genes,
    base_json=None,
    depth_threshold=3,
    ignore_filtered=True,
    ignore_minor_calls=False,
    variant_to_resistance_json_fp=None,
):
    """Set up the predictor state from calls and a variant-to-drug mapping.

    Args:
        variant_calls: Stored unchanged as ``self.variant_calls``.
        called_genes: Stored unchanged as ``self.called_genes``.
        base_json: Object stored as ``self.out_json``; a new empty dict is
            used when None.
        depth_threshold: Stored as ``self.depth_threshold``.
        ignore_filtered: Stored as ``self.ignore_filtered``.
        ignore_minor_calls: Stored as ``self.ignore_minor_calls``.
        variant_to_resistance_json_fp: Path of the JSON mapping to load.
            When falsy, ``self.default_variant_to_resistance_drug`` is
            loaded instead.
    """
    # A falsy path (None or "") falls back to the default mapping file.
    mapping_path = (
        variant_to_resistance_json_fp
        or self.default_variant_to_resistance_drug
    )
    self.variant_or_gene_name_to_resistance_drug = load_json(mapping_path)

    self.variant_calls = variant_calls
    self.called_genes = called_genes

    # These helpers run after the mapping and the calls are in place.
    self.drugs = self._get_drug_list_from_variant_to_resistance_drug()
    self.resistance_prediction = self._create_initial_resistance_prediction()

    self.out_json = {} if base_json is None else base_json

    # Per-gene thresholds (presumably copy-number cut-offs, judging by the
    # attribute name -- confirm against where ``_cn_threshold`` is read).
    self._cn_threshold = dict(
        ermA=0.19,
        ermB=0.19,
        ermC=0.19,
        ermT=0.19,
        ermY=0.19,
        fusA=0.03,
        fusC=0.03,
        aacAaphD=0.04,
        mecA=0.06,
        mupA=0.21,
        blaZ=0.04,
        tetK=0.13,
    )

    self.depth_threshold = depth_threshold
    self.ignore_filtered = ignore_filtered
    self.ignore_minor_calls = ignore_minor_calls
def __init__(
    self,
    phylo_group_covgs,
    sub_complex_covgs,
    species_covgs,
    lineage_covgs,
    verbose=False,
    hierarchy_json_file=None,
):
    """Store the coverage inputs and load the optional hierarchy.

    Args:
        phylo_group_covgs: Stored as ``self.phylo_group_covgs``.
        sub_complex_covgs: Stored as ``self.sub_complex_covgs``.
        species_covgs: Stored as ``self.species_covgs``.
        lineage_covgs: Stored as ``self.lineage_covgs``.
        verbose: Stored as ``self.verbose``.
        hierarchy_json_file: Path of a JSON file used to build a
            ``Hierarchy``. When None, ``self.hierarchy`` is an empty dict.
    """
    self.phylo_group_covgs = phylo_group_covgs
    self.sub_complex_covgs = sub_complex_covgs
    self.species_covgs = species_covgs
    self.lineage_covgs = lineage_covgs
    self.out_json = {}
    self.threshold = {}
    self.verbose = verbose

    if hierarchy_json_file is None:
        # Handle the documented default explicitly instead of depending on
        # ``load_json(None)`` happening to raise TypeError.
        self.hierarchy = {}
    else:
        try:
            self.hierarchy = Hierarchy(load_json(hierarchy_json_file))
        except TypeError:
            # Kept from the original: a TypeError while loading or building
            # the hierarchy falls back to an empty one rather than failing.
            self.hierarchy = {}
def ref_data_from_args(args):
    """Build the reference-data dict for the species selected in ``args``.

    For ``args.species == "custom"`` the entries come straight from the
    ``custom_*`` options. Otherwise they are read from the species
    directory found under ``args.panels_dir``, after switching to
    ``args.panel`` when one is given.

    Args:
        args: Parsed command-line options.

    Returns:
        Dict with the keys ``fasta_files``, ``var_to_res_json``,
        ``hierarchy_json``, ``lineage_json``, ``kmer``, ``version``,
        ``species_phylo_group`` and ``lineage_dict``. ``lineage_dict`` is
        the loaded content of ``lineage_json``, or None when there is no
        lineage file.

    Raises:
        ValueError: If the species is "custom" but no custom probe set path
            was supplied.
    """
    is_custom = args.species == "custom"
    if is_custom and args.custom_probe_set_path is None:
        raise ValueError(
            "Must use --custom_probe_set_path option if the species is 'custom'"
        )

    if is_custom:
        ref_data = dict(
            fasta_files=[args.custom_probe_set_path],
            var_to_res_json=args.custom_variant_to_resistance_json,
            hierarchy_json=None,
            lineage_json=args.custom_lineage_json,
            kmer=args.kmer,
            version="custom",
            species_phylo_group=None,
        )
    else:
        panels = DataDir(args.panels_dir)
        species_dir = panels.get_species_dir(args.species)
        if args.panel is not None:
            species_dir.set_panel(args.panel)
        ref_data = dict(
            fasta_files=species_dir.fasta_files(),
            var_to_res_json=species_dir.json_file("amr"),
            hierarchy_json=species_dir.json_file("hierarchy"),
            lineage_json=species_dir.json_file("lineage"),
            kmer=species_dir.kmer(),
            version=species_dir.version(),
            species_phylo_group=species_dir.species_phylo_group(),
        )

    lineage_path = ref_data["lineage_json"]
    ref_data["lineage_dict"] = (
        None if lineage_path is None else load_json(lineage_path)
    )
    return ref_data
def diff_stats(stats1, stats2):
    """Return the field-by-field difference ``stats1 - stats2``.

    Args:
        stats1: Object exposing ``TP``, ``FP``, ``TN``, ``FN``,
            ``sensitivity``, ``specificity`` and ``total``.
        stats2: Object exposing the same attributes.

    Returns:
        List ``[total, TP, FP, TN, FN, sensitivity, specificity]`` of the
        differences.
    """
    read_order = ("TP", "FP", "TN", "FN", "sensitivity", "specificity", "total")
    delta = {
        field: getattr(stats1, field) - getattr(stats2, field)
        for field in read_order
    }
    report_order = ("total", "TP", "FP", "TN", "FN", "sensitivity", "specificity")
    return [delta[field] for field in report_order]


# Load the truth set and the two analyses being compared.
truth = load_json(args.truth)
ana1 = file_paths_to_combined_dict(args.ana1)
ana2 = file_paths_to_combined_dict(args.ana2)

truth_susceptibility = combined_dict_to_result_objects(truth)
ana1_susceptibility = combined_dict_to_result_objects(ana1)
ana2_susceptibility = combined_dict_to_result_objects(ana2)

# Only the samples returned by get_intersection_sample_ids(ana1, ana2) are
# used; the get_union_sample_ids alternative is left disabled.
sample_ids = get_intersection_sample_ids(ana1, ana2)

# Column separator for the report rows: markdown table or tab-separated.
row_delim = " | " if args.markdown else "\t"
def run_main(parser, args):
    """Run coverage parsing and genotyping for one sample.

    Args:
        parser: Argument parser; its ``parse_args()`` result supplies every
            option used here.
        args: Ignored -- immediately replaced by ``parser.parse_args()``.

    Returns:
        The genotyper's ``out_json``. It is also written to ``args.output``
        as indented JSON when that option is set.
    """
    args = parser.parse_args()
    verbose = True

    if args.ont:
        # --ont overrides the error rate, filters and model.
        args.expected_error_rate = 0.15
        args.filters = ["LOW_GT_CONF"]
        args.model = "kmer_count"
        logger.debug("Setting expected error rate to %s (--ont)" % args.expected_error_rate)
        logger.debug(
            "Removing LOW_PERCENT_COVERAGE filter (increases sensitivity - in particular for ONT data)"
        )

    if args.min_variant_conf is None:
        args.min_variant_conf = 100

    coverage_parser = CoverageParser(
        sample=args.sample,
        panel_file_paths=[args.probe_set],
        seq=args.seq,
        ctx=args.ctx,
        kmer=args.kmer,
        force=args.force,
        verbose=verbose,
        tmp_dir=args.tmp,
        skeleton_dir=args.skeleton_dir,
        threads=args.threads,
        memory=args.memory,
    )
    coverage_parser.run()

    # Without an explicit expected depth, use the parser's own estimate.
    if args.expected_depth is None:
        args.expected_depth = coverage_parser.estimate_depth()

    sample_info = {}
    base_json = {args.sample: sample_info}
    sample_info["probe_set"] = args.probe_set
    sample_info["files"] = args.seq if args.seq else args.ctx
    sample_info["kmer"] = args.kmer
    sample_info["version"] = __version__

    lineage_dict = None if args.lineage is None else load_json(args.lineage)

    genotyper = Genotyper(
        sample=args.sample,
        expected_error_rate=args.expected_error_rate,
        expected_depths=[args.expected_depth],
        variant_covgs=coverage_parser.variant_covgs,
        gene_presence_covgs=coverage_parser.covgs["presence"],
        base_json=base_json,
        contamination_depths=[],
        ignore_filtered=args.ignore_filtered,
        filters=args.filters,
        model=args.model,
        report_all_calls=args.report_all_calls,
        variant_confidence_threshold=args.min_variant_conf,
        sequence_confidence_threshold=args.min_gene_conf,
        min_gene_percent_covg_threshold=args.min_gene_percent_covg_threshold,
        kmer_size=args.kmer,
        min_proportion_expected_depth=args.min_proportion_expected_depth,
        ploidy=args.ploidy,
        lineage_variants=lineage_dict,
    )
    genotyper.run()

    if args.output:
        with open(args.output, 'w') as outfile:
            json.dump(genotyper.out_json, outfile, indent=4)

    if not args.keep_tmp:
        coverage_parser.remove_temporary_files()

    return genotyper.out_json
def test_load_json_compressed():
    """``load_json`` reads the ``.json.gz`` fixture and exposes its entries."""
    gz_fixture = "tests/ref_data/tb_variant_to_resistance_drug.json.gz"
    loaded = load_json(gz_fixture)
    expected_drugs = ["Isoniazid"]
    assert loaded["katG_S315T"] == expected_drugs
def load_manifest(self):
    """Populate ``self.manifest`` from the file at ``self.manifest_json``.

    When that file does not exist, ``self.manifest`` becomes an empty dict.
    """
    if not os.path.exists(self.manifest_json):
        self.manifest = {}
        return
    self.manifest = load_json(self.manifest_json)