Exemple #1
0
    def update_manifest(self, filename=None, url=None):
        """Refresh the "latest" panel metadata stored in this object's manifest.

        The new manifest is read from ``filename`` when one is given;
        otherwise it is downloaded from ``url`` (``MANIFEST_URL`` when
        ``url`` is None). For every species in the new manifest the
        ``"latest"`` entry is overwritten, and species not seen before get
        an ``"installed": None`` placeholder. The manifest is then saved.

        Args:
            filename: Optional path of a JSON manifest to load instead of
                downloading one.
            url: Optional URL to download the manifest from.

        Raises:
            RuntimeError: If the manifest cannot be downloaded or parsed
                from ``url``. The underlying exception is chained as the
                cause.
        """
        self.create_root()
        error_message = f"Cannot update panel list. Looks like another process is already trying to modify the mykrobe data directory {self.root_dir}. If this is not the case, then you can delete this file and try again: {self.lock_file}"
        # Taken outside the try block: if acquiring the lock itself fails we
        # must not call stop_lock() on a lock we do not hold.
        self.start_lock(error_message)
        try:
            if filename is not None:
                logger.info(
                    f"Getting latest panel information from file {filename}")
                new_manifest = load_json(filename)
            else:
                if url is None:
                    url = MANIFEST_URL
                try:
                    logger.info(f"Getting panels information from {url}")
                    new_manifest = json.loads(requests.get(url).text)
                except Exception as e:
                    # `Exception` rather than a bare except so that
                    # KeyboardInterrupt / SystemExit still propagate.
                    raise RuntimeError(
                        f"Error getting latest panel information from {url}"
                    ) from e

            for species, species_dict in new_manifest.items():
                logger.info(f"Updating metadata for species {species}")
                if species not in self.manifest:
                    self.manifest[species] = {"installed": None}
                self.manifest[species]["latest"] = species_dict

            self.save_manifest()
        finally:
            # Always release the lock, even when loading or saving failed, so
            # a failed update does not block every later run.
            self.stop_lock()
        logger.info(
            f"Finished updating metadata in panels directory {self.root_dir}")
Exemple #2
0
 def __init__(self, root_dir):
     """Bind to an existing species directory and load its manifest.

     Args:
         root_dir: Path of the species directory. It must exist and
             contain a ``manifest.json`` file.

     Raises:
         FileNotFoundError: If the directory or its manifest is missing.
     """
     resolved_root = os.path.abspath(root_dir)
     self.root_dir = resolved_root
     if not os.path.exists(resolved_root):
         raise FileNotFoundError(
             f"Species directory {resolved_root} not found."
         )

     # NOTE(review): the manifest path is joined onto the root_dir argument
     # as given, not onto the absolute path stored in self.root_dir.
     manifest_path = os.path.join(root_dir, "manifest.json")
     self.manifest_json = manifest_path
     if not os.path.exists(manifest_path):
         raise FileNotFoundError(
             f"Manifest file not found in species directory {resolved_root}."
             f" Expected to find {manifest_path}."
         )

     self.panel = None
     self.manifest = load_json(manifest_path)
     # The panel can only be chosen once the manifest has been loaded.
     self.set_panel(self.default_panel())
Exemple #3
0
def file_paths_to_combined_dict(l):
    """Merge the JSON dictionaries stored in several files into one dict.

    Each path is loaded with ``load_json``. A file that raises ValueError
    is reported on stderr and skipped. The loaded dictionaries are merged
    with ``dict.update``, so later files win on any key that is not checked
    below.

    Args:
        l: Iterable of file paths.

    Returns:
        A single dict holding the merged contents of every readable file.

    Raises:
        AssertionError: If the first key of a loaded file is already in the
            combined result.
    """
    ana = {}
    for f in l:
        try:
            data = load_json(f)
        except ValueError as e:
            # Best effort: report the unreadable file and keep going.
            sys.stderr.write(str(e) + " %s \n" % f)
        else:
            if data:
                # dict.keys() is a non-indexable view on Python 3, so the
                # first key is taken by iteration (also works on Python 2).
                first_key = next(iter(data))
                assert first_key not in ana, (
                    "Duplicate key %r found in %s" % (first_key, f))
            ana.update(data)
    return ana
Exemple #4
0
def file_paths_to_combined_dict(l):
    """Combine the JSON dictionaries found in a list of files.

    Every path is read with ``load_json``. When that raises ValueError the
    error and the path are written to stderr and the file is skipped. The
    remaining dictionaries are merged into one with ``dict.update``.

    Args:
        l: Iterable of file paths.

    Returns:
        The merged dictionary (empty when no file could be loaded).

    Raises:
        AssertionError: If the first key of a loaded file already exists in
            the merged dictionary.
    """
    ana = {}
    for f in l:
        try:
            data = load_json(f)
        except ValueError as e:
            # Unreadable input is reported, not fatal.
            sys.stderr.write(str(e) + " %s \n" % f)
        else:
            if data:
                # Indexing data.keys() fails on Python 3 (it is a view, not
                # a list); iterating gives the same first key portably.
                first_key = next(iter(data))
                assert first_key not in ana, (
                    "Duplicate key %r found in %s" % (first_key, f))
            ana.update(data)
    return ana
Exemple #5
0
def get_drugs(drug_list):
    """Return the entries of ``drug_list`` without duplicates, in first-seen order.

    The de-duplication pass runs once per path in the module-level
    ``args.files``, so the result is an empty list when ``args.files`` is
    empty. Each of those files is parsed with ``load_json`` first; a
    ValueError from parsing is ignored.

    NOTE(review): the parsed file contents are never used. Confirm whether
    the drugs were meant to come from the files rather than ``drug_list``.
    """
    unique_drugs = []
    for path in args.files:
        try:
            load_json(path)
        except ValueError:
            # An unparsable file is treated exactly like a parsable one.
            pass
        for candidate in drug_list:
            if candidate in unique_drugs:
                continue
            unique_drugs.append(candidate)
    return unique_drugs
Exemple #6
0
 def __init__(
     self,
     variant_calls,
     called_genes,
     base_json=None,
     depth_threshold=3,
     ignore_filtered=True,
     ignore_minor_calls=False,
     variant_to_resistance_json_fp=None,
 ):
     """Set up the predictor state from variant and gene calls.

     Args:
         variant_calls: Stored as ``self.variant_calls``.
         called_genes: Stored as ``self.called_genes``.
         base_json: Dict used as the output JSON; a fresh empty dict is
             used when None.
         depth_threshold: Stored as ``self.depth_threshold``.
         ignore_filtered: Stored as ``self.ignore_filtered``.
         ignore_minor_calls: Stored as ``self.ignore_minor_calls``.
         variant_to_resistance_json_fp: Path of the variant/gene to drug
             JSON mapping. When falsy, the path held in
             ``self.default_variant_to_resistance_drug`` is loaded instead.
     """
     # An explicit path wins; any falsy value falls back to the default.
     mapping_path = (variant_to_resistance_json_fp
                     or self.default_variant_to_resistance_drug)
     self.variant_or_gene_name_to_resistance_drug = load_json(mapping_path)

     self.variant_calls = variant_calls
     self.called_genes = called_genes

     # These two helpers run after the mapping and the calls are stored.
     self.drugs = self._get_drug_list_from_variant_to_resistance_drug()
     self.resistance_prediction = (
         self._create_initial_resistance_prediction())

     self.out_json = {} if base_json is None else base_json

     # Per-gene thresholds, keyed by gene name.
     self._cn_threshold = dict(
         ermA=0.19,
         ermB=0.19,
         ermC=0.19,
         ermT=0.19,
         ermY=0.19,
         fusA=0.03,
         fusC=0.03,
         aacAaphD=0.04,
         mecA=0.06,
         mupA=0.21,
         blaZ=0.04,
         tetK=0.13,
     )

     self.depth_threshold = depth_threshold
     self.ignore_filtered = ignore_filtered
     self.ignore_minor_calls = ignore_minor_calls
Exemple #7
0
 def __init__(
     self,
     phylo_group_covgs,
     sub_complex_covgs,
     species_covgs,
     lineage_covgs,
     verbose=False,
     hierarchy_json_file=None,
 ):
     """Store the coverage inputs and load the optional hierarchy.

     Args:
         phylo_group_covgs: Stored as ``self.phylo_group_covgs``.
         sub_complex_covgs: Stored as ``self.sub_complex_covgs``.
         species_covgs: Stored as ``self.species_covgs``.
         lineage_covgs: Stored as ``self.lineage_covgs``.
         verbose: Stored as ``self.verbose``.
         hierarchy_json_file: Path of a JSON file passed to ``load_json``
             and wrapped in ``Hierarchy``. If that raises TypeError,
             ``self.hierarchy`` becomes an empty dict.
     """
     # Coverage inputs, stored unchanged.
     self.phylo_group_covgs = phylo_group_covgs
     self.sub_complex_covgs = sub_complex_covgs
     self.species_covgs = species_covgs
     self.lineage_covgs = lineage_covgs

     # Empty output and threshold containers.
     self.out_json = {}
     self.threshold = {}
     self.verbose = verbose

     try:
         hierarchy_data = load_json(hierarchy_json_file)
         self.hierarchy = Hierarchy(hierarchy_data)
     except TypeError:
         # NOTE(review): presumably this covers the default
         # hierarchy_json_file=None; confirm against load_json.
         self.hierarchy = {}
Exemple #8
0
def ref_data_from_args(args):
    """Build the reference-data dictionary described by the parsed arguments.

    For any species other than ``"custom"`` the values come from the species
    directory found through ``DataDir(args.panels_dir)``, with ``args.panel``
    applied when it is set. For ``"custom"`` they are taken directly from the
    ``custom_*`` arguments.

    Returns:
        A dict with the keys ``fasta_files``, ``var_to_res_json``,
        ``hierarchy_json``, ``lineage_json``, ``kmer``, ``version``,
        ``species_phylo_group`` and ``lineage_dict``. ``lineage_dict`` is the
        loaded lineage JSON, or None when there is no lineage file.

    Raises:
        ValueError: If the species is ``"custom"`` and no custom probe set
            path was supplied.
    """
    if args.species != "custom":
        species_dir = DataDir(args.panels_dir).get_species_dir(args.species)
        if args.panel is not None:
            species_dir.set_panel(args.panel)
        ref_data = {
            "fasta_files": species_dir.fasta_files(),
            "var_to_res_json": species_dir.json_file("amr"),
            "hierarchy_json": species_dir.json_file("hierarchy"),
            "lineage_json": species_dir.json_file("lineage"),
            "kmer": species_dir.kmer(),
            "version": species_dir.version(),
            "species_phylo_group": species_dir.species_phylo_group(),
        }
    elif args.custom_probe_set_path is None:
        raise ValueError(
            "Must use --custom_probe_set_path option if the species is 'custom'"
        )
    else:
        ref_data = {
            "fasta_files": [args.custom_probe_set_path],
            "var_to_res_json": args.custom_variant_to_resistance_json,
            "hierarchy_json": None,
            "lineage_json": args.custom_lineage_json,
            "kmer": args.kmer,
            "version": "custom",
            "species_phylo_group": None,
        }

    # Only load the lineage file when one is configured.
    lineage_path = ref_data["lineage_json"]
    ref_data["lineage_dict"] = (
        None if lineage_path is None else load_json(lineage_path)
    )
    return ref_data
Exemple #9
0
def diff_stats(stats1, stats2):
    """Return the attribute-wise difference ``stats1 - stats2``.

    Both arguments must expose ``TP``, ``FP``, ``TN``, ``FN``,
    ``sensitivity``, ``specificity`` and ``total``.

    Returns:
        A list ordered as
        ``[total, TP, FP, TN, FN, sensitivity, specificity]``.
    """
    # Attributes are read in this order; the result is emitted in `layout`.
    read_order = ("TP", "FP", "TN", "FN", "sensitivity", "specificity",
                  "total")
    layout = ("total", "TP", "FP", "TN", "FN", "sensitivity", "specificity")

    delta = {}
    for field in read_order:
        delta[field] = getattr(stats1, field) - getattr(stats2, field)
    return [delta[field] for field in layout]


# Load the truth set and the two analyses that are being compared.
truth = load_json(args.truth)
ana1, ana2 = (
    file_paths_to_combined_dict(args.ana1),
    file_paths_to_combined_dict(args.ana2),
)

# Convert each combined dictionary into result objects.
truth_susceptibility = combined_dict_to_result_objects(truth)
ana1_susceptibility = combined_dict_to_result_objects(ana1)
ana2_susceptibility = combined_dict_to_result_objects(ana2)

# Only samples present in both analyses are compared
# (get_union_sample_ids(ana1, ana2) would be the alternative).
sample_ids = get_intersection_sample_ids(ana1, ana2)

# Column separator: markdown table cells or tab-separated values.
row_delim = " | " if args.markdown else "\t"
Exemple #10
0
def diff_stats(stats1, stats2):
    """Subtract the counts and rates of ``stats2`` from those of ``stats1``.

    Both arguments must provide the attributes ``TP``, ``FP``, ``TN``,
    ``FN``, ``sensitivity``, ``specificity`` and ``total``.

    Returns:
        The differences as a list in the order
        ``[total, TP, FP, TN, FN, sensitivity, specificity]``.
    """
    def _delta(field):
        # Difference of one named attribute between the two stats objects.
        return getattr(stats1, field) - getattr(stats2, field)

    # Evaluated in the original read order, then arranged for output.
    tp_diff = _delta("TP")
    fp_diff = _delta("FP")
    tn_diff = _delta("TN")
    fn_diff = _delta("FN")
    sensitivity_diff = _delta("sensitivity")
    specificity_diff = _delta("specificity")
    total_diff = _delta("total")

    return [
        total_diff,
        tp_diff,
        fp_diff,
        tn_diff,
        fn_diff,
        sensitivity_diff,
        specificity_diff,
    ]


# Read the inputs: the truth JSON plus the two sets of analysis files.
truth = load_json(args.truth)
ana1, ana2 = (
    file_paths_to_combined_dict(args.ana1),
    file_paths_to_combined_dict(args.ana2),
)

# Turn every combined dictionary into result objects.
truth_susceptibility = combined_dict_to_result_objects(truth)
ana1_susceptibility = combined_dict_to_result_objects(ana1)
ana2_susceptibility = combined_dict_to_result_objects(ana2)

# The comparison is limited to samples found in both analyses
# (get_union_sample_ids(ana1, ana2) would be the alternative).
sample_ids = get_intersection_sample_ids(ana1, ana2)

# Markdown output uses " | " between cells; otherwise a tab is used.
row_delim = " | " if args.markdown else "\t"
Exemple #11
0
def run_main(parser, args):
    """Run coverage parsing and genotyping for one sample.

    Args:
        parser: Argument parser; its ``parse_args()`` result is what the
            function actually uses.
        args: Ignored; it is replaced by ``parser.parse_args()``.

    Returns:
        The genotyper's ``out_json``. It is also written to ``args.output``
        as indented JSON when that option is set.
    """
    # The incoming `args` is overwritten with a fresh parse.
    args = parser.parse_args()

    if args.ont:
        # ONT presets: higher expected error rate, only the LOW_GT_CONF
        # filter, and the kmer_count model.
        args.expected_error_rate = 0.15
        args.filters = ["LOW_GT_CONF"]
        args.model = "kmer_count"
        logger.debug("Setting expected error rate to %s (--ont)" %
                     args.expected_error_rate)
        logger.debug(
            "Removing LOW_PERCENT_COVERAGE filter (increases sensitivity - in particular for ONT data)"
        )

    if args.min_variant_conf is None:
        args.min_variant_conf = 100

    coverage_parser = CoverageParser(
        sample=args.sample,
        panel_file_paths=[args.probe_set],
        seq=args.seq,
        ctx=args.ctx,
        kmer=args.kmer,
        force=args.force,
        verbose=True,
        tmp_dir=args.tmp,
        skeleton_dir=args.skeleton_dir,
        threads=args.threads,
        memory=args.memory,
    )
    coverage_parser.run()

    # Estimate the depth from the parsed coverage unless one was supplied.
    if args.expected_depth is None:
        args.expected_depth = coverage_parser.estimate_depth()

    # Per-sample metadata that seeds the output JSON.
    sample_info = {
        "probe_set": args.probe_set,
        "files": args.seq if args.seq else args.ctx,
        "kmer": args.kmer,
        "version": __version__,
    }
    base_json = {args.sample: sample_info}

    lineage_dict = None if args.lineage is None else load_json(args.lineage)

    genotyper = Genotyper(
        sample=args.sample,
        expected_error_rate=args.expected_error_rate,
        expected_depths=[args.expected_depth],
        variant_covgs=coverage_parser.variant_covgs,
        gene_presence_covgs=coverage_parser.covgs["presence"],
        base_json=base_json,
        contamination_depths=[],
        ignore_filtered=args.ignore_filtered,
        filters=args.filters,
        model=args.model,
        report_all_calls=args.report_all_calls,
        variant_confidence_threshold=args.min_variant_conf,
        sequence_confidence_threshold=args.min_gene_conf,
        min_gene_percent_covg_threshold=args.min_gene_percent_covg_threshold,
        kmer_size=args.kmer,
        min_proportion_expected_depth=args.min_proportion_expected_depth,
        ploidy=args.ploidy,
        lineage_variants=lineage_dict,
    )
    genotyper.run()

    if args.output:
        with open(args.output, 'w') as outfile:
            json.dump(genotyper.out_json, outfile, indent=4)

    if not args.keep_tmp:
        coverage_parser.remove_temporary_files()

    return genotyper.out_json
Exemple #12
0
def test_load_json_compressed():
    """Check one known entry after loading a ``.json.gz`` file with load_json."""
    resistance_map = load_json(
        "tests/ref_data/tb_variant_to_resistance_drug.json.gz")
    expected_drugs = ["Isoniazid"]

    assert resistance_map["katG_S315T"] == expected_drugs
Exemple #13
0
 def load_manifest(self):
     """Populate ``self.manifest`` from the manifest file.

     The JSON at ``self.manifest_json`` is loaded when that path exists;
     otherwise ``self.manifest`` is set to an empty dict.
     """
     manifest_path = self.manifest_json
     self.manifest = (
         load_json(manifest_path) if os.path.exists(manifest_path) else {}
     )