Beispiel #1
0
    def _parse_variant_panel(self, row):
        """Parse one variant's group of probe rows into ``self.variant_covgs``.

        ``row`` is the first row of the group and is always recorded as a
        reference coverage. The remaining rows are pulled from
        ``self.reader``: up to ``num_alts - 1`` further reference rows,
        followed by ``num_alts`` alternate rows, where ``num_alts`` comes from
        the parameters returned by ``get_params`` for the first row's probe
        name (default 0).

        If a row that was expected to be a reference row does not start with
        ``ref``, a warning is logged, the row is kept as an alternate coverage
        and one fewer alternate row is read afterwards.

        The resulting ``VariantProbeCoverage`` is appended to the list stored
        under the first row's probe name in ``self.variant_covgs``.

        Raises:
            AssertionError: if a row read as an alternate does not start with
                ``alt``.
            StopIteration: if ``self.reader`` is exhausted before all expected
                rows have been read (propagated from ``next``).
        """

        def read_probe(probe_row):
            # Split a summary row into its probe name and coverage record.
            name, median_depth, min_depth, percent_coverage, k_count = (
                self._parse_summary_covgs_row(probe_row))
            coverage = ProbeCoverage(percent_coverage=percent_coverage,
                                     median_depth=median_depth,
                                     min_depth=min_depth,
                                     k_count=k_count)
            return name, coverage

        allele, first_coverage = read_probe(row)
        params = get_params(allele)
        if 'var_name' in params:
            var_name = params.get('var_name')
        else:
            # No explicit name: use the second '-'-separated field of the
            # probe name, ignoring everything after the first '?'.
            var_name = allele.split('?')[0].split('-')[1]

        num_alts = int(params.get("num_alts", 0))
        reference_coverages = [first_coverage]
        alternate_coverages = []

        # The first reference row is already consumed, hence ``num_alts - 1``.
        for _ in range(num_alts - 1):
            probe_name, coverage = read_probe(next(self.reader))
            if probe_name.split('-')[0] != 'ref':
                logger.warning("Fewer ref alleles than alt alleles for %s" %
                               probe_name)
                # This row is already an alternate: keep it and read one
                # fewer alternate row below.
                alternate_coverages.append(coverage)
                num_alts -= 1
                break
            reference_coverages.append(coverage)

        for _ in range(num_alts):
            probe_name, coverage = read_probe(next(self.reader))
            # Raised explicitly (rather than via ``assert``) so the check on
            # the input rows is not stripped when Python runs with -O.
            if probe_name.split('-')[0] != 'alt':
                raise AssertionError(
                    "Expected an alt allele row but got %s" % probe_name)
            alternate_coverages.append(coverage)

        variant_probe_coverage = VariantProbeCoverage(
            reference_coverages=reference_coverages,
            alternate_coverages=alternate_coverages,
            var_name=var_name,
            params=params)
        self.variant_covgs.setdefault(allele,
                                      []).append(variant_probe_coverage)
Beispiel #2
0
 def _name_to_id(self, probe_name):
     """Build the identifier string for a probe from its name.

     The parameters returned by ``get_params(probe_name)`` are inspected:

     * with a truthy ``mut`` parameter the result is
       ``"<gene>_<mut>-<var_name>"``;
     * otherwise the result is just the variant name.

     The variant name is the ``var_name`` parameter when ``mut`` is set and
     ``var_name`` is present; in every other case it is the second
     '-'-separated field of the probe name (ignoring everything after the
     first '?'), the same fallback ``_parse_variant_panel`` uses.

     NOTE(review): a probe with ``mut`` but no ``gene`` still fails inside
     ``"_".join`` - confirm whether that combination can occur.
     """
     params = get_params(probe_name)
     names = []
     var_name = None
     if params.get("mut"):
         names.append("_".join([params.get("gene"), params.get("mut")]))
         var_name = params.get("var_name")
     if var_name is None:
         # Either there is no "mut", or "mut" came without "var_name"; the
         # latter used to put None into ``names`` and crash in the join.
         var_name = probe_name.split('?')[0].split('-')[1]
     names.append(var_name)
     return "-".join(names)
Beispiel #3
0
    def _parse_seq_panel(self, row):
        """Record the coverage of one sequence-probe row in ``self.covgs``.

        The probe name (first value returned by
        ``_parse_summary_covgs_row``) is passed to ``get_params``; the
        ``panel_type`` parameter (default ``"presence"``) decides how the row
        is stored under ``self.covgs[panel_type][name]``:

        * ``"variant"`` / ``"presence"``: a ``SequenceProbeCoverage`` is
          stored under the ``version`` parameter (default ``'1'``),
          replacing any previous entry for that version.
        * any other panel type: the entry is a dict of running totals.
          ``"total_bases"`` accumulates the ``length`` parameter of every
          row (``-1`` when the parameter is absent), while the
          ``"percent_coverage"``, ``"length"`` and ``"median"`` lists only
          receive rows with ``percent_coverage > 75`` and
          ``median_depth > 0``.

        Missing ``panel_type`` and ``name`` levels of ``self.covgs`` are
        created on demand for both kinds of panel.
        """
        allele, median_depth, min_depth, percent_coverage, k_count = (
            self._parse_summary_covgs_row(row))
        probe_coverage = ProbeCoverage(percent_coverage=percent_coverage,
                                       median_depth=median_depth,
                                       min_depth=min_depth,
                                       k_count=k_count)

        params = get_params(allele)
        panel_type = params.get("panel_type", "presence")
        name = params.get('name')
        version = params.get('version', '1')
        panel = self.covgs.setdefault(panel_type, {})

        if panel_type in ["variant", "presence"]:
            panel.setdefault(name, {})[version] = SequenceProbeCoverage(
                name=name,
                probe_coverage=probe_coverage,
                version=version,
                length=params.get("length"))
            return

        # Species panels are treated differently
        length = int(params.get("length", -1))
        totals = panel.setdefault(name, {
            "total_bases": 0,
            "percent_coverage": [],
            "length": [],
            "median": [],
        })
        totals["total_bases"] += length
        if percent_coverage > 75 and median_depth > 0:
            totals["percent_coverage"].append(percent_coverage)
            totals["length"].append(length)
            totals["median"].append(median_depth)
Beispiel #4
0
 def _create_variant(self, probe_name):
     """Return a ``Variant`` described by ``probe_name``, or ``None``.

     The variant name is the second '-'-separated field of the probe name,
     ignoring everything after the first '?'. When the parameters returned
     by ``get_params`` contain a truthy ``mut``, ``"<gene>_<mut>"`` is listed
     as an additional name ahead of it.

     ``None`` is returned when splitting the variant name or creating the
     variant raises ``AttributeError`` (presumably because the probe does
     not belong to a variant panel - confirm against ``split_var_name``).
     """
     info = get_params(probe_name)
     labels = []
     if info.get("mut"):
         gene_and_mutation = [info.get("gene"), info.get("mut")]
         labels.append("_".join(gene_and_mutation))
     var_name = probe_name.split('?')[0].split('-')[1]
     labels.append(var_name)
     try:
         # Only names of the expected form can be turned into a variant.
         reference, position, alternate = split_var_name(var_name)
         variant = Variant.create(start=position,
                                  reference_bases=reference,
                                  alternate_bases=[alternate],
                                  names=labels,
                                  info=info)
     except AttributeError:
         return None
     return variant
Beispiel #5
0
def run(parser, args):
    """Walk the graph for every probe in ``args.probe_set`` and print JSON.

    Steps, in order:

    1. ``check_args(args)``; ``build_binary()`` when ``args.seq`` is set.
    2. The output dict starts from ``run_genotype(parser, args)`` when
       ``args.also_genotype`` is set, otherwise from an empty entry for
       ``args.sample``; a ``"paths"`` sub-dict is (re)created in it.
    3. A ``WebServer`` is started and a ``GraphWalker`` attached to it.
    4. Each FASTA record of the probe set is grouped by its ``name``
       parameter (falling back to the record index). A ``PathDetails`` seed
       is added unless the record's version is in the per-gene skip list or
       no start k-mer was found; the record's sequence is always appended
       to the gene's ``"known_kmers"`` string, followed by ``N``.
    5. For each gene the paths from ``get_paths_for_gene`` are stored: all
       of them when ``args.show_all_paths`` is set, otherwise a one-element
       list holding the best path (``{"found": False}`` when there is none).
    6. The whole output dict is printed as JSON.

    The server is stopped even when one of the steps after ``wb.start()``
    raises.
    """
    genes = {}
    skip_list = {
        "tem": ["191", "192"],
        "oxa": ["12", "14", "33"],
        "shv": ["12", "6"]
    }
    check_args(args)
    if args.seq:
        build_binary()
    if args.also_genotype:
        _out_dict = run_genotype(parser, args)
    else:
        _out_dict = {args.sample: {}}
    _out_dict[args.sample]["paths"] = {}
    out_dict = _out_dict[args.sample]["paths"]
    wb = WebServer(port=0,
                   args=[args.ctx],
                   memory=args.memory,
                   mccortex_path=args.mccortex31_path)
    logger.debug("Loading binary")
    wb.start()
    try:
        logger.debug("Walking the graph")
        gw = GraphWalker(proc=wb.mccortex,
                         kmer_size=args.kmer,
                         print_depths=True)
        with open(args.probe_set, 'r') as infile:
            for i, record in enumerate(SeqIO.parse(infile, "fasta")):
                sequence = str(record.seq)
                repeat_kmers = get_repeat_kmers(record, args.kmer)
                params = get_params(record.id)
                gene_name = params.get("name", i)
                version = params.get("version", i)
                if gene_name not in genes:
                    logger.debug("Loading kmer data for %s" % (gene_name))
                    genes[gene_name] = {"pathdetails": [], "known_kmers": ""}
                last_kmer = sequence[-args.kmer:]
                start_kmer, skipped = find_start_kmer(sequence, gw.mcq,
                                                      args.kmer)
                if version not in skip_list.get(gene_name,
                                                []) and start_kmer:
                    path_details = PathDetails(start_kmer,
                                               last_kmer,
                                               len(record.seq),
                                               skipped=skipped,
                                               v=version)
                    path_details.set_repeat_kmers(repeat_kmers)
                    genes[gene_name]["pathdetails"].append(path_details)

                genes[gene_name]["known_kmers"] += "%sN" % sequence

        for gene_name, gene_dict in genes.items():
            logger.debug("Walking graph with seeds defined by %s" % gene_name)
            paths = get_paths_for_gene(gene_name, gene_dict, gw)
            # Materialise the values: on Python 3 ``dict.values()`` is a view
            # that can neither be indexed nor serialised by ``json.dumps``.
            candidates = list(paths.values())
            if args.show_all_paths:
                out_dict[gene_name] = candidates
                continue
            if len(candidates) > 1:
                # choose best version
                best_path = choose_best_assembly(candidates)
            elif len(candidates) == 1:
                best_path = candidates[0]
            else:
                best_path = {"found": False}
            out_dict[gene_name] = [best_path]
        print(json.dumps(_out_dict, sort_keys=False, indent=4))
    finally:
        # Always shut the server down, including on errors above.
        logger.info("Cleaning up")
        wb.stop()