Example #1
0
def run(inheritance_model, ped, vcf, min_depth, min_gq, min_kindreds,
        severity):
    """Write variants matching an inheritance model, grouped by gene.

    Variants are grouped by the gene of their highest-sorting effect; a
    group is written (to the "-" output) when enough distinct families have
    at least one variant in it that satisfies the model. Each written
    variant gets an ``inheritance`` INFO value of ``gene:fam1,fam2``.

    Args:
        inheritance_model: name of the ``EvalFamily`` method to call for each
            family (looked up with ``getattr``).
        ped: pedigree input handed to ``Family.from_ped``.
        vcf: VCF input handed to ``cyvcf2.VCF``.
        min_depth: forwarded to the model method as ``min_depth``.
        min_gq: forwarded to the model method as ``min_gq``.
        min_kindreds: minimum number of distinct matching families a gene
            group needs before its variants are written.
        severity: container of accepted ``impact_severity`` values, or
            ``None`` to accept every effect.
    """
    from cyvcf2 import VCF, Writer
    vcf = VCF(vcf, samples="-")

    # Map each annotation INFO key present in the header to the list of
    # field names parsed out of its Description.
    annos = {}
    if "ANN" in vcf:
        desc = vcf["ANN"]["Description"]
        parts = [
            x.strip("\"'") for x in re.split(r"\s*\|\s*",
                                             desc.split(":", 1)[1].strip('" '))
        ]
        # Store the parsed field names (not the raw description), matching
        # what the EFF and CSQ branches hand to Effect.new.
        annos["ANN"] = parts
    if "EFF" in vcf:
        desc = vcf["EFF"]["Description"]
        parts = [
            x.strip(" [])'(\"")
            for x in re.split(r"\||\(",
                              desc.split(":", 1)[1].strip())
        ]
        annos["EFF"] = parts
    if "CSQ" in vcf:
        desc = vcf["CSQ"]["Description"]
        parts = [
            x.strip(" [])'(\"")
            for x in re.split(r"\||\(",
                              desc.split(":", 1)[1].strip())
        ]
        annos["CSQ"] = parts

    vcf.update(id="inheritance",
               type="String",
               number="1",
               description="inheritance stuffs")
    out = Writer("-", vcf)

    # sample name -> column index in the VCF, so pedigree subjects can be
    # matched to genotype columns.
    vcf_order = dict((n, i) for i, n in (enumerate(vcf.samples)))
    fams = Family.from_ped(ped, order=vcf_order)
    for fam_id in fams:
        fams[fam_id] = (EvalFamily(fams[fam_id]),
                        [s._i for s in fams[fam_id].subjects])

    def get_gene(variant):
        """Return the gene of the top-sorted effect, or None if there is none."""
        for anno in annos:
            raw = variant.INFO.get(anno)
            if raw is None:
                # this variant carries no value for this annotation key
                continue
            consequences = raw.split(",")
            effs = (Effect.new(anno, c, annos[anno]) for c in consequences)
            # limit to requested severity
            if severity is not None:
                effs = [e for e in effs if e.impact_severity in severity]
            effs = sorted(effs, reverse=True)
            for eff in effs:
                if eff.gene:
                    return eff.gene
        return None

    try:
        # TODO: more flexible groupby
        # NOTE: itertools.groupby only merges *consecutive* variants that
        # share a gene.
        for gene, variants in it.groupby(vcf, get_gene):

            matching_fams = defaultdict(list)  # variant index -> family ids
            saved_vars = []
            uniq_fams = set()

            for i, variant in enumerate(variants):
                saved_vars.append(variant)

                for family_id, (fam, idxs) in fams.items():
                    fam.gt_types = variant.gt_types[idxs]
                    fam.gt_depths = variant.gt_depths[idxs]
                    fam.gt_quals = variant.gt_quals[idxs]
                    # this dispatches to fam.auto_rec/auto_dom/de_novo/, etc.
                    # by the string in inheritance model
                    res = getattr(fam, inheritance_model)(min_depth=min_depth,
                                                          min_gq=min_gq)

                    # matched the inheritance model.
                    if res:  # can add custom logic here, e.g. and v.call_rate > 0.9:
                        matching_fams[i].append(family_id)
                        uniq_fams.add(family_id)

            n_fams = len(uniq_fams)
            if n_fams == 0 or n_fams < min_kindreds:
                continue

            if inheritance_model == 'comp_het':
                # TODO: idxs = matching_fams.keys()
                # run idxs[1:] vs idxs[:-1] for variants
                pass
            for i, family_ids in sorted(matching_fams.items()):
                variant = saved_vars[i]
                # sorted() keeps the family order stable between runs
                variant.INFO["inheritance"] = "%s:%s" % (gene, ",".join(
                    sorted(family_ids)))

                out.write_record(variant)
    finally:
        # close the writer so buffered records are flushed even on error
        out.close()
        vcf.close()
Example #2
0
def run(inheritance_model, ped, vcf, min_depth, min_gq, min_kindreds, severity):
    """Write variants matching an inheritance model, grouped by gene.

    Variants are grouped by the gene of their highest-sorting effect; a
    group is written (to the "-" output) when enough distinct families have
    at least one variant in it that satisfies the model. Each written
    variant gets an ``inheritance`` INFO value of ``gene:fam1,fam2``.

    Args:
        inheritance_model: name of the ``EvalFamily`` method to call for each
            family (looked up with ``getattr``).
        ped: pedigree input handed to ``Family.from_ped``.
        vcf: VCF input handed to ``cyvcf2.VCF``.
        min_depth: forwarded to the model method as ``min_depth``.
        min_gq: forwarded to the model method as ``min_gq``.
        min_kindreds: minimum number of distinct matching families a gene
            group needs before its variants are written.
        severity: container of accepted ``impact_severity`` values, or
            ``None`` to accept every effect.
    """
    from cyvcf2 import VCF, Writer
    vcf = VCF(vcf, samples="-")

    # Map each annotation INFO key present in the header to the list of
    # field names parsed out of its Description.
    annos = {}
    if "ANN" in vcf:
        desc = vcf["ANN"]["Description"]
        parts = [x.strip("\"'") for x in re.split(r"\s*\|\s*", desc.split(":", 1)[1].strip('" '))]
        # Store the parsed field names (not the raw description), matching
        # what the EFF and CSQ branches hand to Effect.new.
        annos["ANN"] = parts
    if "EFF" in vcf:
        desc = vcf["EFF"]["Description"]
        parts = [x.strip(" [])'(\"") for x in re.split(r"\||\(", desc.split(":", 1)[1].strip())]
        annos["EFF"] = parts
    if "CSQ" in vcf:
        desc = vcf["CSQ"]["Description"]
        parts = [x.strip(" [])'(\"") for x in re.split(r"\||\(", desc.split(":", 1)[1].strip())]
        annos["CSQ"] = parts

    vcf.update(id="inheritance", type="String", number="1", description="inheritance stuffs")
    out = Writer("-", vcf)

    # sample name -> column index in the VCF, so pedigree subjects can be
    # matched to genotype columns.
    vcf_order = dict((n, i) for i, n in (enumerate(vcf.samples)))
    fams = Family.from_ped(ped, order=vcf_order)
    for fam_id in fams:
        fams[fam_id] = (EvalFamily(fams[fam_id]), [s._i for s in fams[fam_id].subjects])

    def get_gene(variant):
        """Return the gene of the top-sorted effect, or None if there is none."""
        for anno in annos:
            raw = variant.INFO.get(anno)
            if raw is None:
                # this variant carries no value for this annotation key
                continue
            consequences = raw.split(",")
            effs = (Effect.new(anno, c, annos[anno]) for c in consequences)
            # limit to requested severity
            if severity is not None:
                effs = [e for e in effs if e.impact_severity in severity]
            effs = sorted(effs, reverse=True)
            for eff in effs:
                if eff.gene:
                    return eff.gene
        return None

    try:
        # TODO: more flexible groupby
        # NOTE: itertools.groupby only merges *consecutive* variants that
        # share a gene.
        for gene, variants in it.groupby(vcf, get_gene):

            matching_fams = defaultdict(list)  # variant index -> family ids
            saved_vars = []
            uniq_fams = set()

            for i, variant in enumerate(variants):
                saved_vars.append(variant)

                for family_id, (fam, idxs) in fams.items():
                    fam.gt_types = variant.gt_types[idxs]
                    fam.gt_depths = variant.gt_depths[idxs]
                    fam.gt_quals = variant.gt_quals[idxs]
                    # this dispatches to fam.auto_rec/auto_dom/de_novo/, etc. by the string
                    # in inheritance model
                    res = getattr(fam, inheritance_model)(min_depth=min_depth, min_gq=min_gq)

                    # matched the inheritance model.
                    if res:  # can add custom logic here, e.g. and v.call_rate > 0.9:
                        matching_fams[i].append(family_id)
                        uniq_fams.add(family_id)

            n_fams = len(uniq_fams)
            if n_fams == 0 or n_fams < min_kindreds:
                continue

            if inheritance_model == 'comp_het':
                # TODO: idxs = matching_fams.keys()
                # run idxs[1:] vs idxs[:-1] for variants
                pass
            for i, family_ids in sorted(matching_fams.items()):
                variant = saved_vars[i]
                # sorted() keeps the family order stable between runs
                variant.INFO["inheritance"] = "%s:%s" % (gene, ",".join(sorted(family_ids)))

                out.write_record(variant)
    finally:
        # close the writer so buffered records are flushed even on error
        out.close()
        vcf.close()