Esempio n. 1
0
    def from_cnv(variant):
        """Build the ``VariantDetails`` of a CNV variant.

        The variant's ``_variant_type`` must be a CNV type (enforced with an
        assertion). Its type, start position and end position are packed
        into a ``VariantDesc`` that is paired with the variant's chromosome.
        """
        cnv_type = variant._variant_type
        assert VariantType.is_cnv(cnv_type)

        description = VariantDesc(
            variant_type=cnv_type,
            position=variant.position,
            end_position=variant.end_position,
        )
        return VariantDetails(variant.chrom, description)
Esempio n. 2
0
    def annotate(self, variant):
        """Return the list of effects ``variant`` has on the gene models.

        CNV variants are delegated to ``_do_annotate_cnv``. For any other
        variant, every transcript model whose region, widened by
        ``self.promoter_len`` on both sides, overlaps the variant is passed
        to ``get_effect_for_transcript``. When the chromosome has no models,
        or no transcript yields an effect, a single "intergenic" effect is
        returned.
        """
        logger = logging.getLogger(__name__)
        if VariantType.is_cnv(variant.variant_type):
            return self._do_annotate_cnv(variant)

        utr_models = self.gene_models.utr_models
        if variant.chromosome not in utr_models:
            return [EffectFactory.create_effect("intergenic")]

        effects = []
        chrom_models = utr_models[variant.chromosome]
        for (start, stop), transcripts in chrom_models.items():
            # Skip regions that do not overlap the variant, allowing for
            # the promoter margin on either side.
            if variant.position > stop + self.promoter_len:
                continue
            if variant.ref_position_last < start - self.promoter_len:
                continue

            for tm in transcripts:
                logger.debug(
                    "========: %s-%s :====================",
                    tm.gene,
                    tm.tr_id,
                )
                effect = self.get_effect_for_transcript(variant, tm)

                logger.debug("")
                logger.debug("Result: %s", effect)
                logger.debug("")
                if effect is not None:
                    effects.append(effect)

        if not effects:
            effects.append(EffectFactory.create_effect("intergenic"))
        return effects
Esempio n. 3
0
def test_cnv_best_state_X(cnv_raw):
    """Both CNV variants on chromosome X carry the expected best state."""
    expected_best_state = np.asarray([
        [2, 1, 0, 2],
        [0, 0, 1, 0]
    ])

    queried = cnv_raw.query_variants(
        effect_types=["CNV+", "CNV-"],
        variant_type="cnv+ or cnv-",
    )
    x_variants = [variant for variant in queried if variant.chrom == "X"]

    assert len(x_variants) == 2
    for variant in x_variants:
        assert variant.alt_alleles
        for alt_allele in variant.alt_alleles:
            assert VariantType.is_cnv(alt_allele.variant_type)

    first, second = x_variants
    assert np.array_equal(first.best_state, expected_best_state)
    assert np.array_equal(second.best_state, expected_best_state)
Esempio n. 4
0
    def do_annotate(self, aline, variant, liftover_variants):
        """Write the effect annotation of ``variant`` into ``aline``.

        A missing variant is reported through ``self._not_found(aline)``.
        Otherwise the effects computed by ``self.effect_annotator`` are
        wrapped with ``self.wrap_effects`` and stored under the column names
        configured in ``self.columns``. For CNV variants the length
        (``end_position - position``) is passed to the annotator; for all
        other variants the length is ``None``. ``liftover_variants`` is not
        used by this method.
        """
        if variant is None:
            self._not_found(aline)
            return

        if VariantType.is_cnv(variant.variant_type):
            length = variant.end_position - variant.position
        else:
            length = None

        effects = self.effect_annotator.do_annotate_variant(
            chrom=variant.chromosome,
            position=variant.position,
            ref=variant.reference,
            alt=variant.alternative,
            variant_type=variant.variant_type,
            length=length)

        wrapped = self.wrap_effects(effects)
        gene_genes, gene_types = wrapped[1], wrapped[2]
        transcript_ids, detail_genes, detail_texts = \
            wrapped[3], wrapped[4], wrapped[5]

        # Assignments stay in the original order so that an ordered mapping
        # receives its keys in the same sequence.
        aline[self.columns["effect_type"]] = wrapped[0]

        aline[self.columns["effect_gene_genes"]] = gene_genes
        aline[self.columns["effect_gene_types"]] = gene_types
        aline[self.columns["effect_genes"]] = [
            f"{gene}:{effect}"
            for gene, effect in zip(gene_genes, gene_types)
        ]
        aline[self.columns["effect_details_transcript_ids"]] = transcript_ids
        aline[self.columns["effect_details_genes"]] = detail_genes
        aline[self.columns["effect_details_details"]] = detail_texts
        aline[self.columns["effect_details"]] = [
            f"{transcript}:{gene}:{detail}"
            for transcript, gene, detail in zip(
                transcript_ids, detail_genes, detail_texts)
        ]
Esempio n. 5
0
    def _do_annotate_cnv(self, variant):
        """Return the effects of a CNV variant on the gene models.

        The effect type is "CNV+" when the variant type has the
        ``VariantType.cnv_p`` bit set and "CNV-" when it has the
        ``VariantType.cnv_m`` bit set. One effect is created for every
        transcript model whose region intersects the CNV region
        ``[position, position + length]``. When nothing intersects, or the
        chromosome has no gene models at all, a single effect of the CNV
        type without a transcript is returned.

        Raises:
            ValueError: if the variant type is neither "CNV+" nor "CNV-".
        """
        assert VariantType.is_cnv(variant.variant_type)
        if variant.variant_type & VariantType.cnv_p:
            effect_type = "CNV+"
        elif variant.variant_type & VariantType.cnv_m:
            effect_type = "CNV-"
        else:
            raise ValueError(
                f"unexpected variant type: {variant.variant_type}")

        utr_models = self.gene_models.utr_models
        # A chromosome without gene models must not raise KeyError; the
        # non-CNV ``annotate`` path guards the same lookup.
        if variant.chromosome not in utr_models:
            return [EffectFactory.create_effect(effect_type)]

        cnv_region = Region(variant.chromosome, variant.position,
                            variant.position + variant.length)

        effects = []
        for (start, stop), tms in utr_models[variant.chromosome].items():
            model_region = Region(variant.chromosome, start, stop)
            if not cnv_region.intersection(model_region):
                continue
            for tm in tms:
                effects.append(
                    EffectFactory.create_effect_with_tm(effect_type, tm))

        if not effects:
            effects.append(EffectFactory.create_effect(effect_type))

        return effects
Esempio n. 6
0
    def __init__(self,
                 chrom=None,
                 position=None,
                 loc=None,
                 var=None,
                 ref=None,
                 alt=None,
                 length=None,
                 seq=None,
                 variant_type=None):
        """Initialise the variant from a location plus a description.

        The location is resolved by ``set_position`` from ``chrom``,
        ``position`` and ``loc``. For CNV variant types only the length and
        the variant type are stored. Every other variant is described by
        ``set_ref_alt``, after which the last reference/alternate positions
        are derived from the lengths of ``reference`` and ``alternate``.
        """
        self.variant_type = None
        self.length = None

        self.set_position(chrom, position, loc)

        if not VariantType.is_cnv(variant_type):
            self.set_ref_alt(var, ref, alt, length, seq, variant_type)

            self.ref_position_last = self.position + len(self.reference)
            self.alt_position_last = self.position + len(self.alternate)

            # Never let the corrected last position fall before the start.
            self.corrected_ref_position_last = max(
                self.position, self.ref_position_last - 1)
            return

        # CNV: a chromosome, a position and a length are mandatory.
        assert self.chromosome is not None
        assert self.position is not None

        if self.length is None:
            assert length is not None
            self.length = length

        self.variant_type = variant_type
Esempio n. 7
0
    def do_annotate(self, aline, variant, liftover_variants):
        """Fill ``aline`` with aggregated scores for ``variant``.

        CNV variants are skipped and reported through
        ``self._scores_not_found(aline)``. When ``self.liftover`` is set, the
        variant is replaced by the entry stored under that key in
        ``liftover_variants``. A missing variant or an empty score lookup
        also ends in ``self._scores_not_found(aline)``. Substitutions and
        indels are aggregated by their dedicated helpers; any other variant
        type is logged as a warning and treated as not found.
        """
        # Do not touch attributes of a missing variant: the ``None`` case is
        # handled after the optional liftover lookup below.
        if variant is not None and VariantType.is_cnv(variant.variant_type):
            logger.info(
                f"skip trying to add NP position score for CNV variant "
                f"{variant}")
            self._scores_not_found(aline)
            return

        if self.liftover:
            variant = liftover_variants.get(self.liftover)

        if variant is None:
            self._scores_not_found(aline)
            return

        scores = self._fetch_scores(variant)
        if not scores:
            self._scores_not_found(aline)
            return
        scores_df = self.score_file.scores_to_dataframe(scores)

        if variant.variant_type & VariantType.substitution:
            aline.update(self._aggregate_substitution(variant, scores_df))
        elif variant.variant_type & VariantType.indel:
            aline.update(self._aggregate_indel(variant, scores_df))
        else:
            logger.warning(
                f"unexpected variant type: {variant}, {variant.variant_type}"
            )
            self._scores_not_found(aline)
Esempio n. 8
0
 def __repr__(self) -> str:
     """Return a short textual form of the allele.

     CNVs are rendered as ``chromosome:position-end_position``. An allele
     without an alternative is rendered as the reference marked with
     ``(ref)``, and everything else as ``reference->alternative``.
     """
     if VariantType.is_cnv(self._variant_type):
         return f"{self.chromosome}:{self.position}-{self.end_position}"

     if self.alternative:
         return (f"{self.chrom}:{self.position}"
                 f" {self.reference}->{self.alternative}")
     return f"{self.chrom}:{self.position} {self.reference}(ref)"
Esempio n. 9
0
    def do_annotate(self, aline, variant, liftover_variants):
        """Fill ``aline`` with the frequency columns found for ``variant``.

        CNV variants are skipped and reported through
        ``self._scores_not_found(aline)``. When ``self.liftover`` is set, the
        variant is replaced by the entry stored under that key in
        ``liftover_variants``. A missing variant or an empty score lookup
        also ends in ``self._scores_not_found(aline)``. Otherwise the score
        row whose ``self.variant_col_name`` value equals the variant's
        ``details.cshl_variant`` is copied into ``aline``: blank values
        become ``self.score_file.no_score_value`` and everything else is
        converted to ``float``.

        Raises:
            ValueError: if a non-blank score value cannot be converted to
                ``float``; the problem is logged before re-raising.
        """
        # Do not touch attributes of a missing variant: the ``None`` case is
        # handled after the optional liftover lookup below.
        if variant is not None and VariantType.is_cnv(variant.variant_type):
            logger.info(
                f"skip trying to add frequency for CNV variant {variant}")
            self._scores_not_found(aline)
            return

        # A single lookup is enough; repeating it after the ``None`` check
        # would yield the same lifted-over variant.
        if self.liftover:
            variant = liftover_variants.get(self.liftover)

        if variant is None:
            self._scores_not_found(aline)
            return

        chrom = variant.chromosome
        pos = variant.details.cshl_position
        logger.debug(
            f"{self.score_filename_base}: looking for DAE frequency of "
            f"{variant}; {chrom}:{pos};")

        scores = self.score_file.fetch_scores(chrom, pos, pos)
        if not scores:
            self._scores_not_found(aline)
            return
        variant_detail = variant.details.cshl_variant

        known_variants = scores[self.variant_col_name]
        variant_occurrences = known_variants.count(variant_detail)
        if variant_occurrences == 0:
            # NOTE(review): aline is left untouched when the position has
            # scores but none for this variant - confirm this is intended.
            return

        if variant_occurrences > 1:
            logger.warning(
                f"WARNING {self.score_filename_base}: "
                f"multiple variant occurrences of {chrom}:{pos} {variant}")

        # With several occurrences the first matching row is used.
        variant_index = known_variants.index(variant_detail)
        for native, output in self.config.columns.items():
            # FIXME: this conversion should come from schema
            val = scores[native][variant_index]
            try:
                if val in {"", " "}:
                    aline[output] = self.score_file.no_score_value
                else:
                    aline[output] = float(val)
                logger.debug(
                    f"DAE frequency: aline[{output}]={aline[output]}")

            except ValueError as ex:
                logger.error(
                    f"problem with: {output}: {chrom}:{pos} - {val}")
                logger.error(ex)
                raise
Esempio n. 10
0
 def details(self) -> Optional[VariantDetails]:
     """Return the lazily built ``VariantDetails`` of this allele.

     The value is cached in ``self._details`` once computed. CNVs are
     built with ``VariantDetails.from_cnv``. An allele whose alternative
     is ``None`` has no details and yields ``None``. Everything else is
     built with ``VariantDetails.from_vcf``.
     """
     if self._details is not None:
         return self._details

     if VariantType.is_cnv(self._variant_type):
         self._details = VariantDetails.from_cnv(self)
         return self._details

     if self.alternative is None:
         return None

     self._details = VariantDetails.from_vcf(
         self.chromosome,
         self.position,
         self.reference,
         self.alternative,
     )
     return self._details
Esempio n. 11
0
    def __init__(self, chrom: str, variant_desc: VariantDesc):
        """Store the chromosome and description and derive CSHL fields.

        ``cshl_location`` is ``chrom:position-end_position`` for CNVs and
        ``chrom:position`` otherwise. ``cshl_variant`` is the string form of
        the description and ``cshl_variant_full`` comes from its
        ``to_cshl_full`` method.
        """
        self.chrom = chrom
        self.variant_desc = variant_desc
        self.cshl_position = self.variant_desc.position

        if VariantType.is_cnv(self.variant_desc.variant_type):
            start = self.variant_desc.position
            end = self.variant_desc.end_position
            self.cshl_location = f"{self.chrom}:{start}-{end}"
        else:
            self.cshl_location = f"{self.chrom}:{self.cshl_position}"

        self.cshl_variant = str(variant_desc)
        self.cshl_variant_full = variant_desc.to_cshl_full()
Esempio n. 12
0
    def set_ref_alt(self, var, ref, alt, length, seq, typ):
        if ref is not None:
            assert alt is not None
            assert var is None
            assert length is None
            assert seq is None

            assert not VariantType.is_cnv(typ)
            self.reference = ref
            self.alternate = alt

        if var is not None:
            assert ref is None
            assert alt is None
            assert length is None
            assert seq is None
            assert not VariantType.is_cnv(typ)

            self.set_ref_alt_from_variant(var)

        self.trim_equal_ref_alt_parts()
        assert self.reference is not None
        assert self.alternate is not None
Esempio n. 13
0
def test_cnv_impala(cnv_impala):
    """The de novo CNV query returns 12 variants with only CNV alleles."""
    variants = list(
        cnv_impala.query_variants(
            effect_types=["CNV+", "CNV-"],
            variant_type="cnv+ or cnv-",
            inheritance="denovo"
        )
    )

    print(variants)

    for variant in variants:
        assert variant.alt_alleles
        for alt_allele in variant.alt_alleles:
            print(alt_allele)
            assert VariantType.is_cnv(alt_allele.variant_type)
    assert len(variants) == 12
Esempio n. 14
0
    def do_annotate(self, aline, variant, liftover_variants):
        """Fill ``aline`` with position scores for ``variant``.

        CNV variants are skipped and reported through
        ``self._scores_not_found(aline)``. When ``self.liftover`` is set, the
        variant is replaced by the entry stored under that key in
        ``liftover_variants``. A missing variant or an empty score lookup
        also ends in ``self._scores_not_found(aline)``.

        For every name in ``self.score_names`` the converted score values
        are combined: a single value is stored as is. Several values are
        averaged, weighted by the matching entries of ``scores["COUNT"]``
        and divided by the sum of all counts. A zero or empty weighted sum
        yields ``self.score_file.no_score_value``.
        """
        # Do not touch attributes of a missing variant: the ``None`` case is
        # handled after the optional liftover lookup below.
        if variant is not None and VariantType.is_cnv(variant.variant_type):
            logger.info(
                f"skip trying to add position score for CNV variant {variant}")
            self._scores_not_found(aline)
            return

        if self.liftover:
            variant = liftover_variants.get(self.liftover)

        if variant is None:
            self._scores_not_found(aline)
            return

        scores = self._fetch_scores(variant)

        logger.debug(
            f"{self.score_file.score_filename} looking for score of {variant}")
        if not scores:
            logger.debug(
                f"{self.score_file.score_filename} score not found"
            )
            self._scores_not_found(aline)
            return

        counts = scores["COUNT"]
        total_count = sum(counts)

        for score_name in self.score_names:
            column_name = getattr(self.config.columns, score_name)
            values = [self._convert_score(raw) for raw in scores[score_name]]
            assert len(values) > 0
            if len(values) == 1:
                aline[column_name] = values[0]
                continue

            # Pair each value with its own count BEFORE dropping empty
            # values; filtering first would shift the remaining values onto
            # the wrong counts.
            total_sum = sum(
                count * value
                for count, value in zip(counts, values)
                if value
            )
            aline[column_name] = \
                (total_sum / total_count) if total_sum \
                else self.score_file.no_score_value
            logger.debug(
                f"aline[{column_name}]={aline[column_name]}")
Esempio n. 15
0
    def liftover_variant(self, variant):
        """Return ``variant`` lifted over to the target genome, or ``None``.

        CNV variants are not lifted over. The coordinates are converted by
        the ``liftover_variant`` function using ``self.liftover`` and
        ``self.target_genome``. ``None`` is returned when the conversion
        yields nothing or when any exception is raised; in the latter case a
        warning is logged.
        """
        assert isinstance(variant, SummaryAllele)
        if VariantType.is_cnv(variant.variant_type):
            return None

        try:
            lifted = liftover_variant(
                variant.chrom,
                variant.position,
                variant.reference,
                variant.alternative,
                self.liftover,
                self.target_genome,
            )
            if lifted is None:
                return None

            lo_chrom, lo_pos, lo_ref, lo_alt = lifted
            result = SummaryAllele(lo_chrom, lo_pos, lo_ref, lo_alt)
            # Attribute access kept from the original; presumably it forces
            # evaluation of the variant type inside the try - confirm.
            result.variant_type

            return result
        except Exception as ex:
            logger.warning(f"problem in variant {variant} liftover: {ex}")
        return None
Esempio n. 16
0
    def do_annotate(self, aline, variant, liftover_variants):
        """Fill ``aline`` with the VCF info columns found for ``variant``.

        CNV variants are skipped and reported through
        ``self._scores_not_found(aline)``. When ``self.liftover`` is set, the
        variant is replaced by the entry stored under that key in
        ``liftover_variants``. A missing variant or an empty score lookup
        also ends in ``self._scores_not_found(aline)``. Otherwise the first
        score row whose ``REF``/``ALT`` pair equals the variant's reference
        and alternative is copied into the columns configured in
        ``self.config.columns``.
        """
        # Do not touch attributes of a missing variant: the ``None`` case is
        # handled after the optional liftover lookup below.
        if variant is not None and VariantType.is_cnv(variant.variant_type):
            logger.info(
                f"skip trying to add VCF info score for CNV variant {variant}")
            self._scores_not_found(aline)
            return

        if self.liftover:
            variant = liftover_variants.get(self.liftover)

        if variant is None:
            self._scores_not_found(aline)
            return

        chrom = variant.chromosome
        pos = variant.position
        logger.debug(
            f"{self.score_file.score_filename}: looking for VCF frequency of "
            f"{variant}; {chrom}:{pos};")

        scores = self.score_file.fetch_scores(chrom, pos, pos)
        if not scores:
            self._scores_not_found(aline)
            return

        logger.debug(
            f"scores found: {scores}")

        refs = scores["REF"]
        alts = scores["ALT"]
        assert len(refs) == len(alts)
        for index, (ref, alt) in enumerate(zip(refs, alts)):
            if variant.reference != ref or variant.alternative != alt:
                continue
            for name, output in self.config.columns.items():
                aline[output] = scores[name][index]
                logger.debug(
                    f"VCF frequency: aline[{output}]={aline[output]}")
            return
        # NOTE(review): when no REF/ALT pair matches, aline is left
        # untouched - confirm this is intended.