Ejemplo n.º 1
0
def test_lift_over(mocker, chrom, pos, lift_over, expected, genomes_db_2013):
    """Annotate one A>T allele with a mocked LiftOverAnnotator.

    The chain and target-genome loaders are patched out, and the annotator's
    ``liftover`` and ``target_genome`` attributes are replaced with mocks
    backed by the ``lift_over`` argument and ``mock_get_sequence``. The test
    then compares the CSHL location of the variant stored under the ``"lo1"``
    key (or ``None`` when nothing was stored) with ``expected``.
    """
    section = {
        "options": {
            "mode": "replace",
            "vcf": True,
            "direct": True,
            "region": None,
            "chain_file": "fake_chain_file",
            "c": "chrom",
            "p": "pos",
            "liftover": "lo1",
        },
        "columns": {
            "new_x": "hg19_location",
        },
        "annotator": "lift_over_annotator.LiftOverAnnotator",
        "virtual_columns": [],
    }
    config = AnnotationConfigParser.parse_section(section)

    # Prevent the annotator from loading a real chain file or genome.
    patched_loaders = (
        "dae.annotation.tools.lift_over_annotator."
        "LiftOverAnnotator.load_liftover_chain",
        "dae.annotation.tools.lift_over_annotator."
        "LiftOverAnnotator.load_target_genome",
    )
    for loader in patched_loaders:
        mocker.patch(loader)

    annotator = LiftOverAnnotator(config, genomes_db_2013)
    assert annotator is not None

    liftover_mock = mocker.Mock()
    liftover_mock.convert_coordinate = lift_over
    annotator.liftover = liftover_mock

    genome_mock = mocker.Mock()
    genome_mock.get_sequence = mock_get_sequence
    annotator.target_genome = genome_mock

    liftover_variants = {}
    annotator.do_annotate(
        {"chrom": chrom, "pos": pos},
        SummaryAllele(chrom, pos, "A", "T"),
        liftover_variants,
    )

    lo_variant = liftover_variants.get("lo1")
    print(f"liftover variant: {lo_variant}")
    if lo_variant:
        lo_location = lo_variant.details.cshl_location
    else:
        lo_location = None

    assert expected == lo_location
def test_parquet_frequency_bin(fam1, gt, attributes, rare_boundary, expected):
    """Check the frequency bin and the partition filename of every allele.

    A family variant is built from a summary variant whose three entries are
    the same allele object at 1:11539 carrying ``attributes``. Each family
    allele must evaluate to the ``expected`` frequency bin and map to the
    matching parquet filename.
    """
    allele = SummaryAllele(
        "1", 11539, "T", None, 0, 0, attributes=attributes)
    # The list holds three references to the one allele object.
    summary_variant = SummaryVariant([allele] * 3)
    family_variant = FamilyVariant(summary_variant, fam1, gt, None)
    descriptor = ParquetPartitionDescriptor(
        ["1"], 1000, rare_boundary=rare_boundary)

    expected_filename = (
        f"region_bin=1_11/frequency_bin={expected}/"
        f"variants_region_bin_1_11_frequency_bin_{expected}.parquet"
    )
    for family_allele in family_variant.alleles:
        assert descriptor._evaluate_frequency_bin(family_allele) == expected
        assert descriptor.variant_filename(family_allele) == expected_filename
def test_parquet_coding_bin(fam1, gt, eff1, eff2, eff3, coding_effect_types,
                            expected):
    """Check the coding bin and the partition filename of every allele.

    The summary variant holds a reference allele plus three alternatives
    (G, C, A) carrying ``eff1``, ``eff2`` and ``eff3`` as their "effects"
    attribute. Family alleles are paired with the entries of ``expected``.
    """
    summary_alleles = [SummaryAllele("1", 11539, "T", None, 0, 0)]
    alternatives = (("G", eff1), ("C", eff2), ("A", eff3))
    summary_alleles.extend(
        SummaryAllele("1", 11539, "T", alternative, 0, allele_index,
                      attributes={"effects": effects})
        for allele_index, (alternative, effects)
        in enumerate(alternatives, start=1)
    )

    # NOTE: the `gt` argument is overridden with a fixed genotype here.
    gt = np.array([[0, 1, 0], [2, 0, 3]], dtype="int8")
    summary_variant = SummaryVariant(summary_alleles)
    family_variant = FamilyVariant(summary_variant, fam1, gt, None)
    descriptor = ParquetPartitionDescriptor(
        ["1"], 1000, coding_effect_types=coding_effect_types)

    for family_allele, coding_bin in zip(family_variant.alleles, expected):
        assert descriptor._evaluate_coding_bin(family_allele) == coding_bin
        filename = descriptor.variant_filename(family_allele)
        assert filename == (
            f"region_bin=1_11/coding_bin={coding_bin}/"
            f"variants_region_bin_1_11_coding_bin_{coding_bin}.parquet")
Ejemplo n.º 4
0
    def build_variant(self, aline):
        """Build a SummaryAllele from the columns of ``aline``.

        The values are read from the columns named by ``self.chrom``,
        ``self.position``, ``self.ref`` and ``self.alt``.

        Returns None when the chromosome or the position is None, or when
        the alternative is falsy; otherwise returns the new SummaryAllele
        with the position converted to ``int``.
        """
        assert self.chrom, self.chrom
        chrom, position = aline[self.chrom], aline[self.position]
        ref, alt = aline[self.ref], aline[self.alt]

        # Incomplete rows cannot be turned into an allele.
        if chrom is None or position is None or not alt:
            return None

        return SummaryAllele(chrom, int(position), ref, alt)
Ejemplo n.º 5
0
    def build_variant(self, aline):
        """Build a SummaryAllele from a DAE-style variant description.

        The chromosome and position come from the ``self.location`` column
        (split on ":") when that column is present in ``aline``; otherwise
        from the ``self.chrom`` and ``self.position`` columns, which must
        both be present. The variant is converted with ``dae2vcf_variant``
        using ``self.genomic_sequence``.
        """
        variant = aline[self.variant]
        if self.location not in aline:
            assert self.chrom in aline
            assert self.position in aline
            chrom, position = aline[self.chrom], aline[self.position]
        else:
            chrom, position = aline[self.location].split(":")

        converted = dae2vcf_variant(
            chrom, int(position), variant, self.genomic_sequence)
        vcf_position, ref, alt = converted
        return SummaryAllele(chrom, vcf_position, ref, alt)
Ejemplo n.º 6
0
def test_dae2vcf(
    mocker, variant, check_pos, check_cshl_pos, check_ref, check_alt
):
    """Convert a DAE variant at chr1:150013938 to VCF form and back.

    The genome is a mock whose ``get_sequence`` returns a run of "A" of the
    requested length. The converted position/ref/alt are compared with the
    ``check_*`` arguments, and the SummaryAllele built from them must report
    the original CSHL position, location and variant.
    """

    def all_a_sequence(_, start, end):
        # Inclusive range, hence the +1.
        return "A" * (end - start + 1)

    genome = mocker.Mock()
    genome.get_sequence = all_a_sequence

    pos, ref, alt = dae2vcf_variant("chr1", 150013938, variant, genome)

    assert pos == check_pos
    assert ref == check_ref
    assert alt == check_alt

    summary = SummaryAllele("chr1", pos, ref, alt)
    assert summary is not None

    expected_location = f"chr1:{check_cshl_pos}"
    assert summary.cshl_position == check_cshl_pos
    assert summary.cshl_location == expected_location
    assert summary.cshl_variant == variant
Ejemplo n.º 7
0
    def liftover_variant(self, variant):
        """Lift ``variant`` over using ``self.liftover``/``self.target_genome``.

        Returns a new SummaryAllele built from the lifted-over coordinates.
        Returns None when the variant is a CNV, when the module-level
        ``liftover_variant`` helper yields None, or when any exception is
        raised along the way (the exception is logged as a warning).
        """
        assert isinstance(variant, SummaryAllele)
        if VariantType.is_cnv(variant.variant_type):
            return None

        try:
            lifted = liftover_variant(
                variant.chrom, variant.position,
                variant.reference, variant.alternative,
                self.liftover, self.target_genome)
            if lifted is None:
                return None

            lo_chrom, lo_pos, lo_ref, lo_alt = lifted
            result = SummaryAllele(lo_chrom, lo_pos, lo_ref, lo_alt)
            # The value is discarded; accessing the attribute inside the
            # `try` means any error it raises is caught and logged below.
            result.variant_type
            return result
        except Exception as ex:
            logger.warning(f"problem in variant {variant} liftover: {ex}")
            return None
Ejemplo n.º 8
0
def test_allele_best_state(
    chromosome,
    position,
    reference,
    alternative,
    allele_index,
    allele_count,
    genotype,
    expected,
    sample_family,
):
    """Check ``FamilyAllele.best_state`` against ``expected``.

    The family allele wraps a SummaryAllele built from the given coordinates
    and ``allele_index``; its ``allele_count`` attribute is set before the
    best state is read.
    """
    summary_allele = SummaryAllele(
        chromosome, position, reference, alternative, 0, allele_index, {},
    )
    family_allele = FamilyAllele(
        summary_allele, sample_family, genotype, None)
    family_allele.update_attributes({"allele_count": allele_count})

    best_state = family_allele.best_state
    assert np.array_equal(best_state, expected)
import pytest
import numpy as np
from dae.backends.impala.parquet_io import ParquetPartitionDescriptor, \
    NoPartitionDescriptor
from dae.variants.family_variant import FamilyVariant
from dae.variants.variant import SummaryAllele, SummaryVariant

# Three alleles at position 11539 with reference "T" on chromosome "1":
# alternatives None, "TA" and "TG", with allele indexes 0, 1 and 2.
summary_alleles_chr1 = [
    SummaryAllele("1", 11539, "T", alternative, 0, allele_index)
    for allele_index, alternative in enumerate((None, "TA", "TG"))
]

# The same three alleles on chromosome "2".
summary_alleles_chr2 = [
    SummaryAllele("2", 11539, "T", alternative, 0, allele_index)
    for allele_index, alternative in enumerate((None, "TA", "TG"))
]


@pytest.mark.parametrize(
    "chromosomes, region_length, summary_alleles, expected",
    [
        (["1", "2"], 1000, summary_alleles_chr1, "1_11"),
        (["1", "2"], 1000, summary_alleles_chr2, "2_11"),
        (["1"], 1000, summary_alleles_chr1, "1_11"),
        (["2"], 1000, summary_alleles_chr1, "other_11"),
    ],
)
def test_parquet_region_bin(fam1, gt, chromosomes, region_length,
                            summary_alleles, expected):
Ejemplo n.º 10
0
def svX1():
    """Return a SummaryVariant at X:154931050 with reference "T".

    It holds three alleles: alternative None, "A" and "G", with allele
    indexes 0, 1 and 2.
    """
    alleles = [
        SummaryAllele("X", 154931050, "T", alternative, 0, allele_index)
        for allele_index, alternative in enumerate((None, "A", "G"))
    ]
    return SummaryVariant(alleles)
Ejemplo n.º 11
0
def sv1():
    """Return a SummaryVariant at 1:11539 with reference "T".

    It holds three alleles: alternative None, "TA" and "TG", with allele
    indexes 0, 1 and 2.
    """
    alleles = [
        SummaryAllele("1", 11539, "T", alternative, 0, allele_index)
        for allele_index, alternative in enumerate((None, "TA", "TG"))
    ]
    return SummaryVariant(alleles)