Example #1
0
def test_denovo_loader_avoids_duplicates(
    genome_2013,
    fixture_dirname,
    fake_families,
):
    """Check the variant counts produced for ``variants_VCF_style_dup.tsv``.

    The file is loaded through ``DenovoLoader`` using VCF-style column
    mappings (chrom/pos/ref/alt plus family id and best state).  The test
    expects the loader to yield 3 summary variants carrying 4 family
    variants in total.

    NOTE(review): judging by the test and fixture names, the input file
    presumably contains duplicated rows that the loader must collapse --
    confirm against the fixture.
    """
    denovo_filename = fixture_dirname(
        "denovo_import/variants_VCF_style_dup.tsv")
    params = {
        "denovo_chrom": "chrom",
        "denovo_pos": "pos",
        "denovo_ref": "ref",
        "denovo_alt": "alt",
        "denovo_family_id": "familyId",
        "denovo_best_state": "bestState"
    }
    variants_loader = DenovoLoader(fake_families,
                                   denovo_filename,
                                   genome=genome_2013,
                                   params=params)

    vs = variants_loader.full_variants_iterator()

    svs = []
    fvs = []
    for sv, fvs_ in vs:
        # Materialize once so the entry can be both printed and collected,
        # whatever kind of iterable the loader hands back.
        current_fvs = list(fvs_)
        # Print the family variants of *this* entry; the original printed the
        # running accumulator, which made the debug output misleading.
        print(sv, current_fvs)
        svs.append(sv)
        fvs.extend(current_fvs)

    assert len(svs) == 3
    assert len(fvs) == 4
Example #2
0
def test_extra_attributes_serialization_deserialization(
        fixtures_gpf_instance, fixture_dirname):
    """Round-trip a family variant through ``AlleleParquetSerializer``.

    The first family variant of the first entry yielded by the loader is
    serialized (summary data, scores, family variant and extra attributes)
    and then deserialized again.  The restored variant must expose
    ``"asdf"`` as the first value of its ``someAttr`` attribute.
    """
    pedigree_path = fixture_dirname("backends/iossifov_extra_attrs.ped")
    families_data = FamiliesLoader.load_simple_families_file(pedigree_path)

    variants_path = fixture_dirname("backends/iossifov_extra_attrs.tsv")
    genome = fixtures_gpf_instance.get_genome()
    loader = DenovoLoader(families_data, variants_path, genome)

    # The serializer is configured from the schema and extra attributes
    # exposed by the loader itself.
    serializer = AlleleParquetSerializer(
        loader.get_attribute("annotation_schema"),
        loader.get_attribute("extra_attributes"),
    )

    # Each yielded entry is indexed as entry[1] -> family variants.
    first_entry = next(loader.full_variants_iterator())
    variant = first_entry[1][0]
    print(variant.gt)

    summary_blobs = serializer.serialize_summary_data(variant.alleles)
    scores_blob = serializer.serialize_scores_data(variant.alleles)
    variant_blob = serializer.serialize_family_variant(
        variant.alleles, summary_blobs, scores_blob)
    extra_blob = serializer.serialize_extra_attributes(variant)

    restored = serializer.deserialize_family_variant(
        variant_blob, variant.family, extra_blob)

    assert restored.get_attribute("someAttr")[0] == "asdf"
Example #3
0
def test_extra_attributes_loading_with_person_id(
        fixtures_gpf_instance, fixture_dirname):
    """Load a de novo file keyed by person id and check ``StudyName``.

    The loader is configured with custom column names, including
    ``SampleID`` as the person id column.  It must yield 17 entries, and
    the first family variant of each of the first four entries must carry
    the expected ``StudyName`` attribute value.
    """
    pedigree_path = fixture_dirname("backends/denovo-db-person-id.ped")
    families_data = FamiliesLoader(pedigree_path).load()

    column_mapping = {
        "denovo_chrom": "Chr",
        "denovo_pos": "Position",
        "denovo_ref": "Ref",
        "denovo_alt": "Alt",
        "denovo_person_id": "SampleID"
    }
    variants_path = fixture_dirname("backends/denovo-db-person-id.tsv")
    loader = DenovoLoader(
        families_data,
        variants_path,
        fixtures_gpf_instance.get_genome(),
        params=column_mapping,
    )

    variants = list(loader.full_variants_iterator())
    assert len(variants) == 17

    # Keep only the first family variant of every yielded entry.
    family_variants = [entry[1][0] for entry in variants]

    expected_studies = (
        "Turner_2017",
        "Turner_2017",
        "Turner_2017",
        "Lelieveld2016",
    )
    for position, study_name in enumerate(expected_studies):
        family_variant = family_variants[position]
        assert family_variant.get_attribute("StudyName")[0] == study_name

    for family_variant in family_variants:
        print(family_variant)
Example #4
0
def test_families_genotypes_decorator_broken_x(fixture_dirname, genome_2013):
    """Every loaded family variant must have the ``X_broken`` genetic model.

    Families are read from ``backends/denovo_families.txt`` in the "simple"
    pedigree format and variants from ``backends/denovo_X_broken.txt``.

    NOTE(review): the assertion lives inside the loops, so the test passes
    without checking anything if the loader yields no family variants.
    """
    pedigree_path = fixture_dirname("backends/denovo_families.txt")
    families = FamiliesLoader(pedigree_path, ped_file_format="simple").load()

    variants_path = fixture_dirname("backends/denovo_X_broken.txt")
    variants_loader = DenovoLoader(families, variants_path, genome_2013)

    entries = variants_loader.full_variants_iterator()
    for _summary_variant, family_variants in entries:
        for family_variant in family_variants:
            print(family_variant, family_variant.genetic_model)
            assert family_variant.genetic_model == GeneticModel.X_broken
Example #5
0
def test_denovo_loader(genome_2013, fixture_dirname, fake_families, filename,
                       params):
    """Check per-member inheritance of the first five loaded de novo alleles.

    ``filename`` is resolved under ``denovo_import/`` and loaded through
    ``DenovoLoader`` with the given ``params``.  For each of the first five
    yielded entries, the first alternative allele of the first family
    variant must report the expected ``inheritance_in_members`` list.

    NOTE(review): ``filename`` and ``params`` are presumably supplied by a
    pytest parametrization that is not visible in this block -- confirm.
    """
    denovo_filename = fixture_dirname(f"denovo_import/{filename}")
    variants_loader = DenovoLoader(fake_families,
                                   denovo_filename,
                                   genome=genome_2013,
                                   params=params)

    vs = list(variants_loader.full_variants_iterator())
    print(vs)

    unknown = Inheritance.unknown
    denovo = Inheritance.denovo
    missing = Inheritance.missing

    # One row per checked entry: the member positions that are spot-checked
    # for ``denovo`` first, followed by the complete expected list.
    expectations = [
        ((2, 4), [unknown, unknown, denovo, missing, denovo]),
        ((2,), [unknown, unknown, denovo, missing, missing]),
        ((3,), [unknown, unknown, missing, denovo]),
        ((0,), [denovo]),
        ((0,), [denovo, denovo]),
    ]

    for index, (denovo_positions, expected) in enumerate(expectations):
        # First alternative allele of the first family variant of the entry.
        allele = vs[index][1][0].alt_alleles[0]
        print(
            allele,
            allele.variant_in_members,
            allele.inheritance_in_members,
        )

        for position in denovo_positions:
            assert allele.inheritance_in_members[position] == denovo
        assert allele.inheritance_in_members == expected