def test_denovo_loader_avoids_duplicates(
        genome_2013, fixture_dirname, fake_families):
    """Check variant counts loaded from the VCF-style ``dup`` fixture.

    Loads ``denovo_import/variants_VCF_style_dup.tsv`` through
    ``DenovoLoader`` with an explicit column mapping and asserts that the
    full-variants iterator yields 3 summary variants carrying 4 family
    variants in total.
    """
    denovo_filename = fixture_dirname(
        "denovo_import/variants_VCF_style_dup.tsv")
    column_mapping = {
        "denovo_chrom": "chrom",
        "denovo_pos": "pos",
        "denovo_ref": "ref",
        "denovo_alt": "alt",
        "denovo_family_id": "familyId",
        "denovo_best_state": "bestState"
    }
    loader = DenovoLoader(
        fake_families, denovo_filename,
        genome=genome_2013, params=column_mapping)

    svs = []
    fvs = []
    for summary_variant, family_variants in loader.full_variants_iterator():
        # Debug output: the summary variant plus the family variants
        # accumulated so far (before this iteration's are added).
        print(summary_variant, fvs)
        svs.append(summary_variant)
        fvs.extend(family_variants)

    assert len(svs) == 3
    assert len(fvs) == 4
def test_extra_attributes_serialization_deserialization(
        fixtures_gpf_instance, fixture_dirname):
    """Round-trip a family variant with extra attributes via the serializer.

    Takes the first family variant of the first item yielded by the loader
    for ``backends/iossifov_extra_attrs.tsv``, serializes its summary,
    scores, family-variant and extra-attribute data, deserializes it again
    and asserts that ``someAttr`` of the result starts with ``"asdf"``.
    """
    pedigree_path = fixture_dirname("backends/iossifov_extra_attrs.ped")
    families_data = FamiliesLoader.load_simple_families_file(pedigree_path)

    loader = DenovoLoader(
        families_data,
        fixture_dirname("backends/iossifov_extra_attrs.tsv"),
        fixtures_gpf_instance.get_genome()
    )

    serializer = AlleleParquetSerializer(
        loader.get_attribute("annotation_schema"),
        loader.get_attribute("extra_attributes"),
    )

    # Only the first (summary variant, family variants) pair is needed.
    _, first_family_variants = next(loader.full_variants_iterator())
    variant = first_family_variants[0]
    print(variant.gt)

    alleles = variant.alleles
    summary_blobs = serializer.serialize_summary_data(alleles)
    scores_blob = serializer.serialize_scores_data(variant.alleles)
    variant_blob = serializer.serialize_family_variant(
        variant.alleles, summary_blobs, scores_blob
    )
    extra_blob = serializer.serialize_extra_attributes(variant)

    restored = serializer.deserialize_family_variant(
        variant_blob, variant.family, extra_blob)

    assert restored.get_attribute("someAttr")[0] == "asdf"
def test_extra_attributes_loading_with_person_id(
        fixtures_gpf_instance, fixture_dirname):
    """Check ``StudyName`` extra attributes when loading by person id.

    Loads ``backends/denovo-db-person-id.tsv`` with a column mapping that
    identifies individuals through the ``SampleID`` column, asserts that 17
    items are yielded, and verifies the ``StudyName`` attribute of the first
    family variant of each of the first four items.
    """
    families_data = FamiliesLoader(
        fixture_dirname("backends/denovo-db-person-id.ped")).load()

    column_mapping = {
        "denovo_chrom": "Chr",
        "denovo_pos": "Position",
        "denovo_ref": "Ref",
        "denovo_alt": "Alt",
        "denovo_person_id": "SampleID"
    }
    loader = DenovoLoader(
        families_data,
        fixture_dirname("backends/denovo-db-person-id.tsv"),
        fixtures_gpf_instance.get_genome(),
        params=column_mapping
    )

    variants = list(loader.full_variants_iterator())
    assert len(variants) == 17

    # First family variant of every (summary variant, family variants) pair.
    family_variants = [fvs[0] for _, fvs in variants]

    expected_studies = [
        "Turner_2017",
        "Turner_2017",
        "Turner_2017",
        "Lelieveld2016",
    ]
    for index, study_name in enumerate(expected_studies):
        observed = family_variants[index].get_attribute("StudyName")[0]
        assert observed == study_name

    for family_variant in family_variants:
        print(family_variant)
def test_families_genotypes_decorator_broken_x(fixture_dirname, genome_2013):
    """Assert every family variant from the broken-X fixture is ``X_broken``.

    Families come from ``backends/denovo_families.txt`` (loaded with the
    ``simple`` pedigree format) and variants from
    ``backends/denovo_X_broken.txt``.
    """
    families = FamiliesLoader(
        fixture_dirname("backends/denovo_families.txt"),
        ped_file_format="simple",
    ).load()

    loader = DenovoLoader(
        families,
        fixture_dirname("backends/denovo_X_broken.txt"),
        genome_2013)

    for _summary_variant, family_variants in loader.full_variants_iterator():
        for family_variant in family_variants:
            print(family_variant, family_variant.genetic_model)
            assert family_variant.genetic_model == GeneticModel.X_broken
def test_denovo_loader(genome_2013, fixture_dirname, fake_families,
                       filename, params):
    """Check per-member inheritance of the first five loaded de novo variants.

    For each of the first five items yielded by the loader, the first
    alternative allele of the first family variant is inspected and its
    ``inheritance_in_members`` list is compared with the expected values.

    NOTE(review): ``filename`` and ``params`` are presumably supplied by a
    ``pytest.mark.parametrize`` decorator that is not visible here - confirm.
    """
    denovo_filename = fixture_dirname(f"denovo_import/{filename}")
    loader = DenovoLoader(
        fake_families, denovo_filename, genome=genome_2013, params=params)
    vs = list(loader.full_variants_iterator())
    print(vs)

    unknown = Inheritance.unknown
    denovo = Inheritance.denovo
    missing = Inheritance.missing

    # One row per variant index:
    # (member positions that must be denovo, complete expected list).
    expectations = [
        ((2, 4), [unknown, unknown, denovo, missing, denovo]),
        ((2,), [unknown, unknown, denovo, missing, missing]),
        ((3,), [unknown, unknown, missing, denovo]),
        ((0,), [denovo]),
        ((0,), [denovo, denovo]),
    ]

    for index, (denovo_positions, expected) in enumerate(expectations):
        # First alternative allele of the first family variant at `index`.
        fa = vs[index][1][0].alt_alleles[0]
        print(fa, fa.variant_in_members, fa.inheritance_in_members)

        for position in denovo_positions:
            assert fa.inheritance_in_members[position] == Inheritance.denovo
        assert fa.inheritance_in_members == expected