def test_flexible_denovo_vcf_best_state(
        fixture_dirname, gpf_instance_2019, temp_dirname, genomes_db_2019):
    """Import the flexible-short best-state de novo file via ``main``.

    The import is run against the ``test_filesystem`` genotype storage with
    explicit column names for family id, best state, location, reference and
    alternative. After reloading the GPF instance, the queried variants are
    handed to ``assert_proper_flexible_short_variants``.
    """
    storage_id = "test_filesystem"
    study_name = "test_flexible_denovo_vcf_best_state"

    def checked_storage_config():
        # Looked up from the instance on every call, so the check performed
        # after the import reads the configuration again.
        config = getattr(gpf_instance_2019.dae_config.storage, storage_id)
        assert config.storage_type == "filesystem"
        return config

    ped_path = fixture_dirname(
        "flexible_short/flexible_short_families.ped")
    denovo_path = fixture_dirname(
        "flexible_short/flexible_short_vcf_best_state.txt")

    # The storage object must be constructible from the configuration.
    assert FilesystemGenotypeStorage(checked_storage_config(), storage_id)

    column_options = (
        ("--denovo-family-id", "familyId"),
        ("--denovo-best-state", "bestState"),
        ("--denovo-location", "location"),
        ("--denovo-ref", "reference"),
        ("--denovo-alt", "alternative"),
    )
    cli_args = [
        ped_path,
        "--id", study_name,
        "--skip-reports",
        "--denovo-file", denovo_path,
    ]
    for flag, column in column_options:
        cli_args += [flag, column]
    cli_args += ["--genotype-storage", storage_id, "-o", temp_dirname]

    main(cli_args, gpf_instance_2019)

    checked_storage_config()

    gpf_instance_2019.reload()
    imported = gpf_instance_2019.get_genotype_data(study_name)
    assert imported is not None

    assert_proper_flexible_short_variants(list(imported.query_variants()))
def test_import_iossifov2014_into_genotype_storage(
    genotype_storage_id,
    storage_type,
    genomes_db_2013,
    fixture_dirname,
    default_dae_config,
    gpf_instance_2013,
    temp_dirname,
):
    """Import the iossifov2014 de novo fixture and check variant counts.

    Expects 16 variants overall, 9 when filtering on the ``splice-site``
    effect type and 2 when filtering on ``no-frame-shift``.

    NOTE(review): ``storage_type``, ``genomes_db_2013`` and
    ``default_dae_config`` are requested but never referenced in the body;
    presumably they are needed for fixture setup -- confirm before removing.
    """
    ped_path = fixture_dirname(
        "dae_iossifov2014/iossifov2014_families.ped")
    denovo_path = fixture_dirname("dae_iossifov2014/iossifov2014.txt")
    study_name = f"test_denovo_iossifov2014_{genotype_storage_id}"

    cli_args = [ped_path]
    cli_args += ["--id", study_name]
    cli_args += ["--skip-reports"]
    cli_args += ["--denovo-file", denovo_path]
    cli_args += ["--denovo-location", "location"]
    cli_args += ["--denovo-variant", "variant"]
    cli_args += ["--denovo-family-id", "familyId"]
    cli_args += ["--denovo-best-state", "bestState"]
    cli_args += ["--genotype-storage", genotype_storage_id]
    cli_args += ["-o", temp_dirname]

    main(cli_args, gpf_instance_2013)

    gpf_instance_2013.reload()
    imported = gpf_instance_2013.get_genotype_data(study_name)
    assert imported is not None

    # (query keyword arguments, expected number of variants)
    expectations = (
        ({}, 16),
        ({"effect_types": ["splice-site"]}, 9),
        ({"effect_types": ["no-frame-shift"]}, 2),
    )
    for query_kwargs, expected_count in expectations:
        found = list(imported.query_variants(**query_kwargs))
        assert len(found) == expected_count
def test_import_comp_all_into_genotype_storage(
    genotype_storage_id,
    storage_type,
    genomes_db_2013,
    fixture_dirname,
    default_dae_config,
    gpf_instance_2013,
    temp_dirname,
):
    """Import the ``comp`` VCF and de novo fixtures together; expect 35 variants.

    Unlike the neighbouring import tests, this one does not pass
    ``--skip-reports`` to ``main``.

    NOTE(review): ``storage_type``, ``genomes_db_2013`` and
    ``default_dae_config`` are requested but never referenced in the body;
    presumably they are needed for fixture setup -- confirm before removing.
    """
    ped_path = fixture_dirname("study_import/comp.ped")
    vcf_path = fixture_dirname("study_import/comp.vcf")
    denovo_path = fixture_dirname("study_import/comp.tsv")
    study_name = f"test_comp_all_{genotype_storage_id}"

    vcf_options = [
        "--vcf-denovo-mode", "possible_denovo",
        "--vcf-omission-mode", "possible_omission",
        "--vcf-files", vcf_path,
    ]
    denovo_options = [
        "--denovo-file", denovo_path,
        "--denovo-location", "location",
        "--denovo-variant", "variant",
        "--denovo-family-id", "familyId",
        "--denovo-best-state", "bestState",
    ]
    output_options = [
        "--genotype-storage", genotype_storage_id,
        "-o", temp_dirname,
    ]
    cli_args = [
        ped_path,
        "--id", study_name,
        *vcf_options,
        *denovo_options,
        *output_options,
    ]

    main(cli_args, gpf_instance_2013)

    gpf_instance_2013.reload()
    imported = gpf_instance_2013.get_genotype_data(study_name)
    assert imported is not None

    all_variants = list(imported.query_variants())
    assert len(all_variants) == 35
def test_flexible_denovo_dae_person(
        fixture_dirname, gpf_instance_2019, temp_dirname, genomes_db_2019):
    """Import the flexible-short per-person de novo file via ``main``.

    The import is run against the ``test_filesystem`` genotype storage with
    the ``variant`` and ``person_id`` columns named explicitly. After
    reloading the GPF instance, the queried variants are handed to
    ``assert_proper_flexible_short_variants``.
    """
    storage_id = "test_filesystem"
    study_name = "test_flexible_denovo_dae_person"

    def checked_storage_config():
        # Looked up from the instance on every call, so the check performed
        # after the import reads the configuration again.
        config = getattr(gpf_instance_2019.dae_config.storage, storage_id)
        assert config.storage_type == "filesystem"
        return config

    ped_path = fixture_dirname(
        "flexible_short/flexible_short_families.ped")
    denovo_path = fixture_dirname(
        "flexible_short/flexible_short_dae_person.txt")

    # The storage object must be constructible from the configuration.
    assert FilesystemGenotypeStorage(checked_storage_config(), storage_id)

    cli_args = [ped_path, "--id", study_name, "--skip-reports"]
    cli_args.extend(["--denovo-file", denovo_path])
    cli_args.extend(["--denovo-variant", "variant"])
    cli_args.extend(["--denovo-person-id", "person_id"])
    cli_args.extend(["--genotype-storage", storage_id])
    cli_args.extend(["-o", temp_dirname])

    main(cli_args, gpf_instance_2019)

    checked_storage_config()

    gpf_instance_2019.reload()
    imported = gpf_instance_2019.get_genotype_data(study_name)
    assert imported is not None

    assert_proper_flexible_short_variants(list(imported.query_variants()))
def test_import_study_config_arg(
    genomes_db_2013,
    fixture_dirname,
    default_dae_config,
    gpf_instance_2013,
    temp_dirname,
):
    """Import the ``comp`` VCF with ``--study-config`` and check the result.

    After the import, the study's configuration must carry the name ``asdf``
    and the description asserted below, and the study must yield 30 variants.

    NOTE(review): ``genomes_db_2013`` and ``default_dae_config`` are
    requested but never referenced in the body; presumably they are needed
    for fixture setup -- confirm before removing.
    """
    storage_id = "test_filesystem"
    ped_path = fixture_dirname("study_import/comp.ped")
    vcf_path = fixture_dirname("study_import/comp.vcf")
    study_config_path = fixture_dirname("study_import/study_config.conf")
    study_name = f"test_comp_vcf_{storage_id}"

    option_pairs = (
        ("--vcf-denovo-mode", "possible_denovo"),
        ("--vcf-omission-mode", "possible_omission"),
        ("--vcf-files", vcf_path),
        ("--genotype-storage", storage_id),
        ("--study-config", study_config_path),
    )
    cli_args = [ped_path, "--id", study_name, "--skip-reports"]
    for flag, value in option_pairs:
        cli_args.append(flag)
        cli_args.append(value)
    cli_args.extend(["-F", "-o", temp_dirname])

    main(cli_args, gpf_instance_2013)

    gpf_instance_2013.reload()
    imported = gpf_instance_2013.get_genotype_data(study_name)
    assert imported is not None

    study_conf = gpf_instance_2013.get_genotype_data_config(study_name)
    assert study_conf.name == "asdf"
    assert study_conf.description == \
        "Description from study config given to tool"

    all_variants = list(imported.query_variants())
    assert len(all_variants) == 30
def test_import_denovo_dae_style_into_genotype_storage(
    genotype_storage_id,
    storage_type,
    genomes_db_2013,
    fixture_dirname,
    default_dae_config,
    gpf_instance_2013,
    temp_dirname,
):
    """Import ``variants_DAE_style.tsv`` as a de novo file; expect 5 variants.

    NOTE(review): ``storage_type``, ``genomes_db_2013`` and
    ``default_dae_config`` are requested but never referenced in the body;
    presumably they are needed for fixture setup -- confirm before removing.
    """
    ped_path = fixture_dirname("denovo_import/fake_pheno.ped")
    denovo_path = fixture_dirname("denovo_import/variants_DAE_style.tsv")
    study_name = f"test_denovo_dae_style_{genotype_storage_id}"

    # Column-name options, kept in the order they are passed to the tool.
    column_options = (
        ("--denovo-location", "location"),
        ("--denovo-variant", "variant"),
        ("--denovo-family-id", "familyId"),
        ("--denovo-best-state", "bestState"),
    )
    cli_args = [
        ped_path,
        "--id", study_name,
        "--skip-reports",
        "--denovo-file", denovo_path,
    ]
    for flag, column in column_options:
        cli_args += [flag, column]
    cli_args += ["--genotype-storage", genotype_storage_id]
    cli_args += ["-o", temp_dirname]

    main(cli_args, gpf_instance_2013)

    gpf_instance_2013.reload()
    imported = gpf_instance_2013.get_genotype_data(study_name)
    assert imported is not None

    all_variants = list(imported.query_variants())
    assert len(all_variants) == 5
def test_import_wild_multivcf_into_genotype_storage(
    genotype_storage_id,
    storage_type,
    genomes_db_2013,
    fixture_dirname,
    default_dae_config,
    gpf_instance_2013,
    temp_dirname,
):
    """Import two ``[vc]``-patterned multi-VCF fixtures; expect 48 variants.

    Both VCF paths contain the literal ``[vc]`` placeholder and the tool is
    given ``--vcf-chromosomes "chr1;chr2"``.

    NOTE(review): ``storage_type``, ``genomes_db_2013`` and
    ``default_dae_config`` are requested but never referenced in the body;
    presumably they are needed for fixture setup -- confirm before removing.
    """
    vcf_patterns = [
        fixture_dirname("multi_vcf/multivcf_missing1_[vc].vcf.gz"),
        fixture_dirname("multi_vcf/multivcf_missing2_[vc].vcf.gz"),
    ]
    ped_path = fixture_dirname("multi_vcf/multivcf.ped")
    study_name = f"test_wile_multivcf_{genotype_storage_id}"

    cli_args = [
        ped_path,
        "--id", study_name,
        "--skip-reports",
        "--vcf-denovo-mode", "possible_denovo",
        "--vcf-omission-mode", "possible_omission",
        "--vcf-files", *vcf_patterns,
        "--vcf-chromosomes", "chr1;chr2",
        "--genotype-storage", genotype_storage_id,
        "-o", temp_dirname,
    ]

    main(cli_args, gpf_instance_2013)

    gpf_instance_2013.reload()
    imported = gpf_instance_2013.get_genotype_data(study_name)
    assert imported is not None

    all_variants = list(imported.query_variants())
    assert len(all_variants) == 48
def test_denovo_db_import(
    fixture_dirname,
    temp_dirname,
    genotype_storage_id,
    storage_type,
    gpf_instance_2013,
):
    """Import the denovo-db person-id fixture; expect 17 ``denovo`` variants.

    The column names ``Chr``, ``Position``, ``Ref``, ``Alt`` and ``SampleID``
    are passed to the tool explicitly, and the resulting study is queried
    with ``inheritance="denovo"``.

    NOTE(review): ``storage_type`` is requested but never referenced in the
    body; presumably it is needed for fixture setup -- confirm.
    """
    ped_path = fixture_dirname("backends/denovo-db-person-id.ped")
    denovo_path = fixture_dirname("backends/denovo-db-person-id.tsv")
    study_name = f"test_denovo_db_import_{genotype_storage_id}"

    column_options = (
        ("--denovo-chrom", "Chr"),
        ("--denovo-pos", "Position"),
        ("--denovo-ref", "Ref"),
        ("--denovo-alt", "Alt"),
        ("--denovo-person-id", "SampleID"),
    )
    # The study id option comes before the positional pedigree path here.
    cli_args = [
        "--study-id", study_name,
        ped_path,
        "-o", temp_dirname,
        "--skip-reports",
    ]
    for flag, column in column_options:
        cli_args.extend((flag, column))
    cli_args.extend(("--denovo-file", denovo_path))
    cli_args.extend(("--genotype-storage", genotype_storage_id))

    main(cli_args, gpf_instance_2013)

    gpf_instance_2013.reload()
    imported = gpf_instance_2013.get_genotype_data(study_name)
    assert imported is not None

    denovo_variants = list(imported.query_variants(inheritance="denovo"))
    assert len(denovo_variants) == 17
def test_import_transmitted_dae_into_genotype_storage(
    genotype_storage_id,
    storage_type,
    genomes_db_2013,
    fixture_dirname,
    default_dae_config,
    gpf_instance_2013,
    temp_dirname,
):
    """Import the transmitted DAE summary fixture; expect 33 variants.

    The families file is passed with ``--ped-file-format simple`` and the
    variants come from ``--dae-summary-file``.

    NOTE(review): ``storage_type``, ``genomes_db_2013`` and
    ``default_dae_config`` are requested but never referenced in the body;
    presumably they are needed for fixture setup -- confirm before removing.
    """
    families_path = fixture_dirname(
        "dae_transmitted/transmission.families.txt")
    summary_path = fixture_dirname("dae_transmitted/transmission.txt.gz")
    study_name = f"test_dae_transmitted_{genotype_storage_id}"

    cli_args = [families_path]
    cli_args += ["--ped-file-format", "simple"]
    cli_args += ["--id", study_name]
    cli_args += ["--skip-reports"]
    cli_args += ["--dae-summary-file", summary_path]
    cli_args += ["--genotype-storage", genotype_storage_id]
    cli_args += ["-o", temp_dirname]

    main(cli_args, gpf_instance_2013)

    gpf_instance_2013.reload()
    imported = gpf_instance_2013.get_genotype_data(study_name)
    assert imported is not None

    all_variants = list(imported.query_variants())
    assert len(all_variants) == 33
def test_add_chrom_prefix_simple(
    genotype_storage_id,
    storage_type,
    genomes_db_2013,
    fixture_dirname,
    default_dae_config,
    gpf_instance_2013,
    temp_dirname,
):
    """Import the ``comp`` fixtures with ``--add-chrom-prefix`` and verify it.

    The study id is derived from the ``genotype_storage_id`` fixture, but the
    import itself always targets the ``test_filesystem`` storage. All 35
    resulting variants, and each of their alleles, must report a chromosome
    that starts with the requested prefix.

    NOTE(review): ``storage_type`` and ``genomes_db_2013`` are requested but
    never referenced in the body; presumably they are needed for fixture
    setup -- confirm before removing.
    """
    chrom_prefix = "ala_bala"

    pedigree_filename = fixture_dirname("study_import/comp.ped")
    vcf_filename = fixture_dirname("study_import/comp.vcf")
    denovo_filename = fixture_dirname("study_import/comp.tsv")

    study_id = f"test_comp_all_prefix_{genotype_storage_id}"

    # Use a separate name for the storage actually imported into, instead of
    # rebinding the fixture argument after it was used for the study id.
    filesystem_storage_id = "test_filesystem"
    storage_config = default_dae_config.storage.test_filesystem
    assert storage_config.storage_type == "filesystem"

    argv = [
        pedigree_filename,
        "--id", study_id,
        "--skip-reports",
        "--vcf-denovo-mode", "possible_denovo",
        "--vcf-omission-mode", "possible_omission",
        "--vcf-files", vcf_filename,
        "--denovo-file", denovo_filename,
        "--denovo-location", "location",
        "--denovo-variant", "variant",
        "--denovo-family-id", "familyId",
        "--denovo-best-state", "bestState",
        "--genotype-storage", filesystem_storage_id,
        "-o", temp_dirname,
        "--add-chrom-prefix", chrom_prefix,
    ]

    main(argv, gpf_instance_2013)

    gpf_instance_2013.reload()
    study = gpf_instance_2013.get_genotype_data(study_id)
    assert study is not None

    vs = list(study.query_variants())
    assert len(vs) == 35

    # Failure messages name the offending variant/allele, replacing the
    # unconditional debug prints.
    for v in vs:
        assert v.chromosome.startswith(chrom_prefix), str(v)
        for va in v.alleles:
            assert va.chromosome.startswith(chrom_prefix), str(va)