Ejemplo n.º 1
0
def test_flexible_denovo_vcf_best_state(fixture_dirname, gpf_instance_2019,
                                        temp_dirname, genomes_db_2019):
    """Run ``main`` on a denovo file with location/ref/alt/best-state columns.

    The study is written to the ``test_filesystem`` genotype storage; after
    reloading the GPF instance, all variants of the new study are passed to
    ``assert_proper_flexible_short_variants``.

    ``genomes_db_2019`` is requested but never referenced in the body
    (presumably needed only for its fixture setup -- TODO confirm).
    """
    storage_id = "test_filesystem"
    study_name = "test_flexible_denovo_vcf_best_state"

    ped_path = fixture_dirname("flexible_short/flexible_short_families.ped")
    denovo_path = fixture_dirname(
        "flexible_short/flexible_short_vcf_best_state.txt")

    def lookup_storage_config():
        # The storage section is addressed by attribute name.
        return getattr(gpf_instance_2019.dae_config.storage, storage_id)

    # Before the run: the configured storage must be a filesystem one and
    # a storage object built from that config must be truthy.
    config_before = lookup_storage_config()
    assert config_before.storage_type == "filesystem"
    assert FilesystemGenotypeStorage(config_before, storage_id)

    # Column-name mapping for the denovo file.
    denovo_columns = [
        "--denovo-family-id", "familyId",
        "--denovo-best-state", "bestState",
        "--denovo-location", "location",
        "--denovo-ref", "reference",
        "--denovo-alt", "alternative",
    ]
    argv = [
        ped_path,
        "--id", study_name,
        "--skip-reports",
        "--denovo-file", denovo_path,
        *denovo_columns,
        "--genotype-storage", storage_id,
        "-o", temp_dirname,
    ]
    main(argv, gpf_instance_2019)

    # After the run: the storage type is checked once more.
    assert lookup_storage_config().storage_type == "filesystem"

    gpf_instance_2019.reload()
    imported = gpf_instance_2019.get_genotype_data(study_name)
    assert imported is not None

    assert_proper_flexible_short_variants(list(imported.query_variants()))
Ejemplo n.º 2
0
def test_import_iossifov2014_into_genotype_storage(
    genotype_storage_id,
    storage_type,
    genomes_db_2013,
    fixture_dirname,
    default_dae_config,
    gpf_instance_2013,
    temp_dirname,
):
    """Run ``main`` on the iossifov2014 fixtures and count queried variants.

    Expected counts: 16 variants overall, 9 with effect type
    ``splice-site`` and 2 with effect type ``no-frame-shift``.

    ``storage_type``, ``genomes_db_2013`` and ``default_dae_config`` are
    requested but not referenced in the body (presumably for fixture
    setup only -- TODO confirm).
    """
    ped_path = fixture_dirname("dae_iossifov2014/iossifov2014_families.ped")
    denovo_path = fixture_dirname("dae_iossifov2014/iossifov2014.txt")

    study_id = f"test_denovo_iossifov2014_{genotype_storage_id}"

    # Column-name mapping for the denovo file.
    denovo_columns = [
        "--denovo-location", "location",
        "--denovo-variant", "variant",
        "--denovo-family-id", "familyId",
        "--denovo-best-state", "bestState",
    ]
    argv = [
        ped_path,
        "--id", study_id,
        "--skip-reports",
        "--denovo-file", denovo_path,
        *denovo_columns,
        "--genotype-storage", genotype_storage_id,
        "-o", temp_dirname,
    ]
    main(argv, gpf_instance_2013)

    gpf_instance_2013.reload()
    imported = gpf_instance_2013.get_genotype_data(study_id)
    assert imported is not None

    # (query keyword arguments, expected number of variants), checked in
    # the same order as the individual assertions they replace.
    expectations = (
        ({}, 16),
        ({"effect_types": ["splice-site"]}, 9),
        ({"effect_types": ["no-frame-shift"]}, 2),
    )
    for query_kwargs, expected_count in expectations:
        found = list(imported.query_variants(**query_kwargs))
        assert len(found) == expected_count
Ejemplo n.º 3
0
def test_import_comp_all_into_genotype_storage(
    genotype_storage_id,
    storage_type,
    genomes_db_2013,
    fixture_dirname,
    default_dae_config,
    gpf_instance_2013,
    temp_dirname,
):
    """Run ``main`` with both a VCF file and a denovo file for one study.

    After reloading the GPF instance the study must exist and an
    unfiltered query must return 35 variants.

    Note that ``--skip-reports`` is not passed here.  ``storage_type``,
    ``genomes_db_2013`` and ``default_dae_config`` are requested but not
    referenced in the body (presumably for fixture setup only -- TODO
    confirm).
    """
    ped_path = fixture_dirname("study_import/comp.ped")
    vcf_path = fixture_dirname("study_import/comp.vcf")
    denovo_path = fixture_dirname("study_import/comp.tsv")

    study_id = f"test_comp_all_{genotype_storage_id}"

    vcf_options = [
        "--vcf-denovo-mode", "possible_denovo",
        "--vcf-omission-mode", "possible_omission",
        "--vcf-files", vcf_path,
    ]
    denovo_options = [
        "--denovo-file", denovo_path,
        "--denovo-location", "location",
        "--denovo-variant", "variant",
        "--denovo-family-id", "familyId",
        "--denovo-best-state", "bestState",
    ]
    argv = [
        ped_path,
        "--id", study_id,
        *vcf_options,
        *denovo_options,
        "--genotype-storage", genotype_storage_id,
        "-o", temp_dirname,
    ]
    main(argv, gpf_instance_2013)

    gpf_instance_2013.reload()
    imported = gpf_instance_2013.get_genotype_data(study_id)
    assert imported is not None

    assert len(list(imported.query_variants())) == 35
Ejemplo n.º 4
0
def test_flexible_denovo_dae_person(fixture_dirname, gpf_instance_2019,
                                    temp_dirname, genomes_db_2019):
    """Run ``main`` on a denovo file described by variant/person-id columns.

    The study is written to the ``test_filesystem`` genotype storage; after
    reloading the GPF instance, all variants of the new study are passed to
    ``assert_proper_flexible_short_variants``.

    ``genomes_db_2019`` is requested but never referenced in the body
    (presumably needed only for its fixture setup -- TODO confirm).
    """
    storage_id = "test_filesystem"
    study_name = "test_flexible_denovo_dae_person"

    ped_path = fixture_dirname("flexible_short/flexible_short_families.ped")
    denovo_path = fixture_dirname(
        "flexible_short/flexible_short_dae_person.txt")

    def lookup_storage_config():
        # The storage section is addressed by attribute name.
        return getattr(gpf_instance_2019.dae_config.storage, storage_id)

    # Before the run: the configured storage must be a filesystem one and
    # a storage object built from that config must be truthy.
    config_before = lookup_storage_config()
    assert config_before.storage_type == "filesystem"
    assert FilesystemGenotypeStorage(config_before, storage_id)

    argv = [
        ped_path,
        "--id", study_name,
        "--skip-reports",
        "--denovo-file", denovo_path,
        "--denovo-variant", "variant",
        "--denovo-person-id", "person_id",
        "--genotype-storage", storage_id,
        "-o", temp_dirname,
    ]
    main(argv, gpf_instance_2019)

    # After the run: the storage type is checked once more.
    assert lookup_storage_config().storage_type == "filesystem"

    gpf_instance_2019.reload()
    imported = gpf_instance_2019.get_genotype_data(study_name)
    assert imported is not None

    assert_proper_flexible_short_variants(list(imported.query_variants()))
Ejemplo n.º 5
0
def test_import_study_config_arg(
    genomes_db_2013,
    fixture_dirname,
    default_dae_config,
    gpf_instance_2013,
    temp_dirname,
):
    """Run ``main`` with ``--study-config`` and check the resulting config.

    After reloading the GPF instance, the study's configuration must carry
    the name ``asdf`` and the description ``Description from study config
    given to tool``, and an unfiltered query must return 30 variants.

    ``genomes_db_2013`` and ``default_dae_config`` are requested but not
    referenced in the body (presumably for fixture setup only -- TODO
    confirm).
    """
    genotype_storage_id = "test_filesystem"
    study_id = f"test_comp_vcf_{genotype_storage_id}"

    ped_path = fixture_dirname("study_import/comp.ped")
    vcf_path = fixture_dirname("study_import/comp.vcf")
    study_config_path = fixture_dirname("study_import/study_config.conf")

    vcf_options = [
        "--vcf-denovo-mode", "possible_denovo",
        "--vcf-omission-mode", "possible_omission",
        "--vcf-files", vcf_path,
    ]
    argv = [
        ped_path,
        "--id", study_id,
        "--skip-reports",
        *vcf_options,
        "--genotype-storage", genotype_storage_id,
        "--study-config", study_config_path,
        "-F",
        "-o", temp_dirname,
    ]
    main(argv, gpf_instance_2013)

    gpf_instance_2013.reload()
    imported = gpf_instance_2013.get_genotype_data(study_id)
    assert imported is not None

    # Values expected to come from the supplied study config file.
    imported_config = gpf_instance_2013.get_genotype_data_config(study_id)
    assert imported_config.name == "asdf"
    assert (
        imported_config.description
        == "Description from study config given to tool"
    )

    assert len(list(imported.query_variants())) == 30
Ejemplo n.º 6
0
def test_import_denovo_dae_style_into_genotype_storage(
    genotype_storage_id,
    storage_type,
    genomes_db_2013,
    fixture_dirname,
    default_dae_config,
    gpf_instance_2013,
    temp_dirname,
):
    """Run ``main`` on the ``variants_DAE_style.tsv`` denovo fixture.

    After reloading the GPF instance the study must exist and an
    unfiltered query must return 5 variants.

    ``storage_type``, ``genomes_db_2013`` and ``default_dae_config`` are
    requested but not referenced in the body (presumably for fixture
    setup only -- TODO confirm).
    """
    ped_path = fixture_dirname("denovo_import/fake_pheno.ped")
    denovo_path = fixture_dirname("denovo_import/variants_DAE_style.tsv")

    study_id = f"test_denovo_dae_style_{genotype_storage_id}"

    # Column-name mapping for the denovo file.
    denovo_columns = [
        "--denovo-location", "location",
        "--denovo-variant", "variant",
        "--denovo-family-id", "familyId",
        "--denovo-best-state", "bestState",
    ]
    argv = [
        ped_path,
        "--id", study_id,
        "--skip-reports",
        "--denovo-file", denovo_path,
        *denovo_columns,
        "--genotype-storage", genotype_storage_id,
        "-o", temp_dirname,
    ]
    main(argv, gpf_instance_2013)

    gpf_instance_2013.reload()
    imported = gpf_instance_2013.get_genotype_data(study_id)
    assert imported is not None

    assert len(list(imported.query_variants())) == 5
Ejemplo n.º 7
0
def test_import_wild_multivcf_into_genotype_storage(
    genotype_storage_id,
    storage_type,
    genomes_db_2013,
    fixture_dirname,
    default_dae_config,
    gpf_instance_2013,
    temp_dirname,
):
    """Run ``main`` with two ``[vc]``-patterned VCF names and two chromosomes.

    The VCF file names contain the literal placeholder ``[vc]`` and
    ``--vcf-chromosomes`` is given as ``chr1;chr2`` (presumably the
    placeholder is expanded per chromosome by the tool -- TODO confirm).
    After reloading the GPF instance the study must exist and an
    unfiltered query must return 48 variants.

    ``storage_type``, ``genomes_db_2013`` and ``default_dae_config`` are
    requested but not referenced in the body (presumably for fixture
    setup only -- TODO confirm).
    """
    ped_path = fixture_dirname("multi_vcf/multivcf.ped")
    vcf_paths = [
        fixture_dirname("multi_vcf/multivcf_missing1_[vc].vcf.gz"),
        fixture_dirname("multi_vcf/multivcf_missing2_[vc].vcf.gz"),
    ]

    # NOTE(review): "wile" looks like a typo for "wild"; kept unchanged
    # because the id is only used inside this test.
    study_id = f"test_wile_multivcf_{genotype_storage_id}"

    argv = [
        ped_path,
        "--id", study_id,
        "--skip-reports",
        "--vcf-denovo-mode", "possible_denovo",
        "--vcf-omission-mode", "possible_omission",
        "--vcf-files", *vcf_paths,
        "--vcf-chromosomes", "chr1;chr2",
        "--genotype-storage", genotype_storage_id,
        "-o", temp_dirname,
    ]
    main(argv, gpf_instance_2013)

    gpf_instance_2013.reload()
    imported = gpf_instance_2013.get_genotype_data(study_id)
    assert imported is not None

    assert len(list(imported.query_variants())) == 48
Ejemplo n.º 8
0
def test_denovo_db_import(
    fixture_dirname,
    temp_dirname,
    genotype_storage_id,
    storage_type,
    gpf_instance_2013,
):
    """Run ``main`` on the ``denovo-db-person-id`` fixtures.

    The denovo file is described through separate chromosome, position,
    reference, alternative and person-id column names.  After reloading
    the GPF instance, a query with ``inheritance="denovo"`` must return
    17 variants.

    ``storage_type`` is requested but not referenced in the body
    (presumably for fixture setup only -- TODO confirm).
    """
    ped_path = fixture_dirname("backends/denovo-db-person-id.ped")
    denovo_path = fixture_dirname("backends/denovo-db-person-id.tsv")
    study_id = f"test_denovo_db_import_{genotype_storage_id}"

    # Column-name mapping for the denovo file.
    denovo_columns = [
        "--denovo-chrom", "Chr",
        "--denovo-pos", "Position",
        "--denovo-ref", "Ref",
        "--denovo-alt", "Alt",
        "--denovo-person-id", "SampleID",
    ]
    # Unlike the sibling tests, the study id option comes first and is
    # spelled ``--study-id``; the argument order is kept as is.
    argv = [
        "--study-id", study_id,
        ped_path,
        "-o", temp_dirname,
        "--skip-reports",
        *denovo_columns,
        "--denovo-file", denovo_path,
        "--genotype-storage", genotype_storage_id,
    ]
    main(argv, gpf_instance_2013)

    gpf_instance_2013.reload()
    imported = gpf_instance_2013.get_genotype_data(study_id)
    assert imported is not None

    denovo_variants = list(imported.query_variants(inheritance="denovo"))
    assert len(denovo_variants) == 17
Ejemplo n.º 9
0
def test_import_transmitted_dae_into_genotype_storage(
    genotype_storage_id,
    storage_type,
    genomes_db_2013,
    fixture_dirname,
    default_dae_config,
    gpf_instance_2013,
    temp_dirname,
):
    """Run ``main`` on a DAE summary file with a ``simple`` families file.

    After reloading the GPF instance the study must exist and an
    unfiltered query must return 33 variants.

    ``storage_type``, ``genomes_db_2013`` and ``default_dae_config`` are
    requested but not referenced in the body (presumably for fixture
    setup only -- TODO confirm).
    """
    families_path = fixture_dirname(
        "dae_transmitted/transmission.families.txt")
    summary_path = fixture_dirname("dae_transmitted/transmission.txt.gz")
    study_id = f"test_dae_transmitted_{genotype_storage_id}"

    argv = [
        families_path,
        "--ped-file-format", "simple",
        "--id", study_id,
        "--skip-reports",
        "--dae-summary-file", summary_path,
        "--genotype-storage", genotype_storage_id,
        "-o", temp_dirname,
    ]
    main(argv, gpf_instance_2013)

    gpf_instance_2013.reload()
    imported = gpf_instance_2013.get_genotype_data(study_id)
    assert imported is not None

    assert len(list(imported.query_variants())) == 33
Ejemplo n.º 10
0
def test_add_chrom_prefix_simple(
    genotype_storage_id,
    storage_type,
    genomes_db_2013,
    fixture_dirname,
    default_dae_config,
    gpf_instance_2013,
    temp_dirname,
):
    """Run ``main`` with ``--add-chrom-prefix`` and check every chromosome.

    The study id is derived from the ``genotype_storage_id`` fixture, but
    the run itself always targets the ``test_filesystem`` storage.  After
    reloading the GPF instance, an unfiltered query must return 35
    variants, and the chromosome of every variant and of every one of its
    alleles must start with ``ala_bala``.

    ``storage_type`` and ``genomes_db_2013`` are requested but not
    referenced in the body (presumably for fixture setup only -- TODO
    confirm).
    """
    ped_path = fixture_dirname("study_import/comp.ped")
    vcf_path = fixture_dirname("study_import/comp.vcf")
    denovo_path = fixture_dirname("study_import/comp.tsv")

    # The id uses the fixture value; the storage used below is fixed.
    study_id = f"test_comp_all_prefix_{genotype_storage_id}"
    filesystem_storage_id = "test_filesystem"

    assert (
        default_dae_config.storage.test_filesystem.storage_type
        == "filesystem"
    )

    vcf_options = [
        "--vcf-denovo-mode", "possible_denovo",
        "--vcf-omission-mode", "possible_omission",
        "--vcf-files", vcf_path,
    ]
    denovo_options = [
        "--denovo-file", denovo_path,
        "--denovo-location", "location",
        "--denovo-variant", "variant",
        "--denovo-family-id", "familyId",
        "--denovo-best-state", "bestState",
    ]
    argv = [
        ped_path,
        "--id", study_id,
        "--skip-reports",
        *vcf_options,
        *denovo_options,
        "--genotype-storage", filesystem_storage_id,
        "-o", temp_dirname,
        "--add-chrom-prefix", "ala_bala",
    ]
    main(argv, gpf_instance_2013)

    gpf_instance_2013.reload()

    imported = gpf_instance_2013.get_genotype_data(study_id)
    assert imported is not None

    variants = list(imported.query_variants())
    assert len(variants) == 35

    # Each variant and allele is printed before its check, so the output
    # shows the offending item when an assertion fails.
    for variant in variants:
        print(variant)
        assert variant.chromosome.startswith("ala_bala")
        for allele in variant.alleles:
            print("\t", allele)
            assert allele.chromosome.startswith("ala_bala")