def test_proband_column(fixture_dirname):
    """Check the ``proband`` attribute and roles with ``ped_no_role=True``.

    Two fixture pedigrees are loaded: for ``pedigree_no_role_F.ped`` no
    person is expected to have a ``proband`` attribute; for
    ``pedigree_no_role_H.ped`` every person is expected to have one and the
    roles of the ``full_members`` of family ``f1`` are compared position by
    position.
    """
    no_role = {"ped_no_role": True}

    path_without = fixture_dirname("pedigrees/pedigree_no_role_F.ped")
    loaded = FamiliesLoader(path_without, **no_role).load()
    for member in loaded.persons.values():
        assert not member.has_attr("proband")

    path_with = fixture_dirname("pedigrees/pedigree_no_role_H.ped")
    loaded = FamiliesLoader(path_with, **no_role).load()
    for member in loaded.persons.values():
        assert member.has_attr("proband")

    f1 = loaded.get("f1")
    assert f1 is not None

    # Expected role of each member, in the order of ``full_members``.
    expected_roles = [
        Role.maternal_grandfather,
        Role.maternal_grandmother,
        Role.paternal_grandfather,
        Role.paternal_grandmother,
        Role.dad,
        Role.mom,
        Role.maternal_aunt,
        Role.unknown,
        Role.unknown,
        Role.paternal_uncle,
        Role.prb,
        Role.sib,
        Role.maternal_cousin,
        Role.paternal_cousin,
    ]
    ordered = f1.full_members
    for position, expected in enumerate(expected_roles):
        assert ordered[position].role == expected
Example #2
0
def fam2():
    """Return family ``f2`` loaded from the comma-separated ``PED2`` text.

    Asserts that the ``trios`` of the family has length 1 before returning.
    """
    pedigree_buffer = StringIO(PED2)
    loaded = FamiliesLoader(pedigree_buffer, ped_sep=",").load()
    result = loaded["f2"]

    assert len(result.trios) == 1
    return result
Example #3
0
def test_famlies_loader_simple(pedigree, fixture_dirname):
    """Load the ``pedigree`` fixture file and check something is returned."""
    ped_path = fixture_dirname(f"pedigrees/{pedigree}")
    assert os.path.exists(ped_path)

    loaded = FamiliesLoader(ped_path).load()
    assert loaded is not None
Example #4
0
def test_families_loader_no_role(pedigree, fixture_dirname):
    """Load a pedigree with ``ped_no_role=True`` and inspect family ``f1``.

    Exactly one member with role ``prb`` (person ``f1.prb``) and exactly one
    with role ``sib`` (person ``f1.sib``) are expected.
    """
    ped_path = fixture_dirname(f"pedigrees/{pedigree}")
    assert os.path.exists(ped_path)

    loaded = FamiliesLoader(ped_path, ped_no_role=True).load()

    assert loaded is not None
    assert isinstance(loaded, FamiliesData)

    family = loaded["f1"]
    assert family is not None

    expectations = (
        ("prb", "f1.prb"),
        ("sib", "f1.sib"),
    )
    for role_name, expected_person_id in expectations:
        matching = family.get_members_with_roles([role_name])
        assert len(matching) == 1

        only_member = matching[0]
        assert only_member.person_id == expected_person_id
Example #5
0
def test_wild_vcf_loader_simple(fixture_dirname, gpf_instance_2013):
    """Load two wildcard VCF filenames and check the summary indexes.

    The ``[vc]`` placeholder in the filenames is used together with the
    ``vcf_chromosomes`` parameter. The loader is expected to hold two
    ``vcf_loaders`` and the summary indexes yielded by
    ``full_variants_iterator`` are expected to be ``0, 1, 2, ...``.
    """
    vcf_paths = [
        fixture_dirname("multi_vcf/multivcf_missing1_chr[vc].vcf.gz"),
        fixture_dirname("multi_vcf/multivcf_missing2_chr[vc].vcf.gz"),
    ]
    ped_path = fixture_dirname("multi_vcf/multivcf.ped")

    loaded_families = FamiliesLoader(ped_path).load()
    genome = gpf_instance_2013.genomes_db.get_genome()

    vcf = VcfLoader(
        loaded_families,
        vcf_paths,
        genome,
        params={"vcf_chromosomes": "1;2"},
    )
    assert vcf is not None
    assert len(vcf.vcf_loaders) == 2

    seen_indexes = [
        summary.summary_index
        for summary, _ in vcf.full_variants_iterator()
    ]
    assert seen_indexes == list(range(len(seen_indexes)))
Example #6
0
def test_extra_attributes_loading_with_person_id(
        fixtures_gpf_instance, fixture_dirname):
    """Load de novo variants keyed by person id and check ``StudyName``.

    17 variants are expected; the first element of the ``StudyName``
    attribute of the first family variant of each of the first four
    variants is compared against the expected study name. Every collected
    family variant is printed at the end.
    """
    column_mapping = {
        "denovo_chrom": "Chr",
        "denovo_pos": "Position",
        "denovo_ref": "Ref",
        "denovo_alt": "Alt",
        "denovo_person_id": "SampleID"
    }

    ped_path = fixture_dirname("backends/denovo-db-person-id.ped")
    loaded_families = FamiliesLoader(ped_path).load()

    denovo = DenovoLoader(
        loaded_families,
        fixture_dirname("backends/denovo-db-person-id.tsv"),
        fixtures_gpf_instance.get_genome(),
        params=column_mapping
    )

    all_variants = list(denovo.full_variants_iterator())
    assert len(all_variants) == 17

    # Keep only the first family variant of each (summary, family) pair.
    first_family_variants = [pair[1][0] for pair in all_variants]

    expected_studies = (
        "Turner_2017",
        "Turner_2017",
        "Turner_2017",
        "Lelieveld2016",
    )
    for position, study_name in enumerate(expected_studies):
        family_variant = first_family_variants[position]
        assert family_variant.get_attribute("StudyName")[0] == study_name

    for family_variant in first_family_variants:
        print(family_variant)
Example #7
0
def main(argv=None):
    """Draw the families of a pedigree file into a PDF document.

    Command line arguments are parsed with ``argparse``; the pedigree is
    loaded through ``FamiliesLoader`` and every figure yielded by
    ``draw_families_report`` (``--mode report``, the default) or
    ``draw_families`` (``--mode families``) is saved through a
    ``PDFLayoutDrawer`` writing to the ``--output`` file, then closed.

    Args:
        argv: command line arguments without the program name. When
            ``None`` (the default) ``argparse`` reads ``sys.argv[1:]`` at
            call time instead of a copy captured when the module was
            imported.
    """
    parser = argparse.ArgumentParser(
        description="Produce a pedigree drawing in PDF format "
        "from a pedigree file with layout coordinates.",
        conflict_handler="resolve",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    parser.add_argument('--verbose', '-V', action='count', default=0)

    FamiliesLoader.cli_arguments(parser)

    parser.add_argument(
        "--output",
        "-o",
        metavar="o",
        help="the output filename file",
        default="output.pdf",
    )

    parser.add_argument(
        "--mode",
        type=str,
        default="report",
        dest="mode",
        # Let argparse reject unsupported modes with a usage error; an
        # ``assert`` would be stripped when running under ``python -O``.
        choices=("families", "report"),
        help="mode of drawing; supported modes are `families` and `report`; "
        "defaults: `report`",
    )

    args = parser.parse_args(argv)

    # WARNING is used both without -V and with a single -V.
    if args.verbose >= 3:
        log_level = logging.DEBUG
    elif args.verbose == 2:
        log_level = logging.INFO
    else:
        log_level = logging.WARNING
    logging.basicConfig(level=log_level)

    # Cap matplotlib's logger at WARNING regardless of the chosen verbosity.
    logging.getLogger("matplotlib").setLevel(logging.WARNING)

    filename, params = FamiliesLoader.parse_cli_arguments(args)
    families_loader = FamiliesLoader(filename, **params)
    families = families_loader.load()

    mode = args.mode
    print("mode:", mode)
    if mode == "report":
        generator = draw_families_report(families)
    else:
        generator = draw_families(families)

    with PDFLayoutDrawer(args.output) as pdf_drawer:
        for fig in generator:
            pdf_drawer.savefig(fig)
            # Close each figure once it has been saved.
            plt.close(fig)
Example #8
0
def test_families_ped_df(pedigree, temp_filename, fixture_dirname):
    """Check that ``ped_df`` is available although ``_ped_df`` starts unset.

    Right after loading, ``_ped_df`` is expected to be ``None``; reading the
    ``ped_df`` attribute is expected to return a non-``None`` value.
    """
    ped_path = fixture_dirname(f"pedigrees/{pedigree}")
    assert os.path.exists(ped_path)

    loaded = FamiliesLoader(ped_path).load()
    assert loaded._ped_df is None

    frame = loaded.ped_df
    assert frame is not None
Example #9
0
def test_wild_vcf_loader_pedigree_union(fixture_dirname, gpf_instance_2013):
    """Load two wildcard VCFs with ``vcf_pedigree_mode="union"``.

    Both underlying VCF loaders are expected to see equal persons and
    families, 20 persons each, and no person of the combined loader is
    expected to be flagged as ``missing``.
    """
    # f1: f1.mom f1.dad f1.p1 f1.s1
    # f2: f2.mom f2.dad f2.p1 f2.s1
    # f3: f3.mom f3.dad f3.p1 f3.s1
    # f4: f4.mom f4.dad f4.p1 f4.s1
    # f5: f5.mom f5.dad f5.p1 f5.s1

    vcf_paths = [
        fixture_dirname("multi_vcf/multivcf_pedigree1_chr[vc].vcf.gz"),
        fixture_dirname("multi_vcf/multivcf_pedigree2_chr[vc].vcf.gz"),
    ]
    ped_path = fixture_dirname("multi_vcf/multivcf.ped")

    loaded_families = FamiliesLoader(ped_path).load()

    loader_params = {
        "vcf_chromosomes": "1;2",
        "vcf_pedigree_mode": "union",
        "vcf_include_unknown_person_genotypes": True,
        "vcf_include_unknown_family_genotypes": True,
    }
    vcf = VcfLoader(
        loaded_families,
        vcf_paths,
        gpf_instance_2013.genomes_db.get_genome(),
        params=loader_params,
    )

    assert vcf is not None
    assert len(vcf.vcf_loaders) == 2

    for inner_loader in vcf.vcf_loaders:
        print(inner_loader.families.persons)

    combined = vcf.families
    first = vcf.vcf_loaders[0].families
    second = vcf.vcf_loaders[1].families

    # Persons are compared pairwise in the iteration order of each loader.
    person_pairs = zip(first.persons.values(), second.persons.values())
    for left_person, right_person in person_pairs:
        assert left_person == right_person

    for family_id in first.keys():
        left_family = first[family_id]
        right_family = second[family_id]
        assert left_family == right_family

    assert len(combined.persons) == 20
    assert len(first.persons) == 20
    assert len(second.persons) == 20

    for member in combined.persons.values():
        assert not member.missing, member
def test_pedigree_keep_family_order_local():
    """Check that the last member in order of three families is the proband.

    The pedigree comes from the comma-separated ``PED_FILE1`` text; the
    ``members_in_order`` of each checked family is printed first.
    """
    loaded = FamiliesLoader(StringIO(PED_FILE1), ped_sep=",").load()

    for family_id in ("SF0043014", "SF0033119", "SF0014912"):
        family = loaded[family_id]
        print(family.members_in_order)
        assert family.members_in_order[-1].role == Role.prb
Example #11
0
def dae_denovo(dae_denovo_config, genome_2013, annotation_pipeline_internal):
    """Build ``RawMemoryVariants`` over annotated de novo variants.

    The families file named by ``dae_denovo_config.family_filename`` is
    read with ``ped_file_format="simple"``; the de novo loader for
    ``dae_denovo_config.denovo_filename`` is wrapped in an
    ``AnnotationPipelineDecorator`` before being handed to
    ``RawMemoryVariants``.
    """
    loaded_families = FamiliesLoader(
        dae_denovo_config.family_filename,
        ped_file_format="simple",
    ).load()

    denovo = DenovoLoader(
        loaded_families, dae_denovo_config.denovo_filename, genome_2013)
    annotated = AnnotationPipelineDecorator(
        denovo, annotation_pipeline_internal)

    return RawMemoryVariants([annotated])
Example #12
0
def test_families_loader_roles_testing(fixture_dirname):
    """Check the grandparent roles in ``pedigree_no_role_C.ped``.

    The pedigree is loaded with ``ped_no_role=True`` and the roles of the
    four ``f1`` grandparents are compared against the expected ``Role``
    values.
    """
    ped_path = fixture_dirname("pedigrees/pedigree_no_role_C.ped")
    assert os.path.exists(ped_path)

    loaded = FamiliesLoader(ped_path, ped_no_role=True).load()

    expected_roles = (
        ("f1.mg_dad", Role.maternal_grandfather),
        ("f1.mg_mom", Role.maternal_grandmother),
        ("f1.pg_dad", Role.paternal_grandfather),
        ("f1.pg_mom", Role.paternal_grandmother),
    )
    for person_id, expected in expected_roles:
        assert loaded.persons[person_id].role == expected
Example #13
0
def iossifov2014_loader(dae_iossifov2014_config, genome_2013,
                        annotation_pipeline_internal):
    """Return the annotated de novo loader and the families loader.

    The families are loaded from ``family_filename`` and the de novo
    variants from ``denovo_filename`` of ``dae_iossifov2014_config``; the
    de novo loader is wrapped in an ``AnnotationPipelineDecorator``.

    Returns:
        Tuple ``(variants_loader, families_loader)``.
    """
    ped_loader = FamiliesLoader(dae_iossifov2014_config.family_filename)
    loaded_families = ped_loader.load()

    denovo = DenovoLoader(
        loaded_families,
        dae_iossifov2014_config.denovo_filename,
        genome_2013,
    )
    annotated = AnnotationPipelineDecorator(
        denovo, annotation_pipeline_internal)

    return annotated, ped_loader
Example #14
0
def test_vcf_info_annotator(fixture_dirname, genomes_db_2013):
    """Annotate the summary variants of a trio VCF with gnomAD INFO scores.

    A ``VcfInfoAnnotator`` is configured with the gnomAD scores fixture and
    applied to every summary variant of the trio VCF; each alternative
    allele is then expected to have a non-``None``
    ``genome_gnomad_af_percent`` attribute.
    """
    scores_path = fixture_dirname(
        "vcf_scores/gnomad.genomes.r2.1.1.sites.21.1_622.vcf.gz")

    section = {
        "options": {
            "vcf": True,
            "c": "chrom",
            "p": "position",
            "r": "reference",
            "a": "alternative",
            "scores_file": scores_path,
        },
        "columns": {
            "AC": "genome_gnomad_ac",
            "AF": "genome_gnomad_af",
            "AF_percent": "genome_gnomad_af_percent",
        },
        "annotator": "vcf_info_annotator.VcfInfoAnnotator",
        "virtual_columns": [],
    }
    parsed_config = AnnotationConfigParser.parse_section(section)

    info_annotator = VcfInfoAnnotator(parsed_config, genomes_db_2013)
    assert info_annotator is not None

    vcf_path = fixture_dirname(
        "vcf_scores/gnomad.genomes.r2.1.1.sites.21.trio.vcf.gz")
    ped_path = fixture_dirname("vcf_scores/trio.ped")
    assert os.path.exists(vcf_path)
    assert os.path.exists(ped_path)

    loaded_families = FamiliesLoader(ped_path).load()

    vcf = VcfLoader(
        loaded_families, [vcf_path], genomes_db_2013.get_genome())
    assert vcf is not None

    for summary, _ in vcf.full_variants_iterator():
        # A fresh, empty liftover mapping is passed for every variant.
        info_annotator.annotate_summary_variant(summary, {})

        for aa in summary.alt_alleles:
            af = aa.get_attribute("genome_gnomad_af_percent")
            logger.debug(f"summary variant: {aa}; gnomad AF {af}%")
            assert af is not None
Example #15
0
def test_wild_vcf_loader_pedigree(fixture_dirname, gpf_instance_2013):
    """Load two wildcard VCFs with ``vcf_pedigree_mode="fixed"``.

    Both underlying loaders are expected to report ``fixed_pedigree``, the
    summary indexes are expected to be ``0, 1, 2, ...`` and the persons and
    families seen by the two loaders are expected to be equal.
    """
    vcf_paths = [
        fixture_dirname("multi_vcf/multivcf_pedigree1_chr[vc].vcf.gz"),
        fixture_dirname("multi_vcf/multivcf_pedigree2_chr[vc].vcf.gz"),
    ]
    ped_path = fixture_dirname("multi_vcf/multivcf.ped")

    loaded_families = FamiliesLoader(ped_path).load()

    loader_params = {
        "vcf_chromosomes": "1;2",
        "vcf_pedigree_mode": "fixed",
        "vcf_include_unknown_person_genotypes": True,
        "vcf_include_unknown_family_genotypes": True,
    }
    vcf = VcfLoader(
        loaded_families,
        vcf_paths,
        gpf_instance_2013.genomes_db.get_genome(),
        params=loader_params,
    )

    assert vcf is not None
    assert len(vcf.vcf_loaders) == 2
    for inner_loader in vcf.vcf_loaders:
        assert inner_loader.fixed_pedigree

    seen_indexes = []
    for summary, family_variants in vcf.full_variants_iterator():
        seen_indexes.append(summary.summary_index)
        for family_variant in family_variants:
            print(family_variant)

    assert seen_indexes == list(range(len(seen_indexes)))

    for inner_loader in vcf.vcf_loaders:
        print(inner_loader.families.persons)

    first = vcf.vcf_loaders[0].families
    second = vcf.vcf_loaders[1].families

    # Persons are compared pairwise in the iteration order of each loader.
    person_pairs = zip(first.persons.values(), second.persons.values())
    for left_person, right_person in person_pairs:
        assert left_person == right_person

    for family_id in first.keys():
        left_family = first[family_id]
        right_family = second[family_id]
        assert left_family == right_family
Example #16
0
def test_families_genotypes_decorator_broken_x(fixture_dirname, genome_2013):
    """Expect ``GeneticModel.X_broken`` for every variant of the fixture.

    Families come from ``denovo_families.txt`` (read with
    ``ped_file_format="simple"``) and variants from ``denovo_X_broken.txt``.
    """
    families_path = fixture_dirname("backends/denovo_families.txt")
    loaded_families = FamiliesLoader(
        families_path, ped_file_format="simple").load()

    variants_path = fixture_dirname("backends/denovo_X_broken.txt")
    denovo = DenovoLoader(loaded_families, variants_path, genome_2013)

    for _, family_variants in denovo.full_variants_iterator():
        for family_variant in family_variants:
            print(family_variant, family_variant.genetic_model)
            assert family_variant.genetic_model == GeneticModel.X_broken
Example #17
0
def test_families_loader_phenotype(fixture_dirname):
    """Expect a ``phenotype`` attribute on every person of ``pedigree_D``.

    Each family, each person and the result of the attribute check are
    printed along the way.
    """
    ped_path = fixture_dirname("pedigrees/pedigree_D.ped")
    assert os.path.exists(ped_path)

    loaded = FamiliesLoader(ped_path).load()

    assert loaded is not None
    assert isinstance(loaded, FamiliesData)

    for family_id, family in loaded.items():
        print(family_id, family, family.persons)
        for _, member in family.persons.items():
            print(member)
            print(member.has_attr("phenotype"))
            assert member.has_attr("phenotype")
Example #18
0
def cnv_loader(
        fixture_dirname, genome_2013, annotation_pipeline_internal):
    """Return the families loader and an annotated CNV variants loader.

    Families are read from ``backends/cnv_ped.txt`` with
    ``ped_file_format="simple"`` and variants from
    ``backends/cnv_variants.txt``; the CNV loader is wrapped in an
    ``AnnotationPipelineDecorator``.

    Returns:
        Tuple ``(families_loader, variants_loader)``.
    """
    ped_path = fixture_dirname("backends/cnv_ped.txt")
    cnv_path = fixture_dirname("backends/cnv_variants.txt")

    ped_loader = FamiliesLoader(ped_path, ped_file_format="simple")
    loaded_families = ped_loader.load()

    raw_cnv = CNVLoader(loaded_families, cnv_path, genome_2013)
    annotated = AnnotationPipelineDecorator(
        raw_cnv, annotation_pipeline_internal)

    return ped_loader, annotated
Example #19
0
    def builder(path, params=None):
        """Build the annotated variant loaders for the data found at ``path``.

        The configuration returned by ``vcf_loader_data(path)`` supplies the
        pedigree, the VCF and, optionally, a de novo file. Each loader is
        wrapped in an ``AnnotationPipelineDecorator`` using
        ``default_annotation_pipeline``.

        Args:
            path: value passed to ``vcf_loader_data`` to obtain the
                configuration.
            params: parameters passed to ``VcfLoader``. When ``None`` (the
                default) a new dict is built that includes reference and
                unknown family/person genotypes and sets the de novo and
                omission modes.

        Returns:
            List of decorated loaders: the de novo loader first when the
            configuration has one, then the VCF loader.
        """
        if params is None:
            # Built per call: a dict literal in the signature would be a
            # single object shared, and possibly mutated, across calls.
            params = {
                "vcf_include_reference_genotypes": True,
                "vcf_include_unknown_family_genotypes": True,
                "vcf_include_unknown_person_genotypes": True,
                "vcf_denovo_mode": "denovo",
                "vcf_omission_mode": "omission",
            }

        config = vcf_loader_data(path)

        families_loader = FamiliesLoader(config.pedigree)
        families = families_loader.load()

        loaders = []

        if config.denovo:
            denovo_loader = DenovoLoader(families,
                                         config.denovo,
                                         genomes_db_2013.get_genome(),
                                         params={
                                             "denovo_genotype": "genotype",
                                             "denovo_family_id": "family",
                                             "denovo_chrom": "chrom",
                                             "denovo_pos": "pos",
                                             "denovo_ref": "ref",
                                             "denovo_alt": "alt",
                                         })
            loaders.append(
                AnnotationPipelineDecorator(denovo_loader,
                                            default_annotation_pipeline))

        vcf_loader = VcfLoader(families, [config.vcf],
                               genomes_db_2013.get_genome(),
                               params=params)

        loaders.append(
            AnnotationPipelineDecorator(vcf_loader,
                                        default_annotation_pipeline))

        return loaders
Example #20
0
def test_families_loader_phenos(fixture_dirname):
    """Check the three phenotype columns of ``pedigree_phenos.ped``.

    Every person is expected to have the ``phenotype``, ``pheno2`` and
    ``pheno3`` attributes, and the matching ``ped_df`` columns are expected
    to satisfy ``is_string_dtype``.
    """
    ped_path = fixture_dirname("pedigrees/pedigree_phenos.ped")
    assert os.path.exists(ped_path)

    loaded = FamiliesLoader(ped_path).load()

    assert loaded is not None
    assert isinstance(loaded, FamiliesData)

    for _, family in loaded.items():
        for _, member in family.persons.items():
            for attribute in ("phenotype", "pheno2", "pheno3"):
                assert member.has_attr(attribute)

    frame = loaded.ped_df
    for column in ("pheno3", "pheno2", "phenotype"):
        assert is_string_dtype(frame[column])
Example #21
0
def test_vcf_loader(vcf_loader_data, variants_vcf, fixture_data,
                    genomes_db_2013):
    """Create a ``VcfLoader`` for ``fixture_data`` and print its variants.

    The loader is configured to include reference genotypes and unknown
    family/person genotypes; all family variants are materialised and
    printed.
    """
    data_config = vcf_loader_data(fixture_data)
    print(data_config)

    loaded_families = FamiliesLoader(data_config.pedigree).load()

    include_everything = {
        "vcf_include_reference_genotypes": True,
        "vcf_include_unknown_family_genotypes": True,
        "vcf_include_unknown_person_genotypes": True,
    }
    vcf = VcfLoader(
        loaded_families,
        [data_config.vcf],
        genomes_db_2013.get_genome(),
        params=include_everything,
    )
    assert vcf is not None

    collected = list(vcf.family_variants_iterator())
    for family_variant in collected:
        print(family_variant)
Example #22
0
def pedigree_test(fixture_dirname):
    """Return the families loaded from the ``pedigrees/test.ped`` fixture."""
    ped_path = fixture_dirname("pedigrees/test.ped")
    return FamiliesLoader(ped_path).load()
Example #23
0
def main(argv):
    """Convert a pedigree file into a families parquet file.

    The pedigree is loaded with ``FamiliesLoader``; when a partition
    description with a positive ``family_bin_size`` is given, family bins
    are added to the families. The result is written with
    ``ParquetManager.families_to_parquet``.

    Args:
        argv: list of command line arguments to parse.
    """
    cli = argparse.ArgumentParser()

    cli.add_argument('--verbose', '-V', action='count', default=0)

    FamiliesLoader.cli_arguments(cli)
    cli.add_argument(
        "-o",
        "--output",
        dest="output_filename",
        help="output families parquet filename "
        "(default is [basename(families_filename).parquet])",
    )
    cli.add_argument(
        "--partition-description",
        "--pd",
        help="input partition description filename",
    )
    cli.add_argument(
        "--study-id",
        type=str,
        default=None,
        dest="study_id",
        metavar="<study id>",
        help="Study ID. "
        "If none specified, the basename of families filename is used to "
        "construct study id [default: basename(families filename)]",
    )
    args = cli.parse_args(argv)

    # Without -V only errors are logged; each extra -V lowers the threshold.
    if args.verbose >= 3:
        log_level = logging.DEBUG
    elif args.verbose == 2:
        log_level = logging.INFO
    elif args.verbose == 1:
        log_level = logging.WARNING
    else:
        log_level = logging.ERROR
    logging.basicConfig(level=log_level)

    filename, params = FamiliesLoader.parse_cli_arguments(args)
    if args.study_id is None:
        study_id, _ = os.path.splitext(os.path.basename(filename))
    else:
        study_id = args.study_id

    families = FamiliesLoader(filename, **params).load()

    if args.partition_description:
        descriptor = ParquetPartitionDescriptor.from_config(
            args.partition_description
        )
        if descriptor.family_bin_size > 0:
            families = descriptor.add_family_bins_to_families(families)

    if args.output_filename:
        output_filename = args.output_filename
    else:
        # Default: the input basename with its extension replaced.
        stem, _ = os.path.splitext(os.path.basename(filename))
        output_filename = f"{stem}.parquet"

    ParquetManager.families_to_parquet(families, output_filename)
Example #24
0
    def build(dirname):
        """Import the VCF studies found under ``fixtures/<dirname>``.

        The test database is created when ``check_database`` reports it
        missing. For each collected configuration the study is skipped when
        ``reimport`` is false and both its variant and pedigree tables
        already exist; otherwise the pedigree, the optional de novo file and
        the VCF are loaded, annotated and passed to
        ``impala_genotype_storage.simple_study_import``.
        """
        if not impala_helpers.check_database(impala_test_dbname()):
            impala_helpers.create_database(impala_test_dbname())

        fixtures_path = relative_to_this_test_folder(
            os.path.join("fixtures", dirname))

        for config in collect_vcf(fixtures_path):
            logger.debug(f"importing: {config}")

            # Table names are derived from the pedigree file's basename
            # without its extension.
            ped_basename = os.path.basename(config.pedigree)
            study_id = os.path.splitext(ped_basename)[0]

            variant_table, pedigree_table = \
                impala_genotype_storage.study_tables(
                    FrozenBox({"id": study_id}))

            if not reimport:
                already_imported = impala_helpers.check_table(
                    impala_test_dbname(), variant_table) \
                    and impala_helpers.check_table(
                        impala_test_dbname(), pedigree_table)
                if already_imported:
                    continue

            study_id = study_id_from_path(config.pedigree)
            study_temp_dirname = os.path.join(temp_dirname, study_id)

            families_loader = FamiliesLoader(config.pedigree)
            families = families_loader.load()
            genome = gpf_instance_2013.genomes_db.get_genome()

            loaders = []
            if config.denovo:
                denovo_params = {
                    "denovo_genotype": "genotype",
                    "denovo_family_id": "family",
                    "denovo_chrom": "chrom",
                    "denovo_pos": "pos",
                    "denovo_ref": "ref",
                    "denovo_alt": "alt",
                }
                denovo_loader = DenovoLoader(
                    families, config.denovo, genome, params=denovo_params)
                loaders.append(
                    AnnotationPipelineDecorator(
                        denovo_loader, annotation_pipeline))

            vcf_params = {
                "vcf_include_reference_genotypes": True,
                "vcf_include_unknown_family_genotypes": True,
                "vcf_include_unknown_person_genotypes": True,
                "vcf_multi_loader_fill_in_mode": "reference",
                "vcf_denovo_mode": "denovo",
                "vcf_omission_mode": "omission",
            }
            vcf_loader = VcfLoader(
                families,
                [config.vcf],
                genome,
                regions=None,
                params=vcf_params,
            )
            loaders.append(
                AnnotationPipelineDecorator(vcf_loader, annotation_pipeline))

            impala_genotype_storage.simple_study_import(
                study_id,
                families_loader=families_loader,
                variant_loaders=loaders,
                output=study_temp_dirname,
                include_reference=True)
Example #25
0
def main(argv, gpf_instance=None):
    """Rewrite a pedigree file, optionally adjusted by VCF files.

    The pedigree is loaded with ``FamiliesLoader``. When a partition
    description is given, family bins are added. When VCF files are given,
    the families of a ``VcfLoader`` built from them replace the loaded ones.
    Broken families without members are removed, and the result is written
    with ``FamiliesLoader.save_pedigree``.

    Args:
        argv: list of command line arguments to parse.
        gpf_instance: GPF instance that supplies the genome; a new
            ``GPFInstance`` is created when ``None``.
    """
    if gpf_instance is None:
        gpf_instance = GPFInstance()

    parser = argparse.ArgumentParser()
    parser.add_argument('--verbose', '-V', action='count', default=0)

    FamiliesLoader.cli_arguments(parser)
    VcfLoader.cli_arguments(parser, options_only=True)

    parser.add_argument(
        "-o",
        "--output",
        dest="output_filename",
        # The output is written by ``FamiliesLoader.save_pedigree`` and is
        # a pedigree file, not a parquet file.
        help="output pedigree filename "
        "(default is [basename(families_filename).ped])",
    )
    parser.add_argument(
        "--partition-description",
        "--pd",
        help="input partition description filename",
    )
    parser.add_argument(
        "--vcf-files",
        type=str,
        nargs="+",
        metavar="<VCF filename>",
        help="VCF file to import",
    )

    argv = parser.parse_args(argv)

    # WARNING is used both without -V and with a single -V.
    if argv.verbose >= 3:
        log_level = logging.DEBUG
    elif argv.verbose == 2:
        log_level = logging.INFO
    else:
        log_level = logging.WARNING
    logging.basicConfig(level=log_level)

    filename, params = FamiliesLoader.parse_cli_arguments(argv)
    logger.info(f"PED PARAMS: {params}")

    loader = FamiliesLoader(filename, **params)
    families = loader.load()

    if argv.partition_description:
        partition_description = ParquetPartitionDescriptor.from_config(
            argv.partition_description)
        families = partition_description.add_family_bins_to_families(families)

    variants_filenames, variants_params = \
        VcfLoader.parse_cli_arguments(argv)

    if variants_filenames:
        variants_loader = VcfLoader(
            families,
            variants_filenames,
            params=variants_params,
            genome=gpf_instance.genomes_db.get_genome(),
        )

        # Continue with the families exposed by the VCF loader.
        families = variants_loader.families

    if families.broken_families:
        # Iterate over a snapshot: families are deleted inside the loop and
        # ``broken_families`` may be backed by the same container.
        broken = list(families.broken_families.items())
        for family_id, family in broken:
            if not family.has_members():
                del families[family_id]
                logger.warning(
                    f"family {family_id} does not contain sequenced members "
                    f"and is removed from the pedigree: {family}")

    if not argv.output_filename:
        output_filename, _ = os.path.splitext(os.path.basename(filename))
        output_filename = f"{output_filename}.ped"
    else:
        output_filename = argv.output_filename

    FamiliesLoader.save_pedigree(families, output_filename)
Example #26
0
    def build_backend(self, study_config, genomes_db):
        """Build an in-memory variants backend for ``study_config``.

        When the study configuration lists no genotype storage files, the
        pedigree and the VCF are looked up as ``<study id>.ped`` and
        ``<study id>.vcf`` inside the study ``data`` directory. Otherwise
        the configured pedigree is loaded with its parameters and one
        loader is created per configured variants file, selected by the
        file ``format`` (``vcf``, ``denovo``, ``dae`` or ``cnv``).

        Every variants loader is passed through
        ``StoredAnnotationDecorator.decorate``.

        Args:
            study_config: study configuration; ``id`` and
                ``genotype_storage.files`` are read from it.
            genomes_db: object whose ``get_genome()`` supplies the genome
                passed to the variants loaders.

        Returns:
            ``RawMemoryVariants`` built from the loaders and the families.

        Raises:
            ValueError: if a configured variants file has a ``format`` other
                than ``vcf``, ``denovo``, ``dae`` or ``cnv``.
        """
        files_config = study_config.genotype_storage.files

        if not files_config:
            data_dir = self.get_data_dir(study_config.id, "data")
            vcf_filename = os.path.join(data_dir,
                                        "{}.vcf".format(study_config.id))
            ped_filename = os.path.join(data_dir,
                                        "{}.ped".format(study_config.id))

            families_loader = FamiliesLoader(ped_filename)
            families = families_loader.load()
            variants_loader = VcfLoader(families, [vcf_filename],
                                        genomes_db.get_genome())
            variants_loader = StoredAnnotationDecorator.decorate(
                variants_loader, vcf_filename)

            return RawMemoryVariants([variants_loader], families)

        start = time.time()
        ped_params = files_config.pedigree.params.to_dict()
        ped_filename = files_config.pedigree.path
        logger.debug(f"pedigree params: {ped_filename}; {ped_params}")

        families_loader = FamiliesLoader(ped_filename, **ped_params)
        families = families_loader.load()
        elapsed = time.time() - start
        logger.info(f"families loaded in in {elapsed:.2f} sec")
        logger.debug(f"{families.ped_df.head()}")

        loaders = []
        for file_conf in files_config.variants:
            variants_filename = file_conf.path
            variants_params = file_conf.params.to_dict()
            logger.debug(
                f"variant params: {variants_filename}; {variants_params}")

            file_format = file_conf.format
            annotation_filename = variants_filename
            if file_format == "vcf":
                # A VCF entry may list several space-separated filenames;
                # the first one is used for the stored annotation.
                variants_filenames = [
                    fn.strip() for fn in variants_filename.split(" ")
                ]
                variants_loader = VcfLoader(
                    families,
                    variants_filenames,
                    genomes_db.get_genome(),
                    params=variants_params,
                )
                annotation_filename = variants_filenames[0]
            elif file_format == "denovo":
                variants_loader = DenovoLoader(
                    families,
                    variants_filename,
                    genomes_db.get_genome(),
                    params=variants_params,
                )
            elif file_format == "dae":
                variants_loader = DaeTransmittedLoader(
                    families,
                    variants_filename,
                    genomes_db.get_genome(),
                    params=variants_params,
                )
            elif file_format == "cnv":
                variants_loader = CNVLoader(
                    families,
                    variants_filename,
                    genomes_db.get_genome(),
                    params=variants_params,
                )
            else:
                # Without this branch an unknown format would either hit an
                # unbound ``variants_loader`` or silently re-use the loader
                # built for the previous file.
                raise ValueError(
                    f"unsupported variants file format <{file_format}> "
                    f"for {variants_filename}")

            variants_loader = StoredAnnotationDecorator.decorate(
                variants_loader, annotation_filename)
            loaders.append(variants_loader)

        return RawMemoryVariants(loaders, families)
Example #27
0
def sample_family():
    """Return family ``f1`` loaded from the comma-separated ``PED1`` text.

    Asserts that the ``trios`` of the family has length 1 before returning.
    """
    pedigree_buffer = StringIO(PED1)
    loaded = FamiliesLoader(pedigree_buffer, ped_sep=",").load()
    result = loaded["f1"]
    assert len(result.trios) == 1
    return result
 def builder(relpath):
     """Load the comma-separated pedigree at ``fixtures/<relpath>``.

     The path is resolved relative to the real directory of this file.
     """
     here = os.path.dirname(os.path.realpath(__file__))
     ped_path = os.path.join(here, "fixtures", relpath)
     return FamiliesLoader(ped_path, ped_sep=",").load()