Example #1
0
def test_load_family_simple(fixture_name, temp_filename, fixture_dirname):
    """Round-trip a simple families file through the pedigree format.

    The fixture is loaded as a simple families file, written out as a
    pedigree to ``temp_filename`` and read back; both loads must expose
    the same set of keys.
    """
    source_path = fixture_dirname(fixture_name)
    assert os.path.exists(source_path)

    original = FamiliesLoader.load_simple_families_file(source_path)
    assert original is not None

    # Save and immediately reload so the comparison covers both directions.
    FamiliesLoader.save_pedigree(original, temp_filename)
    reloaded = FamiliesLoader.load_pedigree_file(temp_filename)

    original_keys = set(original.keys())
    reloaded_keys = set(reloaded.keys())
    assert original_keys == reloaded_keys
def main(argv):
    """Convert a simple family file into a pedigree file.

    ``argv`` is the full argument vector; its first element is dropped
    before parsing. The study id falls back to the family file's base name
    without extension, and the output path falls back to ``<study_id>.ped``.
    """
    args = parse_cli_arguments(argv[1:])

    # Explicit --id wins; otherwise derive the id from the input file name.
    study_id = args.id
    if study_id is None:
        base_name = os.path.basename(args.family_filename)
        study_id = os.path.splitext(base_name)[0]

    output = args.output
    if output is None:
        output = "{study_id}.ped".format(study_id=study_id)

    # NOTE(review): another caller in this file uses
    # `FamiliesLoader.load_simple_families_file` -- confirm which name
    # FamiliesLoader actually provides.
    families = FamiliesLoader.load_simple_family_file(args.family_filename)
    FamiliesLoader.save_pedigree(families, output)
Example #3
0
def main(argv, gpf_instance=None):
    """Build a pedigree file from a families file, optionally using VCF input.

    Steps, in order:

    1. parse ``argv`` and configure logging from the ``--verbose`` count;
    2. load the families with ``FamiliesLoader``;
    3. if a partition description is given, add family bins to the families;
    4. if VCF files are given, build a ``VcfLoader`` over them and continue
       with the families it exposes;
    5. drop broken families that have no members (logging a warning each);
    6. save the result with ``FamiliesLoader.save_pedigree``.

    Args:
        argv: command-line arguments, passed unchanged to
            ``ArgumentParser.parse_args``.
        gpf_instance: instance used to obtain the genome for the VCF loader;
            a default ``GPFInstance()`` is created when omitted.
    """
    if gpf_instance is None:
        gpf_instance = GPFInstance()

    parser = argparse.ArgumentParser()
    parser.add_argument('--verbose', '-V', action='count', default=0)

    FamiliesLoader.cli_arguments(parser)
    VcfLoader.cli_arguments(parser, options_only=True)

    parser.add_argument(
        "-o",
        "--output",
        dest="output_filename",
        # The tool writes a pedigree (.ped) file, not a parquet file.
        help="output families pedigree filename "
        "(default is [basename(families_filename).ped])",
    )
    parser.add_argument(
        "--partition-description",
        "--pd",
        help="input partition description filename",
    )
    parser.add_argument(
        "--vcf-files",
        type=str,
        nargs="+",
        metavar="<VCF filename>",
        help="VCF file to import",
    )

    # Keep the raw argument list and the parsed namespace under separate names.
    args = parser.parse_args(argv)

    # -V and no flag both map to WARNING; -VV is INFO; -VVV and above is DEBUG.
    if args.verbose >= 3:
        log_level = logging.DEBUG
    elif args.verbose == 2:
        log_level = logging.INFO
    else:
        log_level = logging.WARNING
    logging.basicConfig(level=log_level)

    filename, params = FamiliesLoader.parse_cli_arguments(args)
    logger.info(f"PED PARAMS: {params}")

    loader = FamiliesLoader(filename, **params)
    families = loader.load()

    if args.partition_description:
        partition_description = ParquetPartitionDescriptor.from_config(
            args.partition_description)
        families = partition_description.add_family_bins_to_families(families)

    variants_filenames, variants_params = \
        VcfLoader.parse_cli_arguments(args)

    if variants_filenames:
        variants_loader = VcfLoader(
            families,
            variants_filenames,
            params=variants_params,
            genome=gpf_instance.genomes_db.get_genome(),
        )

        # Continue with the families as exposed by the VCF loader.
        families = variants_loader.families

    broken_families = families.broken_families
    if broken_families:
        # Iterate over a snapshot: entries are deleted from `families` inside
        # the loop, and if `broken_families` shares storage with `families`
        # iterating the live view would raise RuntimeError.
        for family_id, family in list(broken_families.items()):
            if family.has_members():
                continue
            del families[family_id]
            logger.warning(
                f"family {family_id} does not contain sequenced members "
                f"and is removed from the pedigree: {family}")

    output_filename = args.output_filename
    if not output_filename:
        # Default: input file's base name with its extension swapped for .ped.
        stem, _ = os.path.splitext(os.path.basename(filename))
        output_filename = f"{stem}.ped"

    FamiliesLoader.save_pedigree(families, output_filename)