def test_load_family_simple(fixture_name, temp_filename, fixture_dirname):
    """Round-trip a simple families file through the pedigree format.

    The fixture is read with ``FamiliesLoader.load_simple_families_file``,
    written to ``temp_filename`` with ``FamiliesLoader.save_pedigree`` and
    read back with ``FamiliesLoader.load_pedigree_file``. The test passes
    when both loaded objects expose the same set of keys.
    """
    # Resolve the fixture name to a path and make sure it is really there
    # before handing it to the loader.
    source_path = fixture_dirname(fixture_name)
    assert os.path.exists(source_path)

    loaded = FamiliesLoader.load_simple_families_file(source_path)
    assert loaded is not None

    # Save as pedigree, then reload from the file that was just written.
    FamiliesLoader.save_pedigree(loaded, temp_filename)
    reloaded = FamiliesLoader.load_pedigree_file(temp_filename)

    original_keys = set(loaded.keys())
    roundtrip_keys = set(reloaded.keys())
    assert original_keys == roundtrip_keys
def main(argv):
    """Convert a simple family file into a pedigree file.

    Args:
        argv: full argument vector; everything after the first element is
            passed to ``parse_cli_arguments``.

    The study id is ``args.id`` when given, otherwise the base name of
    ``args.family_filename`` without its extension. The result is written
    to ``args.output`` when given, otherwise to ``<study_id>.ped``.
    """
    args = parse_cli_arguments(argv[1:])

    study_id = args.id
    if study_id is None:
        # Fall back to the input file's base name, extension stripped.
        base_name = os.path.basename(args.family_filename)
        study_id = os.path.splitext(base_name)[0]

    output = f"{study_id}.ped" if args.output is None else args.output

    fam_df = FamiliesLoader.load_simple_family_file(args.family_filename)
    FamiliesLoader.save_pedigree(fam_df, output)
def main(argv, gpf_instance=None):
    """Load a pedigree, optionally adjust it, and save it as a pedigree file.

    Steps, in order:

    1. Parse the command line (families options, VCF options, output name,
       partition description, VCF files, verbosity).
    2. Load the families with ``FamiliesLoader``.
    3. If ``--partition-description`` is given, add family bins to the
       families via ``ParquetPartitionDescriptor``.
    4. If VCF files are given, build a ``VcfLoader`` and continue with the
       families it exposes.
    5. Drop broken families that have no members, logging a warning each.
    6. Save the result with ``FamiliesLoader.save_pedigree``.

    Args:
        argv: list of command-line arguments, passed unchanged to
            ``ArgumentParser.parse_args``.
        gpf_instance: object providing ``genomes_db.get_genome()``. When
            ``None`` a new ``GPFInstance()`` is created.
    """
    if gpf_instance is None:
        gpf_instance = GPFInstance()

    parser = argparse.ArgumentParser()
    parser.add_argument("--verbose", "-V", action="count", default=0)

    FamiliesLoader.cli_arguments(parser)
    VcfLoader.cli_arguments(parser, options_only=True)

    parser.add_argument(
        "-o",
        "--output",
        dest="output_filename",
        # The file is written by save_pedigree and defaults to "*.ped";
        # the previous help text called it a parquet file.
        help="output pedigree filename "
        "(default is [basename(families_filename).ped])",
    )
    parser.add_argument(
        "--partition-description",
        "--pd",
        help="input partition description filename",
    )
    parser.add_argument(
        "--vcf-files",
        type=str,
        nargs="+",
        metavar="<VCF filename>",
        help="VCF file to import",
    )

    # Keep the raw argument list and the parsed namespace under separate
    # names instead of rebinding the ``argv`` parameter.
    args = parser.parse_args(argv)

    # -V (or nothing) -> WARNING, -VV -> INFO, -VVV and more -> DEBUG.
    if args.verbose >= 3:
        log_level = logging.DEBUG
    elif args.verbose == 2:
        log_level = logging.INFO
    else:
        log_level = logging.WARNING
    logging.basicConfig(level=log_level)

    filename, params = FamiliesLoader.parse_cli_arguments(args)
    logger.info(f"PED PARAMS: {params}")

    loader = FamiliesLoader(filename, **params)
    families = loader.load()

    if args.partition_description:
        partition_description = ParquetPartitionDescriptor.from_config(
            args.partition_description)
        families = partition_description.add_family_bins_to_families(families)

    variants_filenames, variants_params = \
        VcfLoader.parse_cli_arguments(args)

    if variants_filenames:
        variants_loader = VcfLoader(
            families,
            variants_filenames,
            params=variants_params,
            genome=gpf_instance.genomes_db.get_genome(),
        )
        # Continue with the families object exposed by the VCF loader.
        families = variants_loader.families

    if families.broken_families:
        # Iterate over a snapshot: entries are deleted from ``families``
        # inside the loop, and the relationship between ``families`` and
        # its ``broken_families`` mapping is not visible here.
        for family_id, family in list(families.broken_families.items()):
            if family.has_members():
                continue
            del families[family_id]
            logger.warning(
                f"family {family_id} does not contain sequenced members "
                f"and is removed from the pedigree: {family}")

    output_filename = args.output_filename
    if not output_filename:
        # Default: input file's base name with its extension replaced
        # by ".ped", written to the current directory.
        stem, _ = os.path.splitext(os.path.basename(filename))
        output_filename = f"{stem}.ped"

    FamiliesLoader.save_pedigree(families, output_filename)