Ejemplo n.º 1
0
def run(args):
    """Convert a genotype folder and its expression phenotypes to parquet.

    Steps, in order:
      1. Make sure the folders required by ``args.output_prefix`` exist.
      2. Load the SNP annotation, keyed from "varID" to "rsid_dbSNP150".
      3. Load the genotype files found in ``args.input_genotype_folder``
         that match ``args.input_genotype_file_pattern``.
      4. Save the variants and the variants metadata next to the prefix.
      5. Process every phenotype file discovered in
         ``args.input_phenotype_folder``.

    Args:
        args: parsed command line arguments; the attributes read here are
            ``output_prefix``, ``snp_annotation_file``,
            ``input_genotype_folder``, ``input_genotype_file_pattern``,
            ``input_phenotype_folder`` and
            ``input_phenotype_expression_pattern``.
    """
    started_at = timer()
    output_prefix = args.output_prefix
    Utilities.ensure_requisite_folders(output_prefix)

    logging.info("Loading SNP annotation")
    annotation = KeyedDataSource.load_data(
        args.snp_annotation_file,
        "varID",
        "rsid_dbSNP150",
        should_skip=KeyedDataSource.skip_na,
    )

    logging.info("Loading Genotype")
    genotype, individual_ids = ModelTraining.load_genotype_folder(
        args.input_genotype_folder,
        args.input_genotype_file_pattern,
        annotation,
    )

    logging.info("Saving Genotype")
    Parquet.save_variants(
        output_prefix + ".variants.parquet", genotype, individual_ids)
    Parquet.save_metadata(
        output_prefix + ".variants_metadata.parquet", genotype)

    logging.info("Processing Expression Phenotype")
    phenotype_files = Utilities.file_logic(
        args.input_phenotype_folder,
        args.input_phenotype_expression_pattern,
    )
    # Each tuple exposes the phenotype file location (.path) and label (.name).
    for phenotype in phenotype_files.itertuples():
        logging.info("Phenotype: %s", phenotype.name)
        process_phenotype(phenotype.path, phenotype.name, args.output_prefix)

    elapsed = timer() - started_at
    logging.info("Finished in %s", str(elapsed))
def generate_multi_backend(args, variant_key):
    """Save the genotype as one variants parquet file per chromosome.

    The genotype file is consumed chromosome by chromosome. Each chunk is
    written to ``<output_prefix>.chr<chromosome>.variants.parquet``; the
    variants metadata of all chunks is concatenated and written once to
    ``<output_prefix>.variants_metadata.parquet``.

    Args:
        args: parsed command line arguments; the attributes read here are
            ``impute_to_mean``, ``input_genotype_file``,
            ``simplify_individual_id`` and ``output_prefix`` (``get_filter``
            may read more).
        variant_key: forwarded untouched to ``get_filter`` and to the
            genotype loader.
    """
    logging.info("Processing Genotype")
    if args.impute_to_mean:
        dosage_conversion = GenotypeUtilities.impute_to_mean_conversion
    else:
        dosage_conversion = None
    dosage_filter = get_filter(args, variant_key)

    chromosome_chunks = ModelTraining.load_genotype_file_by_chromosome(
        args.input_genotype_file, variant_key, dosage_conversion,
        dosage_filter)

    collected_metadata = []
    for genotype, individual_ids in chromosome_chunks:
        if args.simplify_individual_id:
            logging.info("simplifying individual id")
            # Keep only the part of each id that precedes the first "_".
            individual_ids = [
                identifier.split("_", 1)[0] for identifier in individual_ids
            ]

        chunk_metadata = genotype.get_variants_metadata()
        collected_metadata.append(chunk_metadata)

        # The chunk is labelled with the chromosome of its first variant.
        chromosome = chunk_metadata.chromosome.values[0]
        # Level 9 sits just below logging.DEBUG (10).
        logging.log(9, "Processing {}".format(chromosome))

        chromosome_tag = ".chr{}".format(chromosome)
        variants_path = (
            args.output_prefix + chromosome_tag + ".variants.parquet")
        Parquet.save_variants(variants_path, genotype, individual_ids)

    logging.info("Saving metadata")
    combined_metadata = pandas.concat(collected_metadata)
    Parquet._save_metadata(
        args.output_prefix + ".variants_metadata.parquet", combined_metadata)
Ejemplo n.º 3
0
def generate_single_backend(args, variant_key):
    """Save the whole genotype as a single variants parquet file.

    The genotype file is loaded in one go, then written to
    ``<output_prefix>.variants.parquet``; its variants metadata goes to
    ``<output_prefix>.variants_metadata.parquet``.

    Args:
        args: parsed command line arguments; the attributes read here are
            ``impute_to_mean``, ``input_genotype_file`` and
            ``output_prefix`` (``get_filter`` may read more).
        variant_key: forwarded untouched to ``get_filter`` and to the
            genotype loader.
    """
    logging.info("Loading Genotype")
    dosage_conversion = GenotypeUtilities.impute_to_mean_conversion if args.impute_to_mean else None
    dosage_filter = get_filter(args, variant_key)
    genotype, individual_ids = ModelTraining.load_genotype_file(
        args.input_genotype_file, variant_key, dosage_conversion,
        dosage_filter)

    logging.info("Saving Genotype")
    path_variant = args.output_prefix + ".variants.parquet"
    Parquet.save_variants(path_variant, genotype, individual_ids)

    # This step writes the metadata, so the log line says so (it used to
    # repeat "Saving Genotype", which made the two steps indistinguishable).
    logging.info("Saving metadata")
    path_metadata_variant = args.output_prefix + ".variants_metadata.parquet"
    Parquet.save_metadata(path_metadata_variant, genotype)