Esempio n. 1
0
def main(input_filepath, output_mappings_filepath, output_curation_filepath,
         filters, zooma_host, oxo_target_list, oxo_distance):
    """Process every distinct trait name and write mapping/curation output.

    Trait names are read with ``parse_trait_names``, counted, wrapped in
    ``Trait(name, frequency)`` objects, run through ``process_trait`` in a
    pool of worker processes and then handed, in input order, to
    ``output_trait`` together with the two tab-delimited CSV writers.

    :param input_filepath: passed unchanged to ``parse_trait_names``.
    :param output_mappings_filepath: path of the tab-delimited mappings file;
        it is (re)created and starts with the header row
        ``#clinvar_trait_name, uri, label``.
    :param output_curation_filepath: path of the tab-delimited curation file;
        it is (re)created without a header row.
    :param filters: forwarded unchanged to ``process_trait``.
    :param zooma_host: forwarded unchanged to ``process_trait``.
    :param oxo_target_list: forwarded unchanged to ``process_trait``.
    :param oxo_distance: forwarded unchanged to ``process_trait``.
    """
    logger.info('Started parsing trait names')
    trait_names_list = parse_trait_names(input_filepath)
    trait_names_counter = Counter(trait_names_list)
    logger.info("Loaded %s trait names", len(trait_names_counter))

    # Both files are consumed by csv.writer, so both are opened with
    # newline='' to stop the text layer from translating the writer's own
    # line terminator a second time.
    with open(output_mappings_filepath, "w", newline='') as mapping_file, \
            open(output_curation_filepath, "wt", newline='') as curation_file:
        mapping_writer = csv.writer(mapping_file, delimiter="\t")
        mapping_writer.writerow(["#clinvar_trait_name", "uri", "label"])
        curation_writer = csv.writer(curation_file, delimiter="\t")

        logger.info('Processing trait names in parallel')
        process_trait_args = [
            (Trait(trait_name, freq), filters, zooma_host, oxo_target_list,
             oxo_distance)
            for trait_name, freq in trait_names_counter.items()
        ]

        # Pool.apply blocks until each single call has finished, which made
        # the previous implementation effectively serial. starmap submits all
        # the work at once and returns the results in input order. The
        # context manager releases the worker processes once the results are
        # collected.
        with multiprocessing.Pool(processes=12) as trait_process_pool:
            processed_trait_list = trait_process_pool.starmap(
                process_trait, process_trait_args)

        for trait in processed_trait_list:
            output_trait(trait, mapping_writer, curation_writer)

    logger.info('Finished processing trait names')
Esempio n. 2
0
def main(input_filepath, output_mappings_filepath, output_curation_filepath, filters, zooma_host, oxo_target_list,
         oxo_distance):
    """Process the parsed traits in a worker pool and write the output files.

    The traits returned by ``parse_trait_names`` are run through
    ``process_trait`` in a pool of worker processes. Every processed trait
    whose lower-cased ``name`` is not listed in
    ``ClinVarTrait.NONSPECIFIC_TRAITS`` is then passed, in input order, to
    ``output_trait`` together with the two tab-delimited CSV writers.

    :param input_filepath: passed unchanged to ``parse_trait_names``.
    :param output_mappings_filepath: path of the tab-delimited mappings file;
        it is (re)created and starts with the header row
        ``#clinvar_trait_name, uri, label``.
    :param output_curation_filepath: path of the tab-delimited curation file;
        it is (re)created without a header row.
    :param filters: forwarded unchanged to ``process_trait``.
    :param zooma_host: forwarded unchanged to ``process_trait``.
    :param oxo_target_list: forwarded unchanged to ``process_trait``.
    :param oxo_distance: forwarded unchanged to ``process_trait``.
    """
    logger.info('Started parsing trait names')
    trait_list = parse_trait_names(input_filepath)
    logger.info("Loaded %s trait names", len(trait_list))

    # Both files are consumed by csv.writer, so both are opened with
    # newline='' to stop the text layer from translating the writer's own
    # line terminator a second time.
    with open(output_mappings_filepath, "w", newline='') as mapping_file, \
            open(output_curation_filepath, "wt", newline='') as curation_file:
        mapping_writer = csv.writer(mapping_file, delimiter="\t")
        mapping_writer.writerow(["#clinvar_trait_name", "uri", "label"])
        curation_writer = csv.writer(curation_file, delimiter="\t")

        logger.info('Processing trait names in parallel')
        # Pool.apply blocks until each single call has finished, so calling it
        # once per trait ran the work serially. starmap dispatches every call
        # to the pool and returns the results in input order. The context
        # manager releases the worker processes afterwards.
        with multiprocessing.Pool(processes=24) as trait_process_pool:
            processed_trait_list = trait_process_pool.starmap(
                process_trait,
                [(trait, filters, zooma_host, oxo_target_list, oxo_distance) for trait in trait_list],
            )

        logger.info('Writing output with the processed traits')
        for trait in processed_trait_list:
            # Remove non-specific trait names which should never be output
            if trait.name.lower() not in ClinVarTrait.NONSPECIFIC_TRAITS:
                output_trait(trait, mapping_writer, curation_writer)

    logger.info('Finished processing trait names')
Esempio n. 3
0
def main(input_filepath, output_mappings_filepath, output_curation_filepath, filters, zooma_host,
         oxo_target_list, oxo_distance):
    """Process each distinct trait name sequentially and write the results.

    Trait names are read with ``parse_trait_names`` and counted. For every
    distinct name a ``Trait(name, frequency)`` is built, passed through
    ``process_trait`` and handed to ``output_trait`` together with the two
    tab-delimited CSV writers. Progress is reported through a
    ``progressbar.ProgressBar`` showing an adaptive ETA.

    :param input_filepath: passed unchanged to ``parse_trait_names``.
    :param output_mappings_filepath: path of the tab-delimited mappings file;
        it is (re)created and starts with the header row
        ``#clinvar_trait_name, uri, label``.
    :param output_curation_filepath: path of the tab-delimited curation file;
        it is (re)created without a header row.
    :param filters: forwarded unchanged to ``process_trait``.
    :param zooma_host: forwarded unchanged to ``process_trait``.
    :param oxo_target_list: forwarded unchanged to ``process_trait``.
    :param oxo_distance: forwarded unchanged to ``process_trait``.
    """
    trait_names_list = parse_trait_names(input_filepath)
    trait_names_counter = Counter(trait_names_list)

    # Both files are consumed by csv.writer, so both are opened with
    # newline='' to stop the text layer from translating the writer's own
    # line terminator a second time.
    with open(output_mappings_filepath, "w", newline='') as mapping_file, \
            open(output_curation_filepath, "wt", newline='') as curation_file:
        mapping_writer = csv.writer(mapping_file, delimiter="\t")
        mapping_writer.writerow(["#clinvar_trait_name", "uri", "label"])
        curation_writer = csv.writer(curation_file, delimiter="\t")

        bar = progressbar.ProgressBar(max_value=len(trait_names_counter),
                                      widgets=[progressbar.AdaptiveETA(samples=1000)])

        for trait_name, freq in bar(trait_names_counter.items()):
            trait = Trait(trait_name, freq)
            trait = process_trait(trait, filters, zooma_host, oxo_target_list,
                                  oxo_distance)
            output_trait(trait, mapping_writer, curation_writer)
Esempio n. 4
0
def main(input_filepath, output_mappings_filepath, output_curation_filepath,
         filters, zooma_host, oxo_target_list, oxo_distance):
    """Map every distinct trait name, one at a time, and write the output.

    The names returned by ``parse_trait_names`` are counted; each distinct
    name and its frequency become a ``Trait``, which is passed through
    ``process_trait`` and then to ``output_trait`` along with the two
    tab-delimited CSV writers. The loop is wrapped in a
    ``progressbar.ProgressBar`` that displays an adaptive ETA.

    :param input_filepath: passed unchanged to ``parse_trait_names``.
    :param output_mappings_filepath: path of the tab-delimited mappings file;
        it is (re)created and starts with the header row
        ``#clinvar_trait_name, uri, label``.
    :param output_curation_filepath: path of the tab-delimited curation file;
        it is (re)created without a header row.
    :param filters: forwarded unchanged to ``process_trait``.
    :param zooma_host: forwarded unchanged to ``process_trait``.
    :param oxo_target_list: forwarded unchanged to ``process_trait``.
    :param oxo_distance: forwarded unchanged to ``process_trait``.
    """
    trait_names_list = parse_trait_names(input_filepath)
    trait_names_counter = Counter(trait_names_list)

    # csv.writer emits its own line terminator, so the underlying files must
    # be opened with newline=''; previously only the mapping file was.
    with open(output_mappings_filepath, "w", newline='') as mapping_file, \
            open(output_curation_filepath, "wt", newline='') as curation_file:
        mapping_writer = csv.writer(mapping_file, delimiter="\t")
        mapping_writer.writerow(["#clinvar_trait_name", "uri", "label"])
        curation_writer = csv.writer(curation_file, delimiter="\t")

        bar = progressbar.ProgressBar(
            max_value=len(trait_names_counter),
            widgets=[progressbar.AdaptiveETA(samples=1000)])

        for trait_name, freq in bar(trait_names_counter.items()):
            trait = Trait(trait_name, freq)
            trait = process_trait(trait, filters, zooma_host, oxo_target_list,
                                  oxo_distance)
            output_trait(trait, mapping_writer, curation_writer)