コード例 #1
0
            logging.debug(record)
            raise
        for record_mapped in _map_line_to_json(record_parsed, hg19):
            yield record_mapped


def load_data(data_folder, version):
    """Parse the ClinVar dump found in ``data_folder`` and return merged records.

    Args:
        data_folder: Directory expected to contain exactly one file matching
            ``GLOB_PATTERN``; it is also handed to ``import_clinvar_lib``.
        version: Genome assembly name. The feeder is run in hg19 mode when
            this equals ``"hg19"`` and in its non-hg19 mode otherwise.

    Returns:
        The result of ``merge_rcv_accession`` applied to the parsed records
        sorted by their ``_id`` key.

    Raises:
        AssertionError: If the number of files matching ``GLOB_PATTERN`` in
            ``data_folder`` is not exactly one.
    """
    # try to get logger from uploader
    # The module-level name ``logging`` is deliberately rebound to this
    # logger: other code in this module logs through ``logging.debug(...)``.
    import logging as loggingmod
    global logging
    logging = loggingmod.getLogger("clinvar_upload")

    import_clinvar_lib(data_folder)
    files = glob.glob(os.path.join(data_folder, GLOB_PATTERN))
    # Explicit check instead of an ``assert`` statement so the validation is
    # still enforced when Python runs with -O; the exception type and the
    # message are unchanged.
    if len(files) != 1:
        raise AssertionError(
            "Expecting only one file matching '%s', got: %s" % (
                GLOB_PATTERN, files))
    input_file = files[0]
    data_generator = rcv_feeder(input_file, version == "hg19")
    # TODO: why do we sort this list ? this prevent from using yield/iterator
    # NOTE(review): presumably merge_rcv_accession needs records sharing an
    # _id to be adjacent -- confirm before removing the sort.
    # sorted() consumes the generator directly; no intermediate list needed.
    data_list_sorted = sorted(data_generator, key=lambda k: k['_id'])
    return merge_rcv_accession(data_list_sorted)


if __name__ == "__main__":
    # Manual entry point: parse the ClinVar dump from the configured folder.
    import sys

    from biothings.utils.mongo import get_data_folder

    data_folder = get_data_folder("clinvar")
    # load_data() requires a ``version`` argument; calling it without one
    # raised TypeError. Take the assembly from the command line and fall
    # back to "hg19" when none is given.
    version = sys.argv[1] if len(sys.argv) > 1 else "hg19"
    load_data(data_folder=data_folder, version=version)
コード例 #2
0
            continue
        try:
            record_parsed = clinvar.parseString(record, silence=1)
        except:
            logging.debug(record)
            raise
        for record_mapped in _map_line_to_json(record_parsed, hg19):
            yield record_mapped

def load_data(data_folder, version):
    """Parse the ClinVar dump found in ``data_folder`` and return merged records.

    Args:
        data_folder: Directory expected to contain exactly one file matching
            ``GLOB_PATTERN``; it is also handed to ``import_clinvar_lib``.
        version: Genome assembly name. The feeder is run in hg19 mode when
            this equals ``"hg19"`` and in its non-hg19 mode otherwise.

    Returns:
        The result of ``merge_rcv_accession`` applied to the parsed records
        sorted by their ``_id`` key.

    Raises:
        AssertionError: If the number of files matching ``GLOB_PATTERN`` in
            ``data_folder`` is not exactly one.
    """
    # try to get logger from uploader
    # The module-level name ``logging`` is deliberately rebound to this
    # logger: other code in this module logs through ``logging.debug(...)``.
    import logging as loggingmod
    global logging
    logging = loggingmod.getLogger("clinvar_upload")

    import_clinvar_lib(data_folder)
    files = glob.glob(os.path.join(data_folder, GLOB_PATTERN))
    # Explicit check instead of an ``assert`` statement so the validation is
    # still enforced when Python runs with -O; the exception type and the
    # message are unchanged.
    if len(files) != 1:
        raise AssertionError(
            "Expecting only one file matching '%s', got: %s" % (GLOB_PATTERN, files))
    input_file = files[0]
    data_generator = rcv_feeder(input_file, version == "hg19")
    # TODO: why do we sort this list ? this prevent from using yield/iterator
    # NOTE(review): presumably merge_rcv_accession needs records sharing an
    # _id to be adjacent -- confirm before removing the sort.
    # sorted() consumes the generator directly; no intermediate list needed.
    data_list_sorted = sorted(data_generator, key=lambda k: k['_id'])
    return merge_rcv_accession(data_list_sorted)

if __name__ == "__main__":
    # Manual entry point: parse the ClinVar dump from the configured folder.
    import sys

    from biothings.utils.mongo import get_data_folder

    data_folder = get_data_folder("clinvar")
    # load_data() requires a ``version`` argument; calling it without one
    # raised TypeError. Take the assembly from the command line and fall
    # back to "hg19" when none is given.
    version = sys.argv[1] if len(sys.argv) > 1 else "hg19"
    load_data(data_folder=data_folder, version=version)