Ejemplo n.º 1
0
def read_GWAS(file, chsize):
    """Read a tab-separated GWAS file chunk by chunk and process each chunk.

    The file is parsed by ``pd.read_csv`` without a header row, ``chsize``
    rows at a time. ``check_headers`` is called on the first chunk only;
    every chunk is then passed to ``iterate_file`` as an iterator of row
    tuples that include the row index.

    Args:
        file: Passed unchanged to ``pd.read_csv`` and echoed in the
            progress message.
        chsize: Number of rows per chunk.
    """
    logger.info('initiating read_file at {}'.format(get_time()))
    reader = pd.read_csv(file, chunksize=chsize, header=None, sep='\t')
    for chunk_index, frame in enumerate(reader):
        print("entering file {}: chunk: {}".format(file, chunk_index))
        # Only the very first chunk is handed to the header check.
        if chunk_index == 0:
            check_headers(frame)
        rows = frame.itertuples(index=True)
        iterate_file(chunk_index, GWAS_set=rows)
Ejemplo n.º 2
0
def iterate_file(chnk_num, GWAS_set, SNP='SNP', i=0):
    """Run ``stat_test.init_compute`` on every row of one GWAS chunk.

    Each row is unpacked positionally: the first element is the row index,
    the next six elements are ignored, then FRQ, effect, SE and P follow;
    any remaining elements are collected into the list ``V``. The module
    global ``cur_study`` is forwarded with every call.

    Any ordinary exception ends the loop for this chunk; it is logged with
    its traceback and not re-raised.

    Args:
        chnk_num: Chunk index, forwarded to ``stat_test.init_compute`` and
            reported in the error message.
        GWAS_set: Iterable of row tuples with at least 11 elements.
        SNP: Label reported in the error message; this function never
            updates it, so the message always shows the value passed in.
        i: Row index reported when the failure happens before the first
            row has been unpacked.
    """
    logger.info('initiating iterate_file at {}'.format(get_time()))
    try:
        for i, _, _, _, _, _, _, FRQ, effect, SE, P, *V in GWAS_set:
            stat_test.init_compute(cur_study, FRQ, effect, SE, P, V, chnk_num)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still stop the program;
        # ``logger.exception`` records the traceback with the message.
        logger.exception(
            'during parseRsmerge at chunk {}, at SNP {}, at row {}'.format(
                chnk_num, SNP, i))
Ejemplo n.º 3
0
def init_file_reader():
    """Configure logging and read the GWAS file of every study.

    Logging is appended to ``../SNP.log`` at DEBUG level. The study list
    comes from ``read_meta()``; for each study the module global
    ``cur_study`` is set to that study and the file at its ``'path'`` entry
    is read in chunks of 5000 rows via ``read_GWAS``.
    """
    global cur_study
    logging.basicConfig(filename='../SNP.log',
                        filemode='a',
                        level=logging.DEBUG)

    logger.info('initiating SNP conformer at {}'.format(get_time()))
    # list containing all the 'study' object with meta data as attributes
    files = read_meta()
    for study in files:
        # The function declares ``cur_study`` global, so it is bound here
        # before the file is read.
        # NOTE(review): assumes the study entry itself is the value readers
        # of ``cur_study`` expect - confirm.
        cur_study = study
        path = study.get('path')
        read_GWAS(path, 5000)
Ejemplo n.º 4
0
def iterate_file(chnk_num, GWAS_chnk, GWAS_set, SNP='SNP', i=0):
    """Run the liftover and reference checks on every row of one chunk.

    Each row of ``GWAS_chnk`` must unpack into exactly 13 elements: the row
    index, two ignored elements, SNP, BP, two more ignored elements, then
    A1, A2, FRQ, effect, SE and P. For every row
    ``liftover.liftover_check`` is called first, followed by
    ``reference_check.reference_check``.

    Any ordinary exception ends the loop for this chunk; it is logged with
    its traceback and not re-raised.

    Args:
        chnk_num: Chunk index, forwarded to both checks and reported in
            the error message.
        GWAS_chnk: Iterable of 13-element row tuples.
        GWAS_set: Object forwarded unchanged to both checks.
        SNP: Value reported in the error message when the failure happens
            before the first row has been unpacked.
        i: Row index reported in that same situation.
    """
    logger.info('initiating iterate_file at {}'.format(get_time()))
    try:
        for i, _, _, SNP, BP, _, _, A1, A2, FRQ, effect, SE, P in GWAS_chnk:
            liftover.liftover_check(SNP, GWAS_set, chnk_num)
            reference_check.reference_check(SNP, BP, A1, A2, FRQ, effect,
                                            GWAS_set, chnk_num)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still stop the program;
        # ``logger.exception`` records the traceback with the message.
        logger.exception(
            'during parseRsmerge at chunk {}, at SNP {}, at row {}'.format(
                chnk_num, SNP, i))
Ejemplo n.º 5
0
def init_file_reader():
    """Set up logging, prepare both checkers and run the full read.

    Logging is appended to ``../SNP.log`` at DEBUG level. The collections
    of ``liftover`` and ``reference_check`` are initialised, ``read_file``
    is called on ``GWAS`` with a chunk size of 5000, and finally
    ``get_stats`` is called.
    """
    log_settings = {
        'filename': '../SNP.log',
        'filemode': 'a',
        'level': logging.DEBUG,
    }
    logging.basicConfig(**log_settings)

    start_message = 'initiating SNP conformer at {}'.format(get_time())
    logger.info(start_message)

    # Both checkers need their collection object before any row is
    # processed.
    liftover.init_collection()
    reference_check.init_collection()

    # Kick off the read; statistics are collected once it has finished.
    read_file(GWAS, 5000)
    get_stats()
Ejemplo n.º 6
0
def liftover_check(SNP, gwas_set, chk_num):
    """Look up ``SNP`` in the collection and replace it if it has changed.

    The module-level ``collection`` is queried with
    ``fetch('rs_low', SNP)``. When nothing is returned, ``nohit_sum`` is
    incremented. Otherwise the ``'rs_cur'`` value of the first match is
    compared with ``SNP``; if they differ, ``gwas_set.replace`` is called
    in place and ``liftover_sum`` is incremented.

    Any ordinary exception is logged with its traceback and not re-raised.

    Args:
        SNP: Identifier to look up.
        gwas_set: Object exposing ``replace(old, new, inplace=True)``.
            # NOTE(review): presumably a pandas object - confirm.
        chk_num: Chunk index, used only in the error message.
    """
    global liftover_sum, nohit_sum
    logger.info('initiating SNP conformer at {}'.format(get_time()))

    try:
        match = collection.fetch('rs_low', SNP)
        if not match:
            nohit_sum += 1
            return
        rs_cur = match[0].get('rs_cur')
        if rs_cur != SNP:
            gwas_set.replace(SNP, rs_cur, inplace=True)
            liftover_sum += 1
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still stop the program;
        # ``logger.exception`` records the traceback with the message.
        logger.exception('during liftover at chunk {}, at row {}'.format(
            chk_num, SNP))
Ejemplo n.º 7
0
def init_collection():
    """Bind the module-level ``collection`` to the 'SNP1000' collection.

    A ``DbQuerier`` is created for ``'../DB/GWAS'`` and its
    ``db_collection('SNP1000')`` result is stored in the global
    ``collection``.
    """
    global collection
    start_message = 'initiating SNP1000 collection at {}'.format(get_time())
    logger.info(start_message)
    querier = DbQuerier('../DB/GWAS')
    collection = querier.db_collection('SNP1000')