def read_GWAS(file, chsize):
    """Read a tab-separated GWAS file chunk by chunk and process each chunk.

    Args:
        file: Path (or file-like object) handed to ``pandas.read_csv``.
        chsize: Number of rows per chunk (``chunksize`` of ``read_csv``).

    The file is read without a header row (``header=None``). The first
    chunk is additionally passed to ``check_headers``; every chunk is then
    turned into row tuples (index included) and given to ``iterate_file``.
    """
    logger.info('initiating read_file at {}'.format(get_time()))
    reader = pd.read_csv(file, chunksize=chsize, header=None, sep='\t')
    for index, frame in enumerate(reader):
        print("entering file {}: chunk: {}".format(file, index))
        if index == 0:
            # Only the very first chunk goes through the header check.
            check_headers(frame)
        rows = frame.itertuples(index=True)
        iterate_file(index, GWAS_set=rows)
def iterate_file(chnk_num, GWAS_set, SNP='SNP', i=0):
    """Feed every row of one chunk to ``stat_test.init_compute``.

    Args:
        chnk_num: Number of the chunk being processed; forwarded to
            ``stat_test.init_compute`` and included in the error log.
        GWAS_set: Iterable of row tuples. Each row must unpack into at
            least eleven fields: the row index, six ignored fields, then
            FRQ, effect, SE and P. Any remaining fields are collected into
            a list and forwarded as well.
        SNP: Label used in the error message. This function never updates
            it, so the logged value is whatever the caller passed.
        i: Row index reported if a failure happens before the first row
            has been unpacked; overwritten by the loop afterwards.

    Any ``Exception`` raised while iterating stops processing of the
    remaining rows of this chunk; it is logged with its traceback and not
    re-raised. ``KeyboardInterrupt`` and ``SystemExit`` are deliberately
    left to propagate so the run can still be interrupted.
    """
    logger.info('initiating iterate_file at {}'.format(get_time()))
    try:
        for i, _, _, _, _, _, _, FRQ, effect, SE, P, *V in GWAS_set:
            stat_test.init_compute(cur_study, FRQ, effect, SE, P, V, chnk_num)
    except Exception:
        # logger.exception records the message at ERROR level together with
        # the active traceback, replacing the raw sys.exc_info() tuple dump.
        logger.exception(
            'during parseRsmerge at chunk {}, at SNP {}, at row {}'.format(
                chnk_num, SNP, i))
def init_file_reader():
    """Configure logging, then read every study file listed by ``read_meta``.

    Log records are appended to ``../SNP.log`` at DEBUG level. For each
    study returned by ``read_meta()`` the module-level ``cur_study`` is set
    to that study and the file at ``study.get('path')`` is read in chunks
    of 5000 rows via ``read_GWAS``.
    """
    global cur_study
    logging.basicConfig(filename='../SNP.log', filemode='a',
                        level=logging.DEBUG)
    logger.info('initiating SNP conformer at {}'.format(get_time()))
    # read_meta() supplies the studies; each one is queried for its 'path'.
    for study in read_meta():
        # The original declared `global cur_study` but never assigned it,
        # leaving the global undefined (or stale) for code that reads it
        # while rows are processed.
        # NOTE(review): assumes cur_study is meant to hold the study entry
        # itself - confirm against what stat_test.init_compute expects.
        cur_study = study
        read_GWAS(study.get('path'), 5000)
def iterate_file(chnk_num, GWAS_chnk, GWAS_set, SNP='SNP', i=0):
    """Run the liftover and reference checks for every row of one chunk.

    Args:
        chnk_num: Number of the chunk being processed; forwarded to both
            checks and included in the error log.
        GWAS_chnk: Iterable of row tuples. Each row must unpack into exactly
            thirteen fields: row index, two ignored fields, SNP, BP, two
            ignored fields, A1, A2, FRQ, effect, SE and P.
        GWAS_set: Object forwarded unchanged to ``liftover_check`` and
            ``reference_check``.
        SNP: Value reported in the error log if a failure happens before
            the first row has been unpacked; overwritten by the loop.
        i: Row index reported under the same condition; overwritten by the
            loop.

    Any ``Exception`` raised while iterating stops processing of the
    remaining rows of this chunk; it is logged with its traceback and not
    re-raised. ``KeyboardInterrupt`` and ``SystemExit`` are deliberately
    left to propagate so the run can still be interrupted.
    """
    logger.info('initiating iterate_file at {}'.format(get_time()))
    try:
        # SE and P are unpacked only to name the row layout; they are not
        # used by either check.
        for i, _, _, SNP, BP, _, _, A1, A2, FRQ, effect, SE, P in GWAS_chnk:
            liftover.liftover_check(SNP, GWAS_set, chnk_num)
            reference_check.reference_check(
                SNP, BP, A1, A2, FRQ, effect, GWAS_set, chnk_num)
    except Exception:
        # logger.exception records the message at ERROR level together with
        # the active traceback, replacing the raw sys.exc_info() tuple dump.
        logger.exception(
            'during parseRsmerge at chunk {}, at SNP {}, at row {}'.format(
                chnk_num, SNP, i))
def init_file_reader():
    """Entry point: set up logging and collections, read the file, get stats.

    Steps, in order:
      1. Append log records to ``../SNP.log`` at DEBUG level.
      2. Initialise the collection objects of the ``liftover`` and
         ``reference_check`` modules.
      3. Call ``read_file`` on the module-level ``GWAS`` with a chunk size
         of 5000.
      4. Call ``get_stats`` once reading has finished.
    """
    logging.basicConfig(
        filename='../SNP.log',
        filemode='a',
        level=logging.DEBUG,
    )
    logger.info('initiating SNP conformer at {}'.format(get_time()))

    # Both checking modules need their collection object before any row
    # is processed.
    liftover.init_collection()
    reference_check.init_collection()

    # Reading the file kicks off the whole procedure.
    chunk_size = 5000
    read_file(GWAS, chunk_size)

    # Called last, after the read has completed.
    get_stats()
def liftover_check(SNP, gwas_set, chk_num):
    """Look up one SNP id and replace it in ``gwas_set`` if it has changed.

    Args:
        SNP: Identifier to look up via ``collection.fetch('rs_low', SNP)``.
        gwas_set: Object whose ``replace(SNP, rs_cur, inplace=True)`` is
            called when the stored ``rs_cur`` differs from ``SNP``.
        chk_num: Chunk number, used only in the error log.

    Side effects:
        Increments the module-level ``liftover_sum`` when a replacement is
        made and ``nohit_sum`` when the lookup returns nothing. A hit whose
        ``rs_cur`` equals ``SNP`` changes nothing and is not counted.

    Any ``Exception`` is logged with its traceback and not re-raised.
    ``KeyboardInterrupt`` and ``SystemExit`` are deliberately left to
    propagate so the run can still be interrupted.
    """
    global collection
    global liftover_sum, nohit_sum
    logger.info('initiating SNP conformer at {}'.format(get_time()))
    try:
        match = collection.fetch('rs_low', SNP)
        if not match:
            nohit_sum += 1
            return
        # Only the first returned entry is consulted.
        rs_cur = match[0].get('rs_cur')
        if rs_cur != SNP:
            gwas_set.replace(SNP, rs_cur, inplace=True)
            liftover_sum += 1
        # An already-current id is intentionally neither logged nor counted.
    except Exception:
        # logger.exception records the message at ERROR level together with
        # the active traceback, replacing the raw sys.exc_info() tuple dump.
        logger.exception('during liftover at chunk {}, at row {}'.format(
            chk_num, SNP))
def init_collection():
    """Create the module-level ``collection`` used for SNP lookups.

    Builds a ``DbQuerier`` for ``'../DB/GWAS'`` and stores the result of
    its ``db_collection('SNP1000')`` call in the global ``collection``.
    """
    global collection
    logger.info('initiating SNP1000 collection at {}'.format(get_time()))
    querier = DbQuerier('../DB/GWAS')
    collection = querier.db_collection('SNP1000')