def read_assoc(assoc_loc, gene_name, gene_region_dict):
    """Find the lowest-p-value SNP inside a gene's region of an assoc table.

    The first line of ``assoc_loc`` is the header; it is passed to
    ``pc_toolbox.read_assoc_titles`` to get the column indices.  Every later
    row whose p-value field is not ``'NA'``, whose chromosome equals the
    region chromosome, and whose position satisfies
    ``pc_toolbox.in_interval(pos, start, end)`` competes for the lowest
    p-value.

    Args:
        assoc_loc: path to the whitespace-delimited association table.
        gene_name: key into ``gene_region_dict``; also stored in the result.
        gene_region_dict: maps a gene name to a ``(chr, start, end)`` triple.
            ``chr`` is compared against ``int(<chr column>)``.

    Returns:
        ``(best_row, sig_list)``.  ``best_row`` is the list
        ``[chr, start, end, gene_name, snp, lz_snp, p, pos, note]`` (all
        strings) for the retained SNP, or ``[]`` if no row qualified.
        ``note`` is ``'--'`` unless several SNPs tie, in which case it is
        ``'<count>SNPs(p=<p>)'``.  ``sig_list`` holds one
        ``(snp, lz_snp, p)`` string tuple per tied SNP and is empty when the
        best p-value is unique.
    """
    reg_chr, reg_start, reg_end = gene_region_dict[gene_name]
    best_p = 1
    best_out = []
    tie_count = 1
    sig_list = []

    with open(assoc_loc, mode="r") as assoc:
        header = next(assoc, None)
        if header is None:
            # Empty file: nothing to index, nothing to report.
            return best_out, sig_list
        # c_interval is not a parameter of this function; it is resolved
        # from the enclosing (module) scope at call time.
        assoc_i = pc_toolbox.read_assoc_titles(header, c_interval)

        for line in assoc:
            line_list = line.strip().split()
            if not line_list:
                # A blank row has no columns to index.
                continue
            snp_pos = int(line_list[assoc_i['pos']])
            if line_list[assoc_i['p']] == 'NA':
                continue
            if reg_chr != int(line_list[assoc_i['chr']]):
                continue
            if not pc_toolbox.in_interval(snp_pos, reg_start, reg_end):
                continue

            cur_p = float(line_list[assoc_i['p']])
            cur_snp, lz_snp = pc_toolbox.correct_snp(assoc_i, line_list)
            cur_out = [str(reg_chr), str(reg_start), str(reg_end), gene_name,
                       cur_snp, lz_snp, str(cur_p), str(snp_pos), '--']

            # The very first candidate may have p equal to the initial bound
            # (1).  It has no predecessor to tie with, so it must be adopted
            # rather than sent to the tie branch, which would index an empty
            # best_out and raise IndexError.
            first_candidate = not best_out and cur_p == best_p
            if cur_p < best_p or first_candidate:
                tie_count = 1
                best_p = cur_p
                best_out = cur_out
                sig_list = []
            elif cur_p == best_p:
                tie_count = tie_count + 1
                cur_tup = (cur_snp, lz_snp, str(cur_p))
                old_tup = (best_out[4], best_out[5], str(best_p))
                sig_list.append(cur_tup)
                if old_tup not in sig_list:
                    sig_list.append(old_tup)
                print('NOTE: Both {0} and {1} have p-values of {2}.'.format(best_out[4], cur_snp, cur_p))
                print('There are no Z-scores with which to decide between them, so I arbitrarily retain {0}.'.format(best_out[4]))
                # Record the tie in the note field of the retained row.
                best_out[8] = '{0}SNPs(p={1})'.format(tie_count, best_p)
                print(best_out[8])
    return best_out, sig_list
def create_index_dict(table_loc, table_type):
    """Build the column-index dictionary from a table's header line.

    Only the first line of the file is read; the remaining rows are not
    needed to index the columns.

    Args:
        table_loc: path to the table.
        table_type: ``'family'`` selects ``pc_toolbox.read_fam_titles``;
            ``'assoc'`` selects ``pc_toolbox.read_assoc_titles`` (called with
            ``c_interval=None, plink_test='logistic'``); any other value
            falls back to ``pc_toolbox.read_meta_titles``.

    Returns:
        Whatever the selected ``pc_toolbox`` title reader returns for the
        header line.

    Raises:
        ValueError: if ``table_loc`` contains no lines at all.
    """
    with open(table_loc, mode="r") as table:
        header = next(table, None)
    if header is None:
        raise ValueError('{0} is empty; there is no header line to index.'.format(table_loc))

    if table_type == 'family':
        return pc_toolbox.read_fam_titles(header)
    if table_type == 'assoc':
        return pc_toolbox.read_assoc_titles(header, c_interval=None,
                                            plink_test='logistic')
    return pc_toolbox.read_meta_titles(header)
def filter_result(result_file, snpstar):
    """Return the assoc tuple for the row whose SNP column equals ``snpstar``.

    The header (first line) is parsed with
    ``pc_toolbox.read_assoc_titles(line, .95, 'logistic')``; each later row is
    split on whitespace and its ``'snp'`` column compared with ``snpstar``.

    Args:
        result_file: path to the whitespace-delimited result table.
        snpstar: SNP identifier to look for.

    Returns:
        ``pc_toolbox.assoc_tuple(line, index_dict)`` for the matching row, or
        ``None`` when the file is empty or no row matches (previously the
        result name was never bound in that case).

    NOTE(review): the original indentation was lost.  This assumes the
    ``return`` sat at function level, so when several rows match, the last
    one wins -- confirm against the upstream file.
    """
    info_tuple = None
    with open(result_file, mode="r") as result:
        header = next(result, None)
        if header is None:
            return info_tuple
        index_dict = pc_toolbox.read_assoc_titles(header, .95, 'logistic')
        for line in result:
            lsplit = line.strip().split()
            if lsplit[index_dict['snp']] == snpstar:
                info_tuple = pc_toolbox.assoc_tuple(line, index_dict)
    return info_tuple
def edit_table(table_loc, table_type):
    """Collect the rows of a table whose p-value is exactly zero.

    Args:
        table_loc: path to the whitespace-delimited table.
        table_type: ``"assoc"`` parses the header with
            ``pc_toolbox.read_assoc_titles``; ``"eqtl"`` uses
            ``achilleas_yank.read_eqtl_titles(header, perm)`` (``perm`` is
            read from module scope); anything else uses
            ``pc_toolbox.read_meta_titles``.  The eqtl and meta branches
            print the resulting index dictionary.

    Returns:
        ``(index_dict, ztup_sort, table_loc)`` where ``ztup_sort`` is a list
        of ``ZTup(lz, chro, pos, abs_z)`` named tuples, one per row with
        p == 0, sorted by ``abs_z`` in descending order.  ``table_loc`` is
        returned unchanged.

    Raises:
        ValueError: if the table has no header line (empty file).
    """
    # Build the record type once instead of once per matching row.
    ZTup = namedtuple("ZTup", "lz,chro,pos,abs_z")
    ztup_list = []

    with open(table_loc, mode="r") as table:
        header = next(table, None)
        if header is None:
            raise ValueError('{0} is empty; there is no header line to index.'.format(table_loc))

        if table_type == "assoc":
            index_dict = pc_toolbox.read_assoc_titles(
                header, c_interval=None, plink_test="logistic"
            )
        elif table_type == "eqtl":
            index_dict = achilleas_yank.read_eqtl_titles(header, perm)
            print(index_dict)
        else:
            index_dict = pc_toolbox.read_meta_titles(header)
            print(index_dict)

        for line in table:
            line_split = line.strip().split()
            # Kept from the original: a row whose position is not an integer
            # is still rejected here even though the value is not used.
            int(line_split[index_dict["pos"]])

            p_field = line_split[index_dict["p"]]
            if p_field == "NA" or not float(p_field) == 0:
                continue

            # NOTE(review): `assoc` is read from module scope, not derived
            # from table_type -- confirm this is intended.
            if assoc:
                z_or_t = abs(float(line_split[index_dict["t"]]))
            else:
                z_or_t = abs(float(line_split[index_dict["z"]]))
            ztup_list.append(
                ZTup(
                    lz=line_split[index_dict["snp"]],
                    chro=line_split[index_dict["chr"]],
                    pos=line_split[index_dict["pos"]],
                    abs_z=z_or_t,
                )
            )

    ztup_sort = sorted(ztup_list, key=lambda z: z.abs_z, reverse=True)
    return index_dict, ztup_sort, table_loc
def make_snp_pos_list(input_assoc, output_name):
    """Extract (snp, pos) pairs from an assoc table and write them out.

    The first line of ``input_assoc`` is parsed with
    ``pc_toolbox.read_assoc_titles(line, .95, 'logistic')`` to locate the
    ``'snp'`` and ``'pos'`` columns.  Each later row contributes one
    ``(snp, pos)`` tuple of strings.  The pairs are then written to
    ``output_name``, one per line, tab-separated.

    Args:
        input_assoc: path to the whitespace-delimited association table.
        output_name: path of the file to (over)write with the pairs.

    Returns:
        The list of ``(snp, pos)`` string tuples, in file order.
    """
    pairs = []
    with open(input_assoc, mode="r") as assoc:
        for row_number, aline in enumerate(assoc):
            if row_number == 0:
                # Header row: learn where the columns live.
                index_dict = pc_toolbox.read_assoc_titles(aline, .95, 'logistic')
                continue
            fields = aline.strip().split()
            pairs.append((fields[index_dict['snp']], fields[index_dict['pos']]))

    with open(output_name, mode="w") as output:
        for pair in pairs:
            output.write('\t'.join(pair) + '\n')
    return pairs
def read_table(table_loc, region_list, assoc, annot_dict):
    """Collect SNPs that pass the p-value bound and lie outside every region.

    Args:
        table_loc: path to the whitespace-delimited table.
        region_list: iterable of objects with ``chro``, ``start`` and ``end``
            attributes.  ``start``/``end`` are multiplied by 1e6 before being
            compared with SNP positions (presumably megabases to base pairs
            -- confirm with the caller).
        assoc: truthy to parse the header with
            ``pc_toolbox.read_assoc_titles(line, .95, 'logistic')``, falsy to
            use ``pc_toolbox.read_meta_titles``.  Also forwarded to
            ``store_line``.
        annot_dict: forwarded unchanged to ``store_line``.

    Returns:
        The ``store_line`` results for every qualifying row, sorted by their
        ``chro`` and then ``p`` attributes.  A row qualifies when its p-value
        is a number (per ``pc_toolbox.is_number``) not greater than the
        module-level ``P_BOUND`` and it falls in none of the regions.

    NOTE(review): the original indentation was lost.  The debug prints are
    assumed to run once after the file has been read, not once per row.
    """
    standardizer = 1e6
    # Convert the region bounds once; they do not depend on the row.
    region_bounds = [
        (int(region.chro),
         int(float(region.start) * standardizer),
         int(float(region.end) * standardizer))
        for region in region_list
    ]

    snp_list = []
    with open(table_loc, mode="r") as table:
        header = next(table, None)
        if header is not None:
            if assoc:
                table_indices = pc_toolbox.read_assoc_titles(header, .95, 'logistic')
            else:
                table_indices = pc_toolbox.read_meta_titles(header)

            for line in table:
                line_list = line.strip().split()
                snp_pos = int(line_list[table_indices['pos']])
                chro = int(line_list[table_indices['chr']])
                p_str = line_list[table_indices['p']]

                # Written as "not >" (rather than "<=") so values that
                # compare false both ways are treated as before.
                p_ok = pc_toolbox.is_number(p_str) and not float(p_str) > P_BOUND
                if not p_ok:
                    continue

                in_reg = any(
                    reg_chr == chro
                    and pc_toolbox.in_interval(snp_pos, reg_start, reg_end)
                    for reg_chr, reg_start, reg_end in region_bounds
                )
                if not in_reg:
                    snp_list.append(store_line(line, table_indices, assoc, annot_dict))

    print(snp_list)
    sys.stdout.flush()
    tuple_list = sorted(snp_list, key=attrgetter('chro', 'p'))
    print(snp_list)
    print(tuple_list)
    sys.stdout.flush()
    return tuple_list
def edit_table(table_loc, fix):
    """Rename non-rs SNPs to ``chr<chr>:<pos>`` and collect p == 0 rows.

    Every data row whose SNP id does not start with ``'rs'`` has that id
    replaced by ``'chr' + <chr column> + ':' + <pos>``.  Rows whose p-value
    is exactly zero are recorded as ``ZTup(lz, chro, pos)`` named tuples.
    When ``fix`` is truthy, every line (header included) is re-written,
    tab-separated, to the path returned by ``new_table_loc(table_loc)``.

    Args:
        table_loc: path to the whitespace-delimited input table.
        fix: truthy to write the corrected table to a new file.

    Returns:
        ``(index_dict, ztup_list, table_loc)``; ``table_loc`` is the new
        file's path when ``fix`` is truthy, otherwise the input path.

    Raises:
        ValueError: if the table has no header line (empty file).

    NOTE(review): the original indentation was lost.  The write is assumed to
    apply to every line, header included; `assoc` is read from module scope.
    """
    # Build the record type once instead of once per matching row.
    ZTup = namedtuple('ZTup', 'lz,chro,pos')
    ztup_list = []
    index_dict = None
    header_seen = False
    lz_table_loc = new_table_loc(table_loc)

    with open(table_loc, mode='r') as table:
        lz_table = open(lz_table_loc, mode='w') if fix else None
        try:
            for line in table:
                line_split = line.strip().split()
                if not header_seen:
                    if assoc:
                        index_dict = pc_toolbox.read_assoc_titles(
                            line, c_interval=None, plink_test='logistic')
                    else:
                        index_dict = pc_toolbox.read_meta_titles(line)
                    header_seen = True
                else:
                    snp_col = index_dict['snp']
                    cur_chr = line_split[index_dict['chr']]
                    cur_pos = int(line_split[index_dict['pos']])
                    if not line_split[snp_col].startswith('rs'):
                        line_split[snp_col] = 'chr' + cur_chr + ':' + str(cur_pos)
                    p_field = line_split[index_dict['p']]
                    if not p_field == 'NA' and float(p_field) == 0:
                        ztup_list.append(ZTup(lz=line_split[snp_col],
                                              chro=cur_chr,
                                              pos=line_split[index_dict['pos']]))
                if lz_table is not None:
                    lz_table.write('\t'.join(line_split) + '\n')
        finally:
            # Close the output even if a malformed row raised mid-file.
            if lz_table is not None:
                lz_table.close()

    if not header_seen:
        raise ValueError('{0} is empty; there is no header line to index.'.format(table_loc))
    if fix:
        table_loc = lz_table_loc
    return index_dict, ztup_list, table_loc
def read_table(table_loc, chromosome, index_dict):
    """Return the lowest and highest positions recorded for one chromosome.

    Args:
        table_loc: path to the whitespace-delimited table.
        chromosome: value compared (with ``==``) against each row's raw
            ``'chr'`` column string.
        index_dict: replaced by the dictionary parsed from the table's own
            header as soon as the first line is read.

    Returns:
        ``(low_pos, high_pos)``.  If no row matches ``chromosome`` the
        starting sentinels ``(5000000000, 0)`` are returned.
    """
    lowest = 5000000000
    highest = 0
    with open(table_loc, mode='r') as table:
        for row_number, line in enumerate(table):
            if row_number == 0:
                # `assoc` is a module-level flag selecting the header parser.
                if assoc:
                    index_dict = pc_toolbox.read_assoc_titles(
                        line, c_interval=None, plink_test='logistic')
                else:
                    index_dict = pc_toolbox.read_meta_titles(line)
                continue

            fields = line.strip().split()
            row_chr = fields[index_dict['chr']]
            row_pos = int(fields[index_dict['pos']])
            if chromosome == row_chr:
                lowest = min(lowest, row_pos)
                highest = max(highest, row_pos)
    return (lowest, highest)
def read_assoc(assoc_loc, SNP_loc, c_interval, plink_test, exp):
    """Collect assoc rows whose p-value is below ``p_bound * 10**exp``.

    On the header line: if the module-level ``firstloop`` flag is set, the
    header is parsed into the module-level ``index_dict`` and the flag is
    cleared; if the module-level ``loopcount`` is 0 and ``SNP_loc`` exists,
    that file is truncated.  Every later line is turned into a tuple with
    ``pc_toolbox.assoc_tuple`` and kept when its ``p`` is a number (per
    ``is_number``) smaller than ``float(p_bound) * 10**exp``.

    Args:
        assoc_loc: path to the association table.
        SNP_loc: path of an output file to empty on the first loop.
        c_interval: forwarded to ``pc_toolbox.read_assoc_titles``.
        plink_test: forwarded to ``pc_toolbox.read_assoc_titles``.
        exp: power of ten applied to the module-level ``p_bound``.

    Returns:
        List of the tuples that passed the p-value threshold, in file order.

    NOTE(review): the original indentation was lost.  The ``loopcount``
    check is assumed to be a sibling of the ``firstloop`` check, not nested
    inside it -- confirm against the upstream file.
    """
    global p_bound, index_dict, firstloop
    kept = []
    with open(assoc_loc, mode='r') as assoc_file:
        for line_no, assoc_line in enumerate(assoc_file):
            if line_no > 0:
                record = pc_toolbox.assoc_tuple(assoc_line, index_dict)
                if not is_number(record.p):
                    continue
                if record.p < float(p_bound) * math.pow(10, exp):
                    kept.append(record)
                continue

            # Header line.
            if firstloop:
                index_dict = pc_toolbox.read_assoc_titles(assoc_line, c_interval, plink_test)
                firstloop = False
            if loopcount == 0 and os.path.exists(SNP_loc):
                # Opening for write and closing immediately empties the file.
                with open(SNP_loc, mode='w'):
                    pass
    return kept
'a1':a1_index, old_p = 1 old_out={} old_t = 0 reg_chr = gene.chro reg_start = int(float(gene.start) *standardizer) reg_end = int(float(gene.end) * standardizer) sym = gene.sym band = gene.band assoc_counter = 0 multi_counter = 1 zero_list = [] with open(assoc_loc, mode="r") as assoc: for line in meta: if line1: assoc_indices = pc_toolbox.read_assoc_titles(line) assoc_counter = 1 line1 = False old_out = {'chr':'','band':'','start':'', 'end':'','sym':'','snp':'', 'p':'1','pos':'', 'or':'0', 'hi':'0', 'lo':'0','a1':'--', 't':'0','note':'--','checkme':'0'} else: line_info = read_assoc(line,.95, 'logistic') ## ## ## ## ##NOTE NOTE NOTE THIS IS WHERE EDITING STOPPED!!!! RESUME CHANGES HERE!!!!!!!!!!!!! ##