def identify_zerops(region_list, zero_list):
    """Collect the regions that contain at least one SNP from ``zero_list``.

    A region matches a SNP when their ``chro`` attributes are equal as
    strings and ``pc_toolbox.in_interval`` accepts the SNP position against
    the region's ``start``/``end`` (all three converted to float).  Every
    matching region is announced on stdout once and collected.  Despite the
    wording of the printed note, ``region_list`` itself is not modified.

    Args:
        region_list: regions exposing ``chro``, ``start``, ``end`` and ``ID``.
        zero_list: SNP records exposing ``chro``, ``pos`` and ``lz``.

    Returns:
        ``(region_list, z_region_list)``: the input list, untouched, and the
        matching regions in order of first match.
    """
    flagged = []
    for ztup in zero_list:
        for candidate in region_list:
            if str(ztup.chro) != str(candidate.chro):
                continue
            if not pc_toolbox.in_interval(
                float(ztup.pos), float(candidate.start), float(candidate.end)
            ):
                continue
            if candidate in flagged:
                # Already reported for an earlier SNP; report each region once.
                continue
            flagged.append(candidate)
            print(
                """
***********************************************************
NOTE: REMOVING Region {0} from first list due to:
{1} at chr{2}:{3} with p=0.
***********************************************************
""".format(
                    candidate.ID, ztup.lz, ztup.chro, ztup.pos
                )
            )
            sys.stdout.flush()
    sys.stdout.flush()
    return region_list, flagged
def read_meta(meta_loc, region, outfolder, build, annot_dict=None):
    """Write the meta-analysis rows that fall inside ``region`` to a table.

    The output file is ``<outfolder>/<region.band>.tbl``.  It always starts
    with a tab-separated title row; one row follows for every SNP in
    ``meta_loc`` that is on the region's chromosome, inside the region
    according to ``pc_toolbox.in_interval``, and whose weight exceeds the
    module-level ``WEIGHT_MIN``.

    Args:
        meta_loc: path of the whitespace-delimited meta-analysis table; its
            first line is parsed by ``pc_toolbox.read_meta_titles``.
        region: object exposing ``chro``, ``start``, ``end`` and ``band``.
            ``start``/``end`` are multiplied by 1e6 before being compared
            with the positions in the file (presumably Mb -> bp; confirm).
        outfolder: directory that receives the ``.tbl`` file.
        build: genome build label used in the ``<build>_pos`` column title.
        annot_dict: mapping from the SNP id in the table to an object with
            ``rs`` and ``name`` attributes.  NOTE(review): the default of
            ``None`` is kept for interface compatibility, but a mapping is
            needed as soon as one row qualifies.

    Returns:
        None.
    """
    standardizer = 1e6
    reg_chr = region.chro
    reg_start = int(float(region.start) * standardizer)
    reg_end = int(float(region.end) * standardizer)
    outfile = os.path.join(outfolder, region.band + '.tbl')
    title_list = ['imchip_name', 'rs_name', 'chr', '{0}_pos'.format(build),
                  'p-value', 'z-score', 'weight', 'a1', 'a2']

    # Context managers guarantee the output handle is closed (and flushed)
    # even when parsing a row raises.
    with open(outfile, mode="w") as out:
        out.write('\t'.join(title_list) + '\n')
        with open(meta_loc, mode="r") as meta:
            header = next(meta, None)
            if header is None:
                # Empty input: the title row is all there is to write.
                return
            meta_indices = pc_toolbox.read_meta_titles(header)
            for line in meta:
                line_list = line.strip().split()
                if not line_list:
                    # Tolerate blank lines instead of failing with IndexError.
                    continue
                snp_pos = int(line_list[meta_indices['pos']])
                weight = line_list[meta_indices['weight']]
                same_chr = int(reg_chr) == int(line_list[meta_indices['chr']])
                if not (same_chr and
                        pc_toolbox.in_interval(snp_pos, reg_start, reg_end)):
                    continue
                if not float(weight) > WEIGHT_MIN:
                    continue
                p_val = line_list[meta_indices['p']]
                lz_snp = line_list[meta_indices['snp']]
                z_score = line_list[meta_indices['z']]
                a1 = line_list[meta_indices['a1']]
                a2 = line_list[meta_indices['a2']]
                snp_rs = annot_dict[lz_snp].rs
                snp_im = annot_dict[lz_snp].name
                # str() so a numeric chromosome does not break the join.
                out.write('\t'.join([snp_im, snp_rs, str(reg_chr),
                                     str(snp_pos), p_val, z_score, weight,
                                     a1, a2]) + '\n')
def snp_in_interval(region_tuple, line_list, index_dict):
    """Tell whether the SNP described by ``line_list`` lies inside a region.

    Args:
        region_tuple: ``(chromosome, start, end)`` of the region.
        line_list: the split fields of one table row.
        index_dict: maps ``'pos'`` and ``'chr'`` to column indices in
            ``line_list``.

    Returns:
        True when the row's chromosome equals the region's (both compared as
        int) and ``pc_toolbox.in_interval`` accepts the row's position against
        the region bounds; False otherwise.
    """
    region_chr, lower, upper = region_tuple
    position = int(line_list[index_dict['pos']])
    if int(region_chr) != int(line_list[index_dict['chr']]):
        return False
    return True if pc_toolbox.in_interval(position, lower, upper) else False
def read_assoc(assoc_loc, gene_name, gene_region_dict):
    """Pick the SNP with the smallest p-value inside one gene's region.

    The first line of ``assoc_loc`` is parsed by ``read_assoc_titles``; every
    later row is considered when its p-value is not ``'NA'``, its chromosome
    equals the region's and ``pc_toolbox.in_interval`` accepts its position.
    When several SNPs share the smallest p-value the first one seen is kept,
    the tie is reported on stdout and recorded in the returned tie list.

    Args:
        assoc_loc: path of the whitespace-delimited association table.
        gene_name: key into ``gene_region_dict``; also written to the result.
        gene_region_dict: maps gene names to ``(chromosome, start, end)``.

    Returns:
        ``(best, ties)``.  ``best`` is ``[chr, start, end, gene_name, snp,
        lz_snp, p, pos, note]`` (all strings) or ``[]`` when no row
        qualified; ``note`` is ``'--'`` or ``'<n>SNPs(p=<p>)'`` after a tie.
        ``ties`` holds ``(snp, lz_snp, p)`` tuples for the SNPs tied at the
        best p-value, and is empty when there was no tie.
    """
    reg_chr, reg_start, reg_end = gene_region_dict[gene_name]
    best_p = 1
    best_out = []
    tie_count = 1
    sig_list = []

    with open(assoc_loc, mode="r") as assoc:
        header = next(assoc, None)
        if header is None:
            return best_out, sig_list
        assoc_indices = read_assoc_titles(header)

        for line in assoc:
            line_list = line.strip().split()
            if not line_list:
                # Blank lines carry no SNP; skip rather than raise IndexError.
                continue
            snp_pos = int(line_list[assoc_indices['pos']])
            p_field = line_list[assoc_indices['p']]
            if p_field == 'NA':
                continue
            # Compare chromosomes as ints on both sides, as the sibling
            # region filters do, so a region stored as '1' still matches.
            if int(reg_chr) != int(line_list[assoc_indices['chr']]):
                continue
            if not pc_toolbox.in_interval(snp_pos, reg_start, reg_end):
                continue

            cur_p = float(p_field)
            cur_snp, lz_snp = pc_toolbox.correct_snp(assoc_indices, line_list)
            cur_out = [str(reg_chr), str(reg_start), str(reg_end), gene_name,
                       cur_snp, lz_snp, str(cur_p), str(snp_pos), '--']

            if best_out and cur_p == best_p:
                tie_count = tie_count + 1
                cur_tup = (cur_snp, lz_snp, str(cur_p))
                old_tup = (best_out[4], best_out[5], str(best_p))
                sig_list.append(cur_tup)
                if old_tup not in sig_list:
                    sig_list.append(old_tup)
                print('NOTE: Both {0} and {1} have p-values of {2}.'.format(best_out[4],cur_snp, cur_p))
                print('There are no Z-scores with which to decide between them, so I arbitrarily retain {0}.'.format(best_out[4]))
                best_out[8]='{0}SNPs(p={1})'.format(tie_count, best_p)
                print(best_out[8])
            elif cur_p < best_p or (not best_out and cur_p == best_p):
                # The second clause covers a first hit whose p-value equals
                # the starting sentinel of 1: there is nothing to tie with
                # yet, so it simply becomes the current best.  Previously this
                # case indexed the empty placeholder and raised IndexError.
                tie_count = 1
                best_p = cur_p
                best_out = cur_out
                sig_list = []
    return best_out, sig_list
def filter_by_region(region, file_in, file_out, index_dict):
    """Copy the header and the in-region rows of ``file_in`` to ``file_out``.

    Rows are split on whitespace and written back tab-separated.  The first
    line is always copied; a later row is copied when its chromosome equals
    ``region.chro`` (both as int) and ``pc_toolbox.in_interval`` accepts its
    integer position against ``region.start``/``region.end``, which are
    passed through unchanged.

    Args:
        region: object exposing ``chro``, ``start`` and ``end``.
        file_in: path of the whitespace-delimited input table.
        file_out: path of the table to create (overwritten if present).
        index_dict: maps ``'pos'`` and ``'chr'`` to column indices.

    Returns:
        None.
    """
    with open(file_in, mode="r") as source, open(file_out, mode="w") as sink:
        header = next(source, None)
        if header is not None:
            sink.write('\t'.join(header.strip().split()) + '\n')
        for row in source:
            fields = row.strip().split()
            position = fields[index_dict['pos']]
            chromosome = fields[index_dict['chr']]
            if int(region.chro) != int(chromosome):
                continue
            if pc_toolbox.in_interval(int(position), region.start, region.end):
                sink.write('\t'.join(fields) + '\n')
def read_table(table_loc, region_list, assoc, annot_dict):
    """Collect the significant SNPs that lie outside every known region.

    The first line of ``table_loc`` is parsed by
    ``pc_toolbox.read_assoc_titles(line, .95, 'logistic')`` when ``assoc`` is
    true and by ``pc_toolbox.read_meta_titles`` otherwise.  A later row is
    kept when its p-value is numeric (per ``pc_toolbox.is_number``), does not
    exceed the module-level ``P_BOUND``, and the SNP is not inside any region
    of ``region_list``.  Kept rows are converted with ``store_line``.

    The collected list is echoed to stdout before and after sorting, as the
    original debugging output did.

    Args:
        table_loc: path of the whitespace-delimited results table.
        region_list: regions exposing ``chro``, ``start`` and ``end``;
            ``start``/``end`` are multiplied by 1e6 before comparison
            (presumably Mb -> bp; confirm).
        assoc: truthy for an association table, falsy for a meta table.
        annot_dict: passed through to ``store_line``.

    Returns:
        The kept records sorted by their ``chro`` and then ``p`` attributes.
    """
    standardizer = 1e6
    snp_list = []

    # The region bounds do not depend on the row, so convert them once
    # instead of once per row per region.
    region_bounds = [
        (int(region.chro),
         int(float(region.start) * standardizer),
         int(float(region.end) * standardizer))
        for region in region_list
    ]

    with open(table_loc, mode="r") as table:
        header = next(table, None)
        if header is not None:
            if assoc:
                table_indices = pc_toolbox.read_assoc_titles(header, .95, 'logistic')
            else:
                table_indices = pc_toolbox.read_meta_titles(header)
            for line in table:
                line_list = line.strip().split()
                if not line_list:
                    # Blank lines carry no SNP; skip instead of IndexError.
                    continue
                snp_pos = int(line_list[table_indices['pos']])
                chro = int(line_list[table_indices['chr']])
                p_str = line_list[table_indices['p']]

                p_ok = pc_toolbox.is_number(p_str) and not float(p_str) > P_BOUND
                in_reg = any(
                    reg_chr == chro
                    and pc_toolbox.in_interval(snp_pos, reg_start, reg_end)
                    for reg_chr, reg_start, reg_end in region_bounds
                )
                if p_ok and not in_reg:
                    snp_list.append(
                        store_line(line, table_indices, assoc, annot_dict))

    # Parenthesised single-argument print behaves the same on Python 2 and 3
    # and matches the print(...) calls used elsewhere in this file.
    print(snp_list)
    sys.stdout.flush()
    tuple_list = sorted(snp_list, key=attrgetter('chro', 'p'))
    print(snp_list)
    print(tuple_list)
    sys.stdout.flush()
    return tuple_list
def remove_zerops(region_list,zero_list):
    """Remove, in place, every region that contains a SNP from ``zero_list``.

    A region matches a SNP when their ``chro`` attributes are equal as floats
    and ``pc_toolbox.in_interval`` accepts the SNP position against the
    region's ``start``/``end`` (all converted to float).  Each removal is
    announced on stdout.

    Args:
        region_list: list of regions exposing ``chro``, ``start``, ``end``
            and ``band``.  The list is mutated.
        zero_list: SNP records exposing ``chro``, ``pos`` and ``lz``.

    Returns:
        The same ``region_list`` object, with the matching regions removed.
    """
    for zero in zero_list:
        # Walk a snapshot: removing from the list being iterated shifts the
        # remaining items left and makes the loop skip the region that
        # follows each removal.
        for region in list(region_list):
            if float(zero.chro) == float(region.chro) and pc_toolbox.in_interval(
                    float(zero.pos), float(region.start), float(region.end)):
                print('''
***********************************************************
NOTE: REMOVING Region {0} from list due to:
{1} at chr{2}:{3} with p=0.
***********************************************************
'''.format(region.band,zero.lz,zero.chro,zero.pos))
                sys.stdout.flush()
                region_list.remove(region)
    return region_list
def read_table(table_loc, gene, table_type, annot_dict=None):
    """Find the most significant adequately-weighted SNP inside ``gene``.

    The first line of ``table_loc`` is parsed by
    ``pc_toolbox.read_meta_titles`` (``table_type == 'META'``) or
    ``pc_toolbox.read_fam_titles`` (``table_type == 'FAMILY'``).  Every later
    row on the gene's chromosome that ``pc_toolbox.in_interval`` accepts is a
    candidate.  A candidate whose weight is below the module-level
    ``weight_min`` is marked ``checkme == '1'`` and can never become the
    retained SNP; the best such candidate is tracked separately as the
    "low weight" SNP while its p-value beats the retained one.

    When a candidate ties the retained p-value, both are recorded in the tie
    list and the one with the larger |z| (if adequately weighted) is retained.

    Args:
        table_loc: path of the whitespace-delimited results table.
        gene: object exposing ``chro``, ``start``, ``end``, ``sym`` and
            ``ID``.  ``start``/``end`` are multiplied by 1e6 before
            comparison (presumably Mb -> bp; confirm).
        table_type: ``'META'`` or ``'FAMILY'``.
        annot_dict: maps the SNP id in the table to an object with ``rs`` and
            ``name`` attributes.  NOTE(review): the default of ``None`` is
            kept for interface compatibility, but a mapping is needed as soon
            as one row falls inside the gene.

    Returns:
        ``(best, ties, low_weight_lz)``: ``best`` is a dict of string fields
        describing the retained SNP (a placeholder with ``p == '1'`` when
        nothing qualified); ``ties`` is a list of ``ZTup(snp, lz, p, z, w)``
        sorted by numeric |z|, largest first; ``low_weight_lz`` is the ``lz``
        id of the tracked low-weight SNP, or None.

    Raises:
        ValueError: if the table has a header line and ``table_type`` is
            neither ``'META'`` nor ``'FAMILY'``.
    """
    standardizer = 1e6
    reg_chr = gene.chro
    reg_start = int(float(gene.start) * standardizer)
    reg_end = int(float(gene.end) * standardizer)

    # Build the record type once instead of on every tie.
    ZTup = namedtuple('ZTup','snp,lz,p,z,w')

    # Placeholders are created up front so an empty table still yields a
    # well-formed result instead of an unbound-variable error.
    old_out = {'chr':'','ID':'','start':'',
               'end':'','sym':'','snp':'','im':'',
               'lz':'','p':'1','pos':'',
               '|z|':'0','z':'0','note':'--',
               'weight':'','maf':'','meta_a1':'',
               'meta_a2':'', 'checkme':''}
    lw_blank = {'chr':'','ID':'','start':'',
                'end':'','sym':'','snp':None,'im':None,
                'lz':None,'p':'1','pos':'',
                '|z|':'0','z':'0','note':'--',
                'weight':'','maf':'','meta_a1':'',
                'meta_a2':''}
    # Work on a copy so the blank template is never mutated.
    low_weight_out = dict(lw_blank)
    multi_counter = 1
    zero_list = []

    with open(table_loc, mode="r") as table:
        header = next(table, None)
        if header is None:
            return old_out, [], low_weight_out['lz']
        if table_type == 'META':
            index_dict = pc_toolbox.read_meta_titles(header)
        elif table_type == 'FAMILY':
            index_dict = pc_toolbox.read_fam_titles(header)
        else:
            raise ValueError(
                "table_type must be 'META' or 'FAMILY', got {0!r}".format(table_type))

        for line in table:
            line_list = line.strip().split()
            if not line_list:
                # Blank lines carry no SNP; skip instead of IndexError.
                continue
            snp_pos = int(line_list[index_dict['pos']])
            if not (int(reg_chr) == int(line_list[index_dict['chr']])
                    and pc_toolbox.in_interval(snp_pos, reg_start, reg_end)):
                continue

            old_p = float(old_out['p'])
            cur_p = float(line_list[index_dict['p']])
            lz_snp = line_list[index_dict['snp']]
            cur_w = float(line_list[index_dict['weight']])
            cur_a1 = line_list[index_dict['a1']]
            cur_a2 = line_list[index_dict['a2']]
            cur_z = line_list[index_dict['z']]
            cur_abs_z = abs(float(cur_z))
            cur_snp = annot_dict[lz_snp].rs
            cur_im = annot_dict[lz_snp].name

            cur_out = {'chr':gene.chro,'ID':gene.ID,'start':gene.start,
                       'end':gene.end,'sym':gene.sym,'snp':cur_snp,'im':cur_im,
                       'lz':lz_snp,'p':str(cur_p),'pos':str(snp_pos),
                       '|z|':str(cur_abs_z),'z':cur_z,'note':'--',
                       'weight':str(cur_w),
                       'meta_a1':cur_a1,'meta_a2':cur_a2}
            # '1' marks a SNP whose weight is too low to be retained.
            cur_out['checkme'] = '1' if cur_w < weight_min else '0'

            if cur_p == old_p:
                multi_counter = multi_counter + 1
                cur_tup = ZTup(snp=cur_out['snp'],lz=cur_out['lz'],
                               p=cur_out['p'],z=cur_out['|z|'],w=cur_out['weight'])
                old_tup = ZTup(snp=old_out['snp'],lz=old_out['lz'],
                               p=old_out['p'],z=old_out['|z|'],w=old_out['weight'])
                zero_list.append(cur_tup)
                if old_tup not in zero_list:
                    zero_list.append(old_tup)
                print('NOTE: Both {0} and {1} have p-values of {2}.'.format(old_out['snp'],cur_snp, cur_p))
                print('{0} has a z-score of: {1}'.format(old_out['snp'],old_out['z']))
                print('{0} has a z-score of: {1}'.format(cur_out['snp'],cur_out['z']))
                # |z| is stored as text; compare it numerically, because
                # string comparison would rank '9.3' above '10.5'.
                if cur_abs_z > float(old_out['|z|']) and cur_out['checkme']=='0':
                    old_out.update(cur_out)
                print('Retaining {0} as the significant SNP based on z-score value.'.format(old_out['snp']))
                old_out['note']='{0}SNPs(p={1})'.format(multi_counter, old_p)
            elif cur_p < old_p:
                if cur_out['checkme']=='0':
                    multi_counter = 1
                    old_out.update(cur_out)
                    zero_list = []
                    # A tracked low-weight SNP is only of interest while it
                    # beats the retained SNP; drop it once it no longer does.
                    if (not low_weight_out['p'] == '1'
                            and cur_p < float(low_weight_out['p'])):
                        low_weight_out = dict(lw_blank)
                        print("Wiping low weight snp!")
                elif cur_p < float(low_weight_out['p']):
                    low_weight_out.update(cur_out)
                    print('''Adding {0} to low weight SNP list due to weight = {1} p-value = {2}'''.format(cur_snp, low_weight_out['weight'],low_weight_out['p']))

    # Sort ties by numeric |z| (largest first), not by its string form.
    sorted_list = sorted(zero_list, key=lambda member: float(member.z), reverse=True)
    return old_out, sorted_list, low_weight_out['lz']
't':'0','note':'--','checkme':'0'} else: line_info = read_assoc(line,.95, 'logistic') ## ## ## ## ##NOTE NOTE NOTE THIS IS WHERE EDITING STOPPED!!!! RESUME CHANGES HERE!!!!!!!!!!!!! ## ## ## ## line_list = line.strip().split() snp_pos = int(line_list[assoc_indices['pos']]) if int(reg_chr) == int(line_list[assoc_indices['chr']] ) and pc_toolbox.in_interval(snp_pos, reg_start, reg_end): old_p = float(old_out['p']) cur_p = float(line_list[assoc_indices['p']]) cur_snp = line_list[assoc_indices['snp']] cur_t = float(line_list[assoc_indices['t']]) cur_hi = line_list[assoc_indices['hi']] cur_lo = line_list[assoc_indices['lo']] cur_a1 = line_list[assoc_indices['a1']] cur_or = line_list[assoc_indices['or']] ## if not cur_snp.startswith('rs'): ## lz_snp = 'chr'+str(reg_chr)+':'+ str(snp_pos) ## else: ## lz_snp = cur_snp ## if cur_w > weight_min or (reg_chr in ['23','24'] and cur_w > sex_weight): ## cur_out = {'chr':gene.chro,'band':gene.band,'start':gene.start,