def retrieve_fam_data(snp_list, fam_loc, out_loc, annot_dict):
    """Write the rows of a family table whose SNP appears in ``snp_list``.

    The first line of ``fam_loc`` is passed to ``pc_toolbox.read_fam_titles``
    and the result is used as a mapping from field name ('snp', 'weight',
    'z', 'pos', 'a1', 'a2', 'chr', 'p') to column index.  A fixed title row
    is written to ``out_loc``; every later row whose SNP identifier is in
    ``snp_list`` is written as a tab-separated line of
    snp, rs, imchip name, chr, pos, p, z, weight, a1, a2.

    Args:
        snp_list: iterable of hashable SNP identifiers to keep.
        fam_loc: path of the whitespace-delimited input table.
        out_loc: path of the tab-delimited output table (overwritten).
        annot_dict: mapping from SNP identifier to an object exposing
            ``rs`` and ``name`` attributes.

    Raises:
        KeyError: if a selected SNP is missing from ``annot_dict``.
    """
    # Build the lookup once; the original scanned the list for every row.
    wanted = frozenset(snp_list)
    # Both handles are context-managed so the output is flushed and closed
    # even when a malformed row raises part-way through the table.
    with open(out_loc, mode="w") as out:
        with open(fam_loc, mode="r") as fam:
            for line_no, line in enumerate(fam):
                if line_no == 0:
                    index_dict = pc_toolbox.read_fam_titles(line)
                    title_list = ['lz_snp', 'rs_snp', 'imchip_snp',
                                  FAM_CHR_TITLE, FAM_POSITION_TITLE,
                                  FAM_P_TITLE, FAM_Z_TITLE, FAM_WEIGHT_TITLE,
                                  FAM_A1_TITLE, FAM_A2_TITLE]
                    out.write('\t'.join(title_list) + '\n')
                    continue
                fields = line.strip().split()
                snp = fields[index_dict['snp']]
                if snp not in wanted:
                    # Remaining columns are only needed for kept rows.
                    continue
                annot = annot_dict[snp]
                new_list = [snp, annot.rs, annot.name,
                            fields[index_dict['chr']],
                            fields[index_dict['pos']],
                            fields[index_dict['p']],
                            fields[index_dict['z']],
                            fields[index_dict['weight']],
                            fields[index_dict['a1']],
                            fields[index_dict['a2']]]
                out.write('\t'.join(new_list) + '\n')
def repair_for_zs(z_list, lw_list, table_loc, build, table_type):
    """Write a copy of a results table with problematic p-values replaced.

    The copy is written next to ``table_loc`` with ``_noZs`` inserted before
    the extension.  The header line is copied unchanged.  For each data row:

    * a p-value equal to the string ``'0'`` becomes ``'1e-101'`` when the
      SNP is in ``z_list`` and ``'1e-100'`` otherwise;
    * otherwise, if the SNP is in ``lw_list``, the p-value becomes ``'NA'``
      and a message is printed.

    Data rows are re-joined with tabs.

    Args:
        z_list: iterable of hashable SNP identifiers (see first rule above).
        lw_list: iterable of hashable SNP identifiers (see second rule).
        table_loc: path of the whitespace-delimited input table.
        build: unused by this function; kept for interface compatibility.
        table_type: ``'FAMILY'`` or ``'META'``; selects the
            ``pc_toolbox`` header reader.

    Raises:
        ValueError: if ``table_type`` is not ``'FAMILY'`` or ``'META'``.
    """
    # Fail before touching any file: previously an unsupported type wrote
    # the header and then died with an unbound ``index_dict`` on row one.
    if table_type not in ('FAMILY', 'META'):
        raise ValueError(
            "Unsupported table_type: {0!r}".format(table_type))

    print(''' LW_LIST is:''')
    print(lw_list)

    base, ext = os.path.splitext(table_loc)
    new_loc = base + '_noZs' + ext
    zero_snps = frozenset(z_list)
    low_weight_snps = frozenset(lw_list)

    with open(new_loc, mode="w") as no_z:
        with open(table_loc, mode="r") as tabby:
            for line_no, line in enumerate(tabby):
                if line_no == 0:
                    if table_type == 'FAMILY':
                        index_dict = pc_toolbox.read_fam_titles(line)
                    else:
                        index_dict = pc_toolbox.read_meta_titles(line)
                    no_z.write(line)
                    continue
                fields = line.strip().split()
                snp = fields[index_dict['snp']]
                p_col = index_dict['p']
                if fields[p_col] == '0':
                    # Exact string match only, as in the original code.
                    if snp in zero_snps:
                        fields[p_col] = '1e-101'
                    else:
                        fields[p_col] = '1e-100'
                elif snp in low_weight_snps:
                    print('''Changing p-value of {0} from {1} to NA due to low weight coupled with high p-value.'''.format(snp, fields[p_col]))
                    fields[p_col] = 'NA'
                no_z.write('\t'.join(fields) + '\n')
def filter_table(meta_loc, list_loc, table_type='META'):
    """Write the rows of a table whose SNP appears in a list file.

    The SNP identifiers are obtained from ``read_list(list_loc)``.  The
    output is written next to ``meta_loc`` with ``_intersect`` inserted
    before the extension.  The header line and every matching data row are
    copied unchanged.

    Args:
        meta_loc: path of the whitespace-delimited input table.
        list_loc: path handed to ``read_list``.
        table_type: ``'META'`` (default) or ``'FAMILY'``; selects the
            ``pc_toolbox`` header reader.

    Raises:
        ValueError: if ``table_type`` is not ``'META'`` or ``'FAMILY'``.
    """
    if table_type not in ('META', 'FAMILY'):
        raise ValueError(
            "Unsupported table_type: {0!r}".format(table_type))

    # A set gives one constant-time test per row.  The original looped over
    # the list and wrote the row once per matching entry, so a duplicated
    # identifier in the list file duplicated the row in the output.
    wanted = frozenset(read_list(list_loc))

    base, ext = os.path.splitext(meta_loc)
    new_meta_loc = base + '_intersect' + ext

    with open(meta_loc, mode="r") as meta:
        with open(new_meta_loc, mode="w") as new_meta:
            for line_no, line in enumerate(meta):
                if line_no == 0:
                    if table_type == 'META':
                        index_dict = pc_toolbox.read_meta_titles(line)
                    else:
                        index_dict = pc_toolbox.read_fam_titles(line)
                    new_meta.write(line)
                    continue
                snp = line.strip().split()[index_dict['snp']]
                if snp in wanted:
                    new_meta.write(line)
def create_index_dict(table_loc, table_type):
    """Return the column-index mapping for the header line of a table.

    Only the first line of ``table_loc`` is read.  It is passed to
    ``pc_toolbox.read_fam_titles`` when ``table_type`` is ``'family'``, to
    ``pc_toolbox.read_assoc_titles`` (with ``c_interval=None`` and
    ``plink_test='logistic'``) when it is ``'assoc'``, and to
    ``pc_toolbox.read_meta_titles`` for any other value.

    Note that the type names are lower-case here.

    Args:
        table_loc: path of the table whose header should be parsed.
        table_type: ``'family'``, ``'assoc'`` or anything else (meta).

    Returns:
        Whatever the selected ``pc_toolbox`` reader returns.

    Raises:
        ValueError: if the file is empty (previously this surfaced as an
            ``UnboundLocalError`` at the ``return`` statement).
    """
    # The header is the only line needed; the original iterated the whole
    # file before returning.
    with open(table_loc, mode="r") as table:
        header = table.readline()
    if not header:
        raise ValueError(
            "Cannot read column titles: {0} is empty.".format(table_loc))

    if table_type == 'family':
        return pc_toolbox.read_fam_titles(header)
    if table_type == 'assoc':
        return pc_toolbox.read_assoc_titles(header, c_interval=None,
                                            plink_test='logistic')
    return pc_toolbox.read_meta_titles(header)
def list_info(table_loc, table_type):
    """Collect (snp, chro, pos) records from every row after the first.

    The first line of ``table_loc`` selects the column indices:
    ``pc_toolbox.identify_map_indices()`` for ``'MAP'``/``'HAPMAP'``,
    ``pc_toolbox.read_meta_titles`` for ``'META'`` and
    ``pc_toolbox.read_fam_titles`` for ``'FAMILY'``.  Each later line is
    split on whitespace and stored as a ``SnpFo`` named tuple of strings.

    Args:
        table_loc: path of the whitespace-delimited table.
        table_type: one of ``'MAP'``, ``'HAPMAP'``, ``'META'``, ``'FAMILY'``.

    Returns:
        A list of ``SnpFo(snp, chro, pos)`` tuples in file order.
    """
    SnpFo = namedtuple('SnpFo', 'snp,chro,pos')
    records = []
    with open(table_loc, mode="r") as table:
        rows = iter(table)
        header = next(rows, None)
        if header is None:
            return records

        # NOTE(review): the first line is consumed for MAP/HAPMAP as well,
        # even though its content is not parsed for those types -- confirm
        # those files really start with a title line.
        if table_type in ('MAP', 'HAPMAP'):
            index_dict = pc_toolbox.identify_map_indices()
        elif table_type == 'META':
            index_dict = pc_toolbox.read_meta_titles(header)
        elif table_type == 'FAMILY':
            index_dict = pc_toolbox.read_fam_titles(header)

        for row in rows:
            cells = row.strip().split()
            records.append(SnpFo(chro=cells[index_dict['chr']],
                                 snp=cells[index_dict['snp']],
                                 pos=cells[index_dict['pos']]))
    return records
def read_table(table_loc, gene, table_type, annot_dict=None):
    """Pick the most significant SNP of a table inside a gene region.

    Rows are considered when their chromosome equals ``gene.chro`` and
    ``pc_toolbox.in_interval`` accepts their position for the region
    ``gene.start``..``gene.end`` (both multiplied by 1e6 and truncated to
    int).  Among those rows the function tracks:

    * the row with the smallest p-value whose weight is at least the
      module-level ``weight_min`` (ties are resolved in favour of the
      larger absolute z-score and recorded in the tie list);
    * a low-weight candidate: a row below ``weight_min`` whose p-value
      beats both the current best row and any earlier low-weight candidate.
      It is discarded again when a later adequately weighted row has a
      smaller p-value.

    Args:
        table_loc: path of the whitespace-delimited table with a header.
        gene: object exposing ``chro``, ``start``, ``end``, ``sym``, ``ID``.
        table_type: ``'META'`` or ``'FAMILY'``; selects the header reader.
        annot_dict: mapping from the table's SNP identifier to an object
            exposing ``rs`` and ``name``.  Although it defaults to ``None``
            it is indexed for every row inside the region.

    Returns:
        A 3-tuple ``(best, ties, low_weight_lz)``: ``best`` is a dict of
        strings describing the retained SNP (``{}`` when the file is
        empty), ``ties`` is a list of ``ZTup(snp, lz, p, z, w)`` sorted by
        descending absolute z-score, and ``low_weight_lz`` is the table
        identifier of the low-weight candidate or ``None``.
    """
    global weight_min, sex_weight

    ZTup = namedtuple('ZTup', 'snp,lz,p,z,w')
    standardizer = 1e6
    reg_chr = gene.chro
    reg_start = int(float(gene.start) * standardizer)
    reg_end = int(float(gene.end) * standardizer)

    lw_blank = {'chr': '', 'ID': '', 'start': '', 'end': '', 'sym': '',
                'snp': None, 'im': None, 'lz': None, 'p': '1', 'pos': '',
                '|z|': '0', 'z': '0', 'note': '--', 'weight': '',
                'maf': '', 'meta_a1': '', 'meta_a2': ''}

    old_out = {}
    # Bound before the loop so an empty file no longer fails at ``return``.
    # Always a *copy* of the template: the original aliased the template,
    # so "wiping" the candidate kept every stale field except p and lz.
    low_weight_out = dict(lw_blank)
    multi_counter = 1
    zero_list = []

    with open(table_loc, mode="r") as table:
        for line_no, line in enumerate(table):
            if line_no == 0:
                if table_type == 'META':
                    index_dict = pc_toolbox.read_meta_titles(line)
                if table_type == 'FAMILY':
                    index_dict = pc_toolbox.read_fam_titles(line)
                old_out = {'chr': '', 'ID': '', 'start': '', 'end': '',
                           'sym': '', 'snp': '', 'im': '', 'lz': '',
                           'p': '1', 'pos': '', '|z|': '0', 'z': '0',
                           'note': '--', 'weight': '', 'maf': '',
                           'meta_a1': '', 'meta_a2': '', 'checkme': ''}
                continue

            fields = line.strip().split()
            snp_pos = int(fields[index_dict['pos']])
            in_region = (
                int(reg_chr) == int(fields[index_dict['chr']])
                and pc_toolbox.in_interval(snp_pos, reg_start, reg_end))
            if not in_region:
                continue

            old_p = float(old_out['p'])
            cur_p = float(fields[index_dict['p']])
            lz_snp = fields[index_dict['snp']]
            cur_w = float(fields[index_dict['weight']])
            cur_z = fields[index_dict['z']]
            cur_abs_z = abs(float(cur_z))
            annot = annot_dict[lz_snp]
            cur_snp = annot.rs

            cur_out = {'chr': gene.chro, 'ID': gene.ID, 'start': gene.start,
                       'end': gene.end, 'sym': gene.sym, 'snp': cur_snp,
                       'im': annot.name, 'lz': lz_snp, 'p': str(cur_p),
                       'pos': str(snp_pos), '|z|': str(cur_abs_z),
                       'z': cur_z, 'note': '--', 'weight': str(cur_w),
                       'meta_a1': fields[index_dict['a1']],
                       'meta_a2': fields[index_dict['a2']]}
            # '1' marks a row whose weight is below the accepted minimum.
            if float(cur_out['weight']) < weight_min:
                cur_out['checkme'] = '1'
            else:
                cur_out['checkme'] = '0'

            if cur_p == old_p:
                multi_counter = multi_counter + 1
                cur_tup = ZTup(snp=cur_out['snp'], lz=cur_out['lz'],
                               p=cur_out['p'], z=cur_out['|z|'],
                               w=cur_out['weight'])
                old_tup = ZTup(snp=old_out['snp'], lz=old_out['lz'],
                               p=old_out['p'], z=old_out['|z|'],
                               w=old_out['weight'])
                zero_list.append(cur_tup)
                if old_tup not in zero_list:
                    zero_list.append(old_tup)
                print('NOTE: Both {0} and {1} have p-values of {2}.'.format(old_out['snp'], cur_snp, cur_p))
                print('{0} has a z-score of: {1}'.format(old_out['snp'], old_out['z']))
                print('{0} has a z-score of: {1}'.format(cur_out['snp'], cur_out['z']))
                # Compare magnitudes numerically.  The stored values are
                # strings, and comparing those ranks '9.5' above '10.2'.
                if (cur_abs_z > float(old_out['|z|'])
                        and cur_out['checkme'] == '0'):
                    old_p = cur_p
                    old_out.update(cur_out)
                # NOTE(review): nesting of the next two statements was
                # inferred; they are assumed to run for every tie.
                print('Retaining {0} as the significant SNP based on z-score value.'.format(old_out['snp']))
                old_out['note'] = '{0}SNPs(p={1})'.format(multi_counter, old_p)
            elif cur_p < old_p:
                if cur_out['checkme'] == '0':
                    multi_counter = 1
                    old_p = cur_p
                    old_out.update(cur_out)
                    zero_list = []
                    if not low_weight_out['p'] == '1':
                        if cur_p < float(low_weight_out['p']):
                            low_weight_out = dict(lw_blank)
                            print("Wiping low weight snp!")
                elif cur_p < float(low_weight_out['p']):
                    low_weight_out.update(cur_out)
                    print('''Adding {0} to low weight SNP list due to weight = {1} p-value = {2}'''.format(cur_snp, low_weight_out['weight'], low_weight_out['p']))

    # The z field holds str(abs(z)); sort on its numeric value.
    sorted_list = sorted(zero_list, key=lambda member: float(member.z),
                         reverse=True)
    return old_out, sorted_list, low_weight_out['lz']
def fix_table(table_loc, annot_dict, purpose, build):
    """Rewrite a table with annotated coordinates, SNP names and LZ notes.

    NOTE(review): a second ``fix_table`` is defined later in this file;
    at import time that later definition replaces this one.

    For ``purpose`` in ``'MAP'``, ``'HAPMAP'``, ``'FAMMAP'`` the input is
    first copied to a backup (name derived via ``rename_as_necessary``),
    the backup is read, and ``table_loc`` itself is overwritten; every line
    is treated as data with columns chr=0, snp=1, pos=3.  For any other
    purpose the output path comes from ``locate_fixed_meta``, the first
    line is a header (parsed for ``'META'``/``'FAMILY'``) that gains an
    ``annotation`` column, and each data row gains the value returned by
    ``get_lz_annot``.

    For each data row, when ``build`` is ``'hg18'`` or ``'hg19'`` the
    chromosome and position are replaced from the matching attributes of
    ``annot_dict[snp]``; rows whose SNP is missing are collected and
    written to ``<input base>_NAMEERRORS.txt``.  Unless ``purpose`` is
    ``'HAPMAP'`` the SNP column is replaced by ``annot_dict[snp].lz``.

    Args:
        table_loc: path of the whitespace-delimited input table.
        annot_dict: mapping from SNP identifier to an annotation object
            exposing ``hg18_chr``, ``hg18_pos``, ``hg19_chr``,
            ``hg19_pos`` and ``lz``.
        purpose: ``'MAP'``, ``'HAPMAP'``, ``'FAMMAP'``, ``'META'`` or
            ``'FAMILY'``.
        build: ``'hg18'`` or ``'hg19'``; other values leave the
            coordinates untouched.
    """
    map_like = purpose in ('MAP', 'HAPMAP', 'FAMMAP')
    if map_like:
        basepath, ext = os.path.splitext(table_loc)
        backup_loc = str(rename_as_necessary(basepath + '~', ext)) + ext
        shutil.copy(table_loc, backup_loc)
        output_loc = table_loc
        table_loc = backup_loc
    else:
        output_loc = locate_fixed_meta(table_loc, build)

    index_dict = {'chr': 0, 'pos': 3, 'snp': 1}
    error_list = []
    awaiting_header = not map_like  # map-like files have no header row
    counter = 0
    line_list = None

    # Both handles are context-managed so a failing row cannot leave the
    # output open and unflushed.
    with open(table_loc, mode="r") as table:
        with open(output_loc, mode="w") as output:
            for line in table:
                if counter > 5000:
                    # Progress trace: shows the previously processed row.
                    print(line_list)
                if awaiting_header:
                    if purpose == "META":
                        index_dict = pc_toolbox.read_meta_titles(line)
                    if purpose == "FAMILY":
                        index_dict = pc_toolbox.read_fam_titles(line)
                    print(index_dict)
                    line_list = line.strip().split()
                    line_list.append('annotation')
                    output.write('\t'.join(line_list) + '\n')
                    index_dict['annot'] = len(line_list) - 1
                    print(index_dict)
                    awaiting_header = False
                    continue

                line_list = line.strip().split()
                snp = line_list[index_dict['snp']]
                if purpose in ('FAMILY', 'FAMMAP') and snp in FAM_DIFF:
                    print("MADE IT INTO CASE")
                    print(snp)
                    snp = FAM_DIFF[snp]
                    print(snp)
                if not map_like:
                    line_list.append(get_lz_annot(annot_dict, snp))

                if build in ('hg18', 'hg19'):
                    try:
                        annot = annot_dict[snp]
                        line_list[index_dict['chr']] = _numeric_chrom(
                            getattr(annot, build + '_chr'))
                        line_list[index_dict['pos']] = getattr(
                            annot, build + '_pos')
                    except KeyError:
                        error_list.append(list(line_list))

                if purpose != 'HAPMAP':
                    try:
                        line_list[index_dict['snp']] = annot_dict[snp].lz
                    except KeyError:
                        print("{0} is missing from the dictionary!".format(line_list[index_dict['snp']]))

                if counter > 5000:
                    counter = 0
                counter = 1 + counter
                output.write('\t'.join(line_list) + '\n')

    # The error report sits next to the file that was read (the backup
    # copy for map-like purposes).
    base, ext = os.path.splitext(table_loc)
    error_loc = base + '_NAMEERRORS.txt'
    with open(error_loc, mode="w") as efile:
        efile.write('\t'.join(['CHR', 'RS', 'cM', 'POS']) + '\n')
        for error in error_list:
            efile.write('\t'.join(error) + '\n')


def _numeric_chrom(chrom):
    """Replace the chromosome labels X, Y, XY and M with numeric codes."""
    # 'XY' must be handled before 'X' and 'Y'; in the original order it was
    # already rewritten to '2324' by the time its own replacement ran.
    return (chrom.replace('XY', '23')
                 .replace('X', '23')
                 .replace('Y', '24')
                 .replace('M', '26'))
def fix_table(table_loc, annot_dict, purpose, build):
    """Rewrite a table with annotated coordinates and SNP names.

    For ``purpose`` ``'HAPMAP'`` or ``'MAP'`` the input is first copied to
    a backup (name derived via ``rename_as_necessary``), the backup is
    read, and ``table_loc`` itself is overwritten.  For any other purpose
    the output path comes from ``locate_fixed_table``.

    The first line is always copied unchanged; for ``'META'`` and
    ``'FAMILY'`` it is also parsed for the column indices, otherwise the
    columns default to chr=0, snp=1, pos=3.  For each later row, when
    ``build`` is ``'hg18'`` or ``'hg19'`` the chromosome and position are
    replaced from the matching attributes of ``annot_dict[snp]``, and the
    SNP column is replaced by ``annot_dict[snp].lz``.  Rows whose SNP is
    missing from ``annot_dict`` are written unchanged and additionally
    collected in ``<input base>_NAMEERRORS.txt``.

    Args:
        table_loc: path of the whitespace-delimited input table.
        annot_dict: mapping from SNP identifier to an annotation object
            exposing ``hg18_chr``, ``hg18_pos``, ``hg19_chr``,
            ``hg19_pos`` and ``lz``.
        purpose: ``'HAPMAP'``, ``'MAP'``, ``'META'`` or ``'FAMILY'``.
        build: ``'hg18'`` or ``'hg19'``; other values leave the
            coordinates untouched.
    """
    if purpose in ("HAPMAP", "MAP"):
        basepath, ext = os.path.splitext(table_loc)
        backup_loc = str(rename_as_necessary(basepath + '~', ext)) + ext
        shutil.copy(table_loc, backup_loc)
        output_loc = table_loc
        table_loc = backup_loc
    else:
        output_loc = locate_fixed_table(table_loc, build)

    index_dict = {'chr': 0, 'pos': 3, 'snp': 1}
    error_list = []

    # Both handles are context-managed so a failing row cannot leave the
    # output open and unflushed.
    with open(table_loc, mode="r") as table:
        with open(output_loc, mode="w") as output:
            for line_no, line in enumerate(table):
                if line_no == 0:
                    if purpose == "META":
                        index_dict = pc_toolbox.read_meta_titles(line)
                    elif purpose == "FAMILY":
                        index_dict = pc_toolbox.read_fam_titles(line)
                    output.write(line)
                    continue

                line_list = line.strip().split()
                snp = line_list[index_dict['snp']]

                if build in ('hg18', 'hg19'):
                    try:
                        annot = annot_dict[snp]
                        line_list[index_dict['chr']] = getattr(
                            annot, build + '_chr')
                        line_list[index_dict['pos']] = getattr(
                            annot, build + '_pos')
                    except KeyError:
                        error_list.append(list(line_list))

                try:
                    line_list[index_dict['snp']] = annot_dict[snp].lz
                except KeyError:
                    print("{0} is missing from the dictionary!".format(line_list[index_dict['snp']]))

                output.write('\t'.join(line_list) + '\n')

    # The error report sits next to the file that was read (the backup
    # copy for HAPMAP/MAP).
    base, ext = os.path.splitext(table_loc)
    error_loc = base + '_NAMEERRORS.txt'
    with open(error_loc, mode="w") as efile:
        efile.write('\t'.join(['CHR', 'RS', 'cM', 'POS']) + '\n')
        for error in error_list:
            efile.write('\t'.join(error) + '\n')