def main(argv):
    """Drive the yank workflow for one meta table.

    Steps, in order: parse ``argv`` with ``cl_arguments``, build the region
    list and the 'LOG' annotation dictionary for ``build``, index the meta
    table with ``create_index_dict``, call ``find_leastP_SNPs``, run
    ``plink_it`` when both ``bfile`` and ``covar_loc`` are set, and finish
    with ``repair_for_zs``.
    """
    global out_base, freq_loc, meta_loc, bfile, table_type
    global covar_loc, annot, weight_min, build, fix_args
    global family
    cl_arguments(argv)

    regions = pc_toolbox.create_region_list(region_loc)
    annotations = fix_it.build_annot_dict(
        'LOG', fix_it.locate_annot_dict(build))

    # Output names are derived from the shared output prefix.
    keep_path = out_base + '_plink_keep.txt'
    plink_prefix = out_base + '_sigs_inCC'

    # The meta table is used exactly as given; no "fixed" copy is located.
    meta_path = meta_loc
    columns = create_index_dict(meta_path, table_type)

    z_list, lw_list = find_leastP_SNPs(
        regions, meta_path, columns, out_base, freq_loc,
        keep_path, weight_min, table_type, annotations)

    have_plink_inputs = bfile is not None and covar_loc is not None
    if have_plink_inputs:
        plink_it(keep_path, bfile, covar_loc, plink_prefix)

    repair_for_zs(z_list, lw_list, meta_path, columns, build)
def main(argv):
    """Parse ``argv``, fix the table at ``table_loc``, then run the rs lookup.

    The 'MAP' annotation dictionary is always loaded for hg18: ``build`` is
    overwritten after the command line has been parsed.
    """
    global table_loc, build
    cl_arguments(argv)

    # Pin the build to hg18 regardless of what cl_arguments stored.
    build = 'hg18'
    annotations = fix_it.build_annot_dict(
        'MAP', fix_it.locate_annot_dict(build))

    # The return value of fix_joe_map is not used here.
    fix_joe_map(table_loc, annotations, build)
    look_up_rs_joe.main()
def main():
    """Summarize the log folder named on the command line.

    Reads options from ``sys.argv[1:]``. When no log folder was supplied,
    prints usage and exits with status 2. Otherwise changes into the folder,
    loads the 'LOG' annotation dictionary for ``build`` and calls
    ``summarize_folder``.
    """
    global log_folder, summary_name, freq_loc, run_info, build
    cl_arguments(sys.argv[1:])

    if log_folder is None:
        usage()
        sys.exit(2)

    os.chdir(log_folder)
    annotations = fix_it.build_annot_dict(
        'LOG', fix_it.locate_annot_dict(build))
    summarize_folder(log_folder, freq_loc, summary_name, run_info,
                     annotations)
def main(argv):
    """Call ``read_meta`` once per region using the module-level settings.

    ``argv`` is accepted but not read; all inputs come from the constants
    ``REGION_LOC``, ``BUILD``, ``META_LOC`` and ``OUTFOLDER``.
    """
    region_loc, build, meta_loc, outfolder = (
        REGION_LOC, BUILD, META_LOC, OUTFOLDER)

    regions = pc_toolbox.create_region_list(region_loc)
    annotations = fix_it.build_annot_dict(
        'LOG', fix_it.locate_annot_dict(build))
    fixed_meta = fix_it.locate_fixed_meta(meta_loc, build)

    for one_region in regions:
        read_meta(fixed_meta, one_region, outfolder, build, annotations)
def main(argv):
    """Load both annotation dictionaries and call ``append_assoc``.

    ``argv`` is accepted but not read. The 'MAP' dictionary is built for
    ``BUILD``; the intersect list is read against the hg19 'LOG' dictionary.
    An earlier inline map-and-key writer that used to live here has been
    disabled in favour of ``append_assoc``.
    """
    # OUT_FILE is read but not used below.
    build, out_file, intersect_loc, assoc_loc = (
        BUILD, OUT_FILE, INTERSECT_LOC, ASSOC_LOC)
    outfolder = '/home/jkb4y/work/data/Joe/HapMapCorrect/'

    map_annotations = fix_it.build_annot_dict(
        'MAP', fix_it.locate_annot_dict(build))
    hg19_annotations = fix_it.build_annot_dict(
        'LOG', fix_it.locate_annot_dict('hg19'))

    intersect_list = read_list(intersect_loc, hg19_annotations)
    append_assoc(assoc_loc, outfolder, map_annotations, intersect_list)
def main(argv):
    """Build the intersect list, then filter the meta and family tables.

    ``argv`` is accepted but not read. ``make_list`` is called for its
    effects (its return value is not used); afterwards ``filter_table`` runs
    on ``META_LOC`` with tag 'META' and on ``FAM_TABLE`` with tag 'FAMILY',
    both against ``LIST_LOC``.
    """
    # CHR_TABLE is read but not used below.
    cc_loc, fam_loc, list_loc, meta_loc, chr_table = (
        CC_LOC, FAM_LOC, LIST_LOC, META_LOC, CHR_TABLE)

    annotations = fix_it.build_annot_dict(
        'LOG', fix_it.locate_annot_dict('hg19'))
    make_list(cc_loc, fam_loc, BASE_LIST_LOC, annotations)

    filter_table(meta_loc, list_loc, 'META')
    filter_table(FAM_TABLE, list_loc, 'FAMILY')
def main(argv):
    """Fix the table at ``table_loc`` and write its key file.

    After parsing ``argv``, the manifest dictionary is (re)written first when
    a manifest location was supplied. The manifest and 'HAPMAP' annotation
    dictionaries are then loaded and handed to ``fix_table`` and
    ``create_key``.
    """
    global manifest_loc, table_loc, build
    cl_arguments(argv)

    rewrite_manifest = manifest_loc is not None
    if rewrite_manifest:
        print("WRITING MANIFEST DICT!")
        write_manifest_dict(manifest_loc)

    manifest = build_manifest_dict(locate_manifest_dict())
    annotations = fix_it.build_annot_dict(
        'HAPMAP', fix_it.locate_annot_dict(build))

    fix_table(table_loc, build, manifest, annotations)
    create_key(table_loc, manifest, annotations,
               locate_table_key(table_loc))
def main():
    """Write a tab-delimited key for every SNP in ``MAP_FILE``.

    Each line of the map file is split on whitespace; column 1 is taken as
    the SNP name and column 3 as its position. The SNP is looked up in the
    'MAP' annotation dictionary for ``BUILD``:

    * found: the row holds the name, ``rs``, ``hg18_pos``, ``hg19_pos`` and
      ``lz`` attributes of the lookup result, and 'yes'/'no' depending on
      whether the result's ``name`` is in the intersect list;
    * not found: the name is repeated in the rs column, the map position is
      used, hg19 is '??', the lz value is 'chr16:<pos>' and intersect is 'no'.

    The output goes to ``OUT_FILE``. Both files are managed by ``with`` so
    the handles are released even if a lookup or a write raises.
    """
    build = BUILD
    out_file = OUT_FILE
    map_file = MAP_FILE
    intersect_loc = INTERSECT_LOC
    title_list = ['imchip_name', 'rs_name', 'hg18_pos', 'hg19_pos',
                  'hg18_lz', 'intersect']

    annot_dict_loc = fix_it.locate_annot_dict(build)
    annot_dict = fix_it.build_annot_dict('MAP', annot_dict_loc)
    intersect_annot_loc = fix_it.locate_annot_dict('hg19')
    intersect_annot_dict = fix_it.build_annot_dict('LOG', intersect_annot_loc)
    intersect_list = read_list(intersect_loc, intersect_annot_dict)

    # Column positions within a map-file line.
    index_dict = {'chr': 0, 'pos': 3, 'snp': 1}

    with open(out_file, mode="w") as out:
        out.write('\t'.join(title_list) + '\n')
        with open(map_file, mode="r") as mappy:
            for line in mappy:
                line_list = line.strip().split()
                im = line_list[index_dict['snp']]
                info = look_up_snp(im, annot_dict)
                intersect = 'no'
                if info is not None:
                    if info.name in intersect_list:
                        intersect = 'yes'
                    row = [im, info.rs, info.hg18_pos, info.hg19_pos,
                           info.lz, intersect]
                else:
                    # Unannotated SNP: fall back to the map position.
                    pos = line_list[index_dict['pos']]
                    row = [im, im, pos, '??', 'chr16:' + pos, intersect]
                out.write('\t'.join(row) + '\n')
def fix_eur(orig_table, out_table, fix_dict):
    """Copy ``orig_table`` to ``out_table``, patching rows named in ``fix_dict``.

    The header row gains an ``eurlowP_SNP`` column. For each data row whose
    SNP is a key of ``fix_dict``, the z, weight and a1 columns are replaced
    by that entry's ``z``, ``w`` and ``a1`` attributes, the a2 and direction
    columns are set to '.', and the new column is 'yes'; every other row gets
    '.'. Only the first row seen for a given SNP is patched. Keys that never
    appear in the table are reported on stdout and appended as new rows,
    taking chromosome and position from the hg18 'MAP' annotation dictionary.

    Args:
        orig_table: path of the whitespace-delimited input table; its first
            line is the header passed to ``pc_toolbox.read_meta_titles``.
        out_table: path of the tab-delimited table to write.
        fix_dict: mapping of SNP name to an object with ``z``, ``w`` and
            ``a1`` attributes. Values are joined into the output line as-is,
            so they are presumably strings -- TODO confirm with callers.
    """
    # A set gives O(1) membership tests and supports removal; calling
    # ``.remove()`` on ``dict.keys()`` fails on Python 3, where it is a view.
    pending = set(fix_dict)
    index_dict = None

    # ``with`` guarantees the output handle is closed even on an exception.
    with open(out_table, mode="w") as out:
        with open(orig_table, mode="r") as orig:
            for line_no, line in enumerate(orig):
                fields = line.strip().split()
                if line_no == 0:
                    index_dict = pc_toolbox.read_meta_titles(line)
                    fields.append('eurlowP_SNP')
                    out.write('\t'.join(fields) + '\n')
                    continue

                lp_flag = '.'
                snp = fields[index_dict['snp']]
                if snp in pending:
                    fix = fix_dict[snp]
                    fields[index_dict['z']] = fix.z
                    fields[index_dict['weight']] = fix.w
                    fields[index_dict['a1']] = fix.a1
                    fields[index_dict['a2']] = '.'
                    fields[index_dict['direction']] = '.'
                    # Later rows with the same SNP are left untouched.
                    pending.discard(snp)
                    lp_flag = 'yes'
                fields.append(lp_flag)
                out.write('\t'.join(fields) + '\n')

        # Keep the dictionary's own ordering for the rows appended below.
        missing = [key for key in fix_dict if key in pending]
        if missing:
            print("{0} keys not found.".format(len(missing)))
            annot_dict_loc = fix_it.locate_annot_dict('hg18')
            annot_dict = fix_it.build_annot_dict('MAP', annot_dict_loc)
            for key in missing:
                print(key)
                fix = fix_dict[key]
                key_info = [
                    key,
                    fix.a1,
                    '.',                       # a2
                    fix.w,
                    fix.z,
                    '0',                       # p
                    '.',                       # direction
                    annot_dict[key].hg18_chr,
                    annot_dict[key].hg18_pos,
                    'yes',                     # eurlowP_SNP flag
                ]
                out.write('\t'.join(key_info) + '\n')
def main():
    """Collect SNPs from the table and write them out.

    Options come from ``sys.argv[1:]``. The region list and the 'LOG'
    annotation dictionary for ``build`` are loaded, ``read_table`` gathers
    the SNP list, the count is printed, and ``write_results`` writes the
    list to ``out_file``.
    """
    global out_file, table_loc, assoc, region_loc, build
    cl_arguments(sys.argv[1:])

    regions = pc_toolbox.create_region_list(region_loc)
    annotations = fix_it.build_annot_dict(
        'LOG', fix_it.locate_annot_dict(build))

    hits = read_table(table_loc, regions, assoc, annotations)
    hit_count = len(hits)
    print("inter_region_yank finds {0} SNPs of significance!".format(hit_count))
    write_results(hits, out_file, assoc, annotations)
def main(argv):
    """Write the '_JB' multi table for ``table_loc`` and index it.

    After parsing ``argv``, the Achilleas key (keyed by 'IM') and the 'MAP'
    annotation dictionary are loaded and passed to ``write_multi_table``,
    which writes next to the input table with '_JB' inserted before the
    extension. The new table is then indexed with ``create_eqtl_index`` and
    the 'LOG' annotation dictionary is built.
    """
    global table_loc, region_loc, achilleas_key, build, out_base
    cl_arguments(argv)

    # Flag passed to create_eqtl_index: does the table path contain 'perm'?
    is_perm = 'perm' in table_loc

    key_by_im = build_key(ACHILLEAS_KEY_LOC, 'IM')
    annot_dict_loc = fix_it.locate_annot_dict(build)
    map_annotations = fix_it.build_annot_dict('MAP', annot_dict_loc)

    stem, suffix = os.path.splitext(table_loc)
    multi_loc = stem + '_JB' + suffix
    write_multi_table(table_loc, multi_loc, key_by_im, map_annotations)

    # Neither result is used further in this function.
    index_dict = create_eqtl_index(multi_loc, is_perm)
    log_dict = fix_it.build_annot_dict('LOG', annot_dict_loc)
def main(argv):
    """Report duplicate SNPs in ``table_loc`` by name and by position.

    Writes three lists next to the input table: '<base>_dupNames.list' from
    ``find_dup_names``, '<base>_imDups.list' with the first result of
    ``find_dups``, and '<base>_allDups.list' via ``count_dups``. Progress and
    counts are printed along the way.
    """
    global manifest_loc, table_loc, build, table_type, purpose
    cl_arguments(argv)

    stem = os.path.splitext(table_loc)[0]
    im_dup_loc = stem + '_imDups.list'
    all_dup_loc = stem + '_allDups.list'
    dup_names_loc = stem + '_dupNames.list'

    # Every table type except 'HAPMAP' uses the 'LOG' dictionary.
    key_type = table_type if table_type == 'HAPMAP' else 'LOG'
    annotations = fix_it.build_annot_dict(
        key_type, fix_it.locate_annot_dict(build))

    print("Now gathering info from table....")
    snp_records = list_info(table_loc, table_type)

    # Duplicates by current SNP name.
    print("Now searching list for duplicate snp names....")
    dup_name_count, dup_name_list = find_dup_names(snp_records)
    print("FOUND {0} DUPLICATE SNP NAMES!!".format(dup_name_count))
    write_snp_list(dup_name_list, dup_names_loc)

    # Duplicates by position: sort on (chro, pos) before scanning.
    print("Now sorting list for snp positions....")
    by_position = sorted(snp_records, key=attrgetter('chro', 'pos'))
    non_rs_dups, all_dups, rs_dup = find_dups(by_position, annotations)
    write_snp_list(non_rs_dups, im_dup_loc)

    all_count, rs_count = count_dups(all_dups, all_dup_loc)
    print("\nTHERE ARE {0} TOTAL DUPLICATES --- {1} ORIGINALLY WITH RS-ID NAMES".format(all_count, rs_count))

    # Fetched but not used further in this function.
    repair_dict = fix_it.DUPLICATE_FIX_DICT
def main(argv):
    """Pull family data for the yanked meta SNPs, then run ``meta_yank``.

    ``argv`` is accepted but not read; every path below is hard-coded.
    The SNP list is read from the meta yank table, ``retrieve_fam_data``
    writes the matching family rows to ``out_loc``, and ``meta_yank.main``
    is then invoked with the family table and the PLINK inputs.
    """
    meta_yank_loc = '/home/jkb4y/work/results/Intersection/eurmeta_06062012/RegionYank/eurmeta_yank.tbl'
    fam_loc = '/home/jkb4y/work/data/2012Feb1/Family_data/eurgdtscan_06062012_lz_hg19_intersect.txt'
    out_loc = '/home/jkb4y/work/results/Intersection/family/eurmeta_snp_info.tbl'
    yank_out = '/home/jkb4y/work/results/Intersection/family/RegionYank/family'
    BFILE = '/home/jkb4y/work/data/2012Feb1/intersect_fam_uk/hg19/intersect'
    FREQ = '/home/jkb4y/work/data/2012Feb1/intersect_fam_uk/hg19/intersect_controls.frq'
    REGION_LOC = '/home/jkb4y/work/data/Region_Lists/hg19/T1D_regions_hg19_05242012.txt'
    COVAR = '/home/jkb4y/work/data/2012Feb1/intersect_fam_uk/hg19/intersect.cov'
    build = 'hg19'
##
##    table_yank.main(['--family',fam_loc, '--build',build,
##                    '--bfile',BFILE,'--freq',FREQ,'-r',REGION_LOC,
##                    '--covar',COVAR, '-o', yank_out])
    annot_dict_loc = fix_it.locate_annot_dict(build)
    annot_dict = fix_it.build_annot_dict('LOG',annot_dict_loc)
    snp_list = get_snp_list(meta_yank_loc)
    # The family table is used as given; the "fixed" table lookup is disabled.
    fixed_table_loc = fam_loc
    #fixed_table_loc = fix_it.locate_fixed_table(fam_loc, build)
    retrieve_fam_data(snp_list, fixed_table_loc, out_loc, annot_dict)
##
    # NOTE(review): confirm this call is meant to be active; it is the only
    # consumer of BFILE, FREQ, REGION_LOC, COVAR and yank_out.
    meta_yank.main(['--family',fam_loc, '--build',build,
                    '--bfile',BFILE,'--freq',FREQ,'-r',REGION_LOC,
                    '--covar',COVAR, '-o', yank_out])
def create_annot_dict(build, purpose):
    """Return the annotation dictionary for ``build`` built for ``purpose``.

    Thin wrapper: the dictionary file is located with
    ``fix_it.locate_annot_dict`` and loaded with ``fix_it.build_annot_dict``.
    """
    return fix_it.build_annot_dict(purpose, fix_it.locate_annot_dict(build))
def main(argv):
    """Run the per-SNP conditional analysis for one region and tabulate it.

    After parsing ``argv`` with ``cl_arguments``, the function creates the
    'ResultTables', 'SummaryTables', 'chr<chromosome>' and region folders
    under ``outfolder`` as needed and writes a header row to the region
    table. For every (snp, position) pair from ``make_snp_pos_list`` it
    optionally writes and runs a PLINK script, reads the result for
    ``snpstar`` with ``filter_result``, looks up frequency and r2, and
    appends one row to the table. Finally the table is copied into
    'ResultTables' and summarized into 'SummaryTables'.
    """
    global outfolder, assoc, chromosome,snpstar
    global out_flag, user_script_loc
    global range_start_bp, range_end_bp, hit_index, hitstring
    global region_id, build, single, ldfolder, freq_loc, multi, hit1
    cl_arguments(argv)
    # Suffix inserted into the table file names; stays empty unless `multi`.
    placeholder = ''
    annot_dict_loc = fix_it.locate_annot_dict(build)
    annot_dict = fix_it.build_annot_dict('LOG',annot_dict_loc)
    snpstar_im = annot_dict[snpstar].name
    table_folder = os.path.join(outfolder, 'ResultTables')
    summary_folder = os.path.join(outfolder, 'SummaryTables')
    # Zero-pad single-digit hit indexes to two characters.
    str_hit_index = str(hit_index)
    if hit_index < 10:
        str_hit_index = '0'+str_hit_index
    if not os.path.exists(table_folder):
        os.makedirs(table_folder)
    if not os.path.exists(summary_folder):
        os.makedirs(summary_folder)
    chr_folder = os.path.join(outfolder, 'chr{0}'.format(chromosome))
    if not os.path.exists(chr_folder):
        os.makedirs(chr_folder)
    reg_folder = os.path.join(chr_folder, region_id)
    if multi:
        placeholder = '_'+snpstar+'_'+str_hit_index
    if not os.path.exists(reg_folder):
        os.makedirs(reg_folder)
    super_outbase = os.path.join(reg_folder, region_id)
    list_loc = super_outbase + '_snps.list'
    table_loc = os.path.join(chr_folder, region_id+placeholder+'.tbl')
    new_table_loc = os.path.join(table_folder, region_id+placeholder+'.tbl')
    summary_table_loc = os.path.join(summary_folder, region_id+placeholder+'.tbl')
    print(table_loc)
    snplist = make_snp_pos_list(assoc,list_loc)
    ld_loc = os.path.join(ldfolder,'chr{0}'.format(chromosome),
                          '{0}_r2_0.ld'.format(region_id))
    # Start the table afresh with its header; rows are appended in the loop.
    table = open(table_loc, mode="w")
    table.write('\t'.join(['SNP*','SNP*_pos','SNP*_im','conditional_snp',
                           'csnp_im','csnp_pos','SNP*_pvalue','OR','ci_lo','ci_hi',
                           'a1',"r2","csnp_freq","csnp_freq_a1"])+'\n')
    table.close()
    # 1-based counter used only in the progress banner.
    index = 1
    for snp_tuple in snplist:
        snp = snp_tuple[0]
        snp_pos = snp_tuple[1]
        snp_im = annot_dict[snp].name
        # ':' in the SNP name is replaced by '_' for the output base name.
        corrected_snp = snp.replace(':','_')
        outbase = super_outbase+'_'+corrected_snp
        script_loc = outbase + '.script'
        assoc_out = outbase +'.assoc.logistic'
        # NOTE(review): banner whitespace below is reproduced as found;
        # confirm the intended line breaks inside the string.
        if hit1:
            write_script(outbase,script_loc,
                         user_script_loc, snp, single, hitstring)
            plink(script_loc)
            print(''' ********************************************************************** ************************************************************ The data will be conditioned on the following SNP: %%% {0} This is snp #{1} in a list of {2}. ****************************************************** ********************************************************************** '''.format(snp,index,len(snplist)))
##        if snp == snpstar:
##            write_script(outbase,script_loc, user_script_loc, snp, single, hitstring)
##            plink(script_loc)
        elif not multi:
            write_script(outbase,script_loc, user_script_loc, snp, single, snpstar)
            plink(script_loc)
            print(''' ********************************************************************** ************************************************************ The data will be conditioned on the following SNP: %%% {0} This is snp #{1} in a list of {2}. ****************************************************** ********************************************************************** '''.format(snp,index,len(snplist)))
        # When neither branch ran, assoc_out is read without PLINK being
        # run in this iteration.
        info = filter_result(assoc_out,snpstar)
        snp_freq = pc_toolbox.retrieve_freq(freq_loc, snp)
        r2 = pc_toolbox.retrieve_r2(snpstar,snp,ld_loc)
        if r2 is None:
            r2 = "???"
        index = index + 1
        with open(table_loc, mode='a') as table:
            table.write('\t'.join([snpstar,info.pos, snpstar_im,
                                   snp,snp_im,snp_pos,str(info.p),
                                   info.OR,info.lo,info.hi,
                                   info.a1,r2,snp_freq[0],snp_freq[1]])+'\n')
    shutil.copy(table_loc, new_table_loc)
    summarize_table(new_table_loc, summary_table_loc, snpstar)