def get_region_from_indiv(indiv, wssd_lib_dir, bac_analysis_lib_dir, chr,
                          start, end, fn_gc_table, fn_contig):
    """Sum the adjusted depth of every QC-passing library over one region.

    Every entry of ``bac_analysis_lib_dir`` whose name does not start with
    "_" is treated as a library.  For each library accepted by
    ``kgf.lib_pass_qc`` the file ``<wssd_lib_dir>/<lib>/hg18rmsk.wssd`` is
    opened read-only and the result of ``get_adjusted_depth`` is added to a
    running total.

    Args:
        indiv: Not used by this function; kept for caller compatibility.
        wssd_lib_dir: Directory holding one ``<lib>/hg18rmsk.wssd`` per
            library.
        bac_analysis_lib_dir: Directory listed to discover library names;
            ``<bac_analysis_lib_dir>/<lib>`` is the per-library QC directory.
        chr: Forwarded unchanged to ``get_adjusted_depth``.
        start: Forwarded to ``get_adjusted_depth``; with ``end`` it sizes the
            accumulator.
        end: See ``start``.
        fn_gc_table: File opened read-only as a ``DenseTrackSet``.
        fn_contig: Passed to both ``DenseTrackSet`` and ``WssdFile``.

    Returns:
        A numpy array of length ``end - start`` holding the summed
        per-library depth (all zeros when no library passes QC).
    """
    gc_content_table = DenseTrackSet(fn_contig,
                                     fn_gc_table,
                                     overwrite=False,
                                     openMode='r')
    try:
        read_depth = np.zeros(end - start)
        print(read_depth)
        print(end - start)

        # NOTE(review): unlike get_passing_libs, entries starting with "."
        # are not skipped here -- confirm whether that is intended.
        for lib in os.listdir(bac_analysis_lib_dir):
            if lib.startswith("_"):
                continue
            lib_qc_dir = "%s/%s" % (bac_analysis_lib_dir, lib)
            if not kgf.lib_pass_qc(lib_qc_dir):
                continue

            print("library %s passed qc: loading..." % (lib))
            fn_wssd = "%s/%s/hg18rmsk.wssd" % (wssd_lib_dir, lib)
            gc_correction_vect = load_gc_correction_vect(lib_qc_dir)

            print("opening wssd %s" % (fn_wssd))
            wssd = WssdFile(fn_contig,
                            fn_wssd,
                            overwrite=False,
                            openMode='r')
            try:
                print("wssd opened..")
                # The trailing 0 is passed through as in the original call;
                # its meaning is defined by get_adjusted_depth.
                read_depth += get_adjusted_depth(wssd, "wssd",
                                                 gc_content_table,
                                                 "GC_content",
                                                 chr, start, end,
                                                 gc_correction_vect, 0)
            finally:
                # Close the per-library handle even if the depth lookup fails.
                wssd.tbl.close()
                del wssd
    finally:
        # The GC table is shared by all libraries; release it exactly once.
        gc_content_table.tbl.close()
        del gc_content_table

    return read_depth
def get_region_from_indiv(indiv, wssd_lib_dir, bac_analysis_lib_dir, chr,
                          start, end, fn_gc_table, fn_contig):
    """Sum the adjusted depth of every QC-passing library over one region.

    Every entry of ``bac_analysis_lib_dir`` whose name does not start with
    "_" is treated as a library.  For each library accepted by
    ``kgf.lib_pass_qc`` the file ``<wssd_lib_dir>/<lib>/hg18rmsk.wssd`` is
    opened read-only and the result of ``get_adjusted_depth`` is added to a
    running total.

    Progress messages use the single-argument ``print(...)`` form, which
    produces the same output under Python 2 and Python 3.

    Args:
        indiv: Not used by this function; kept for caller compatibility.
        wssd_lib_dir: Directory holding one ``<lib>/hg18rmsk.wssd`` per
            library.
        bac_analysis_lib_dir: Directory listed to discover library names;
            ``<bac_analysis_lib_dir>/<lib>`` is the per-library QC directory.
        chr: Forwarded unchanged to ``get_adjusted_depth``.
        start: Forwarded to ``get_adjusted_depth``; with ``end`` it sizes the
            accumulator.
        end: See ``start``.
        fn_gc_table: File opened read-only as a ``DenseTrackSet``.
        fn_contig: Passed to both ``DenseTrackSet`` and ``WssdFile``.

    Returns:
        A numpy array of length ``end - start`` holding the summed
        per-library depth (all zeros when no library passes QC).
    """
    gc_content_table = DenseTrackSet(fn_contig,
                                     fn_gc_table,
                                     overwrite=False,
                                     openMode='r')
    try:
        read_depth = np.zeros(end - start)
        print(read_depth)
        print(end - start)

        # NOTE(review): unlike get_passing_libs, entries starting with "."
        # are not skipped here -- confirm whether that is intended.
        for lib in os.listdir(bac_analysis_lib_dir):
            if lib.startswith("_"):
                continue
            lib_qc_dir = "%s/%s" % (bac_analysis_lib_dir, lib)
            if not kgf.lib_pass_qc(lib_qc_dir):
                continue

            print("library %s passed qc: loading..." % (lib))
            fn_wssd = "%s/%s/hg18rmsk.wssd" % (wssd_lib_dir, lib)
            gc_correction_vect = load_gc_correction_vect(lib_qc_dir)

            print("opening wssd %s" % (fn_wssd))
            wssd = WssdFile(fn_contig,
                            fn_wssd,
                            overwrite=False,
                            openMode='r')
            try:
                print("wssd opened..")
                # The trailing 0 is passed through as in the original call;
                # its meaning is defined by get_adjusted_depth.
                read_depth += get_adjusted_depth(wssd, "wssd",
                                                 gc_content_table,
                                                 "GC_content",
                                                 chr, start, end,
                                                 gc_correction_vect, 0)
            finally:
                # Close the per-library handle even if the depth lookup fails.
                wssd.tbl.close()
                del wssd
    finally:
        # The GC table is shared by all libraries; release it exactly once.
        gc_content_table.tbl.close()
        del gc_content_table

    return read_depth
def get_passing_libs(bac_analysis_lib_dir, max_correction, overide_thresh):
    """Map each QC-passing library name to its GC depth correction.

    Entries of ``bac_analysis_lib_dir`` whose names start with "_" or "."
    are ignored.  Every remaining entry is checked with
    ``kgf.lib_pass_qc(<dir>/<lib>, overide_thresh)``; accepted libraries are
    reported on stdout and recorded.

    Args:
        bac_analysis_lib_dir: Directory whose entries are library names.
        max_correction: Forwarded to
            ``kgf.get_GC_depth_correction_from_dir``.
        overide_thresh: Forwarded to ``kgf.lib_pass_qc``.

    Returns:
        dict mapping library name to a one-element list holding the value
        returned by ``kgf.get_GC_depth_correction_from_dir``.  An empty dict
        is returned when nothing passes.
    """
    passing_libs = {}
    for lib in os.listdir(bac_analysis_lib_dir):
        # Skip private ("_") and hidden (".") directory entries.
        if lib.startswith(("_", ".")):
            continue

        lib_qc_dir = "%s/%s" % (bac_analysis_lib_dir, lib)
        if not kgf.lib_pass_qc(lib_qc_dir, overide_thresh):
            continue

        print("lib passed: %s" % (lib))
        # The correction stays wrapped in a list, as in the original code.
        passing_libs[lib] = [
            kgf.get_GC_depth_correction_from_dir(lib_qc_dir, max_correction)
        ]
    return passing_libs
def get_passing_libs(bac_analysis_lib_dir, max_correction, overide_thresh):
    """Collect the GC depth correction of every library that passes QC.

    The directory ``bac_analysis_lib_dir`` is listed and each entry is taken
    as a library name, except names beginning with "_" or ".".  A library is
    kept when ``kgf.lib_pass_qc(<dir>/<lib>, overide_thresh)`` is truthy; its
    name is then printed to stdout.

    Args:
        bac_analysis_lib_dir: Directory whose entries are library names.
        max_correction: Passed through to
            ``kgf.get_GC_depth_correction_from_dir``.
        overide_thresh: Passed through to ``kgf.lib_pass_qc``.

    Returns:
        dict of ``{lib_name: [correction]}`` where ``correction`` is whatever
        ``kgf.get_GC_depth_correction_from_dir`` returned for that library.
        Empty when no library passes.
    """
    passing_libs = {}
    skipped_prefixes = ("_", ".")

    for lib in os.listdir(bac_analysis_lib_dir):
        if lib.startswith(skipped_prefixes):
            continue

        lib_qc_dir = "%s/%s" % (bac_analysis_lib_dir, lib)
        if kgf.lib_pass_qc(lib_qc_dir, overide_thresh):
            print("lib passed: %s" % (lib))
            correction = kgf.get_GC_depth_correction_from_dir(
                lib_qc_dir, max_correction)
            # Stored as a single-element list, matching the original shape.
            passing_libs[lib] = [correction]

    return passing_libs
def analyze_coverage_to_correlation(analysis_dir):
    """Gather (effective coverage, correlation) pairs for mrsfast libraries.

    Walks ``<analysis_dir>/mrsfast/<indiv>/<lib>`` and, for each library for
    which ``get_lib_cov_corr`` returns a value, records the returned
    ``(ec, corr)`` pair under its individual.  Libraries whose names start
    with "_", "do_" or "." are ignored.  Also counts how many considered
    libraries there are and how many of those with a value are accepted by
    ``kgf.lib_pass_qc``; both counts appear in the plot title.

    Only the ``mrsfast`` entry of ``analysis_dir`` is analysed; when it is
    absent the function returns right after printing its banner.

    Args:
        analysis_dir: Directory containing one sub-directory per algorithm.
            Its second-to-last "/"-separated component is used as the pilot
            label.

    Returns:
        None.  Output is stdout messages plus calls into the plotting
        functions the module has in scope.
    """
    print("************************")
    print("analyze_coverage_to_correlation")
    print("************************")

    s_analysis_dir = analysis_dir.split("/")
    # Explicit len()-2 keeps the original wrap-around for one-component paths.
    pilot = s_analysis_dir[len(s_analysis_dir) - 2]
    print(s_analysis_dir)

    # The original loop skipped every algorithm except "mrsfast"; directory
    # entries are unique, so a membership test is equivalent.
    if "mrsfast" not in os.listdir(analysis_dir):
        return

    pass_thresh = 0
    total = 0

    print(analysis_dir)
    alg_dir = "%s/%s" % (analysis_dir, "mrsfast")
    print(alg_dir)

    anal_by_indivs = {}
    for indiv in os.listdir(alg_dir):
        ecs = []
        corrs = []
        anal_by_indivs[indiv] = {"ecs": ecs, "corrs": corrs}

        indiv_dir = "%s/%s" % (alg_dir, indiv)
        for lib in os.listdir(indiv_dir):
            if lib.startswith(("_", "do_", ".")):
                continue
            total += 1
            lib_dir = "%s/%s" % (indiv_dir, lib)

            # Look the value up once instead of once to test and once to use.
            cov_corr = get_lib_cov_corr(lib_dir)
            if cov_corr is None:
                continue
            # NOTE(review): reconstructed from collapsed source -- the QC
            # check is assumed to gate only the counter, so every library
            # with a value is recorded.  Confirm against the original layout.
            if kgf.lib_pass_qc(lib_dir):
                pass_thresh += 1
            (ec, corr) = cov_corr
            ecs.append(ec)
            corrs.append(corr)

    do_plot = False
    if do_plot:
        figure(1)

    indivs = []
    for indiv in anal_by_indivs:
        indivs.append(indiv)
        ecs_corrs = anal_by_indivs[indiv]
        ecs = np.array(ecs_corrs["ecs"])
        corrs = np.array(ecs_corrs["corrs"])
        marker = '.'
        if indiv[2:4] == "19" and pilot == "Pilot2":
            marker = '^'
        if do_plot:
            plot(ecs, corrs, marker)

    # NOTE(review): as in the original token order, the calls below are not
    # guarded by do_plot; only figure(), the per-individual plot() and
    # savefig() are.  Confirm this is intended.
    thresh = kgf.get_lib_pass_thresh()
    plot(np.array((0, 12)), np.array((thresh, thresh)), "r--")
    indivs.append("threshold")
    if pilot == "Pilot2":
        legend(indivs, loc=4)
    xlabel("effective coverage bp")
    ylabel("correlation with bacs (X inc)")
    title("%s - correlation vs ec - %d/%d meet threshold" %
          (pilot, pass_thresh, total))

    figurename = "%s/%s" % (analysis_dir,
                            "/_all_alg_analysis/ec_vs_corr_dim0.png")
    if do_plot:
        savefig(figurename, format='png')
    print(figurename)
    close(1)
def analyze_coverage_to_correlation(analysis_dir):
    """Gather (effective coverage, correlation) pairs for mrsfast libraries.

    Walks ``<analysis_dir>/mrsfast/<indiv>/<lib>`` and, for each library for
    which ``get_lib_cov_corr`` returns a value, records the returned
    ``(ec, corr)`` pair under its individual.  Libraries whose names start
    with "_", "do_" or "." are ignored.  Also counts how many considered
    libraries there are and how many of those with a value are accepted by
    ``kgf.lib_pass_qc``; both counts appear in the plot title.

    Only the ``mrsfast`` entry of ``analysis_dir`` is analysed; when it is
    absent the function returns right after printing its banner.

    Messages use the single-argument ``print(...)`` form so the output is
    the same under Python 2 and Python 3.

    Args:
        analysis_dir: Directory containing one sub-directory per algorithm.
            Its second-to-last "/"-separated component is used as the pilot
            label.

    Returns:
        None.  Output is stdout messages plus calls into the plotting
        functions the module has in scope.
    """
    print("************************")
    print("analyze_coverage_to_correlation")
    print("************************")

    s_analysis_dir = analysis_dir.split("/")
    # Explicit len()-2 keeps the original wrap-around for one-component paths.
    pilot = s_analysis_dir[len(s_analysis_dir) - 2]
    print(s_analysis_dir)

    # The original loop skipped every algorithm except "mrsfast"; directory
    # entries are unique, so a membership test is equivalent.
    if "mrsfast" not in os.listdir(analysis_dir):
        return

    pass_thresh = 0
    total = 0

    print(analysis_dir)
    alg_dir = "%s/%s" % (analysis_dir, "mrsfast")
    print(alg_dir)

    anal_by_indivs = {}
    for indiv in os.listdir(alg_dir):
        ecs = []
        corrs = []
        anal_by_indivs[indiv] = {"ecs": ecs, "corrs": corrs}

        indiv_dir = "%s/%s" % (alg_dir, indiv)
        for lib in os.listdir(indiv_dir):
            if lib.startswith(("_", "do_", ".")):
                continue
            total += 1
            lib_dir = "%s/%s" % (indiv_dir, lib)

            # Look the value up once instead of once to test and once to use.
            cov_corr = get_lib_cov_corr(lib_dir)
            if cov_corr is None:
                continue
            # NOTE(review): reconstructed from collapsed source -- the QC
            # check is assumed to gate only the counter, so every library
            # with a value is recorded.  Confirm against the original layout.
            if kgf.lib_pass_qc(lib_dir):
                pass_thresh += 1
            (ec, corr) = cov_corr
            ecs.append(ec)
            corrs.append(corr)

    do_plot = False
    if do_plot:
        figure(1)

    indivs = []
    for indiv in anal_by_indivs:
        indivs.append(indiv)
        ecs_corrs = anal_by_indivs[indiv]
        ecs = np.array(ecs_corrs["ecs"])
        corrs = np.array(ecs_corrs["corrs"])
        marker = '.'
        if indiv[2:4] == "19" and pilot == "Pilot2":
            marker = '^'
        if do_plot:
            plot(ecs, corrs, marker)

    # NOTE(review): as in the original token order, the calls below are not
    # guarded by do_plot; only figure(), the per-individual plot() and
    # savefig() are.  Confirm this is intended.
    thresh = kgf.get_lib_pass_thresh()
    plot(np.array((0, 12)), np.array((thresh, thresh)), "r--")
    indivs.append("threshold")
    if pilot == "Pilot2":
        legend(indivs, loc=4)
    xlabel("effective coverage bp")
    ylabel("correlation with bacs (X inc)")
    title("%s - correlation vs ec - %d/%d meet threshold" %
          (pilot, pass_thresh, total))

    figurename = "%s/%s" % (analysis_dir,
                            "/_all_alg_analysis/ec_vs_corr_dim0.png")
    if do_plot:
        savefig(figurename, format='png')
    print(figurename)
    close(1)