Esempio n. 1
0
def get_region_from_indiv(indiv, wssd_lib_dir, bac_analysis_lib_dir, chr,
                          start, end, fn_gc_table, fn_contig):
    """Sum the adjusted read depth of every QC-passing library over a region.

    Every entry of ``bac_analysis_lib_dir`` whose name does not start with
    ``_`` is treated as a library.  For each library accepted by
    ``kgf.lib_pass_qc`` the file ``<wssd_lib_dir>/<lib>/hg18rmsk.wssd`` is
    opened with ``openMode='r'`` and the array returned by
    ``get_adjusted_depth`` is added to a running total.

    Args:
        indiv: Not used by this function; kept so existing callers still work.
        wssd_lib_dir: Directory containing one sub-directory per library,
            each holding that library's ``hg18rmsk.wssd`` file.
        bac_analysis_lib_dir: Directory containing one QC sub-directory per
            library.
        chr: Forwarded unchanged to ``get_adjusted_depth`` (presumably the
            chromosome/contig name -- confirm against that helper).
        start: Region start, forwarded to ``get_adjusted_depth``.
        end: Region end; the result has ``end - start`` elements.
        fn_gc_table: Second argument given to ``DenseTrackSet`` (the GC
            content table file).
        fn_contig: First argument given to both ``DenseTrackSet`` and
            ``WssdFile``.

    Returns:
        numpy.ndarray: Array of ``end - start`` values holding the summed
        depth; all zeros when no library passes QC.
    """
    grp_wssd = "wssd"
    grp_gc = "GC_content"

    gc_content_table = DenseTrackSet(fn_contig,
                                     fn_gc_table,
                                     overwrite=False,
                                     openMode='r')
    # try/finally so the GC table is closed even if a library fails to load.
    try:
        read_depth = np.zeros(end - start)
        print(read_depth)
        print(end - start)

        for lib in os.listdir(bac_analysis_lib_dir):
            # Entries prefixed with "_" are not libraries.
            if lib.startswith("_"):
                continue
            lib_qc_dir = "%s/%s" % (bac_analysis_lib_dir, lib)
            if not kgf.lib_pass_qc(lib_qc_dir):
                continue

            print("library %s passed qc: loading..." % (lib))
            fn_wssd = "%s/%s/hg18rmsk.wssd" % (wssd_lib_dir, lib)
            gc_correction_vect = load_gc_correction_vect(lib_qc_dir)

            print("opening wssd %s" % (fn_wssd))
            wssd = WssdFile(fn_contig,
                            fn_wssd,
                            overwrite=False,
                            openMode='r')
            # Release the per-library handle whether or not the depth
            # computation succeeds.
            try:
                print("wssd opened..")
                # NOTE(review): meaning of the trailing 0 is defined by
                # get_adjusted_depth, which is not visible here.
                read_depth += get_adjusted_depth(wssd, grp_wssd,
                                                 gc_content_table, grp_gc,
                                                 chr, start, end,
                                                 gc_correction_vect, 0)
            finally:
                wssd.tbl.close()
                del wssd
    finally:
        gc_content_table.tbl.close()
        del gc_content_table

    return read_depth
Esempio n. 2
0
def get_region_from_indiv(indiv, wssd_lib_dir, bac_analysis_lib_dir, chr,
                          start, end, fn_gc_table, fn_contig):
    """Accumulate adjusted read depth over a region across QC-passing libraries.

    Each entry of ``bac_analysis_lib_dir`` not starting with ``_`` is taken
    as a library name.  Libraries for which ``kgf.lib_pass_qc`` is true have
    their ``<wssd_lib_dir>/<lib>/hg18rmsk.wssd`` file opened with
    ``openMode='r'``; the array returned by ``get_adjusted_depth`` is added
    to the running total.

    Args:
        indiv: Not used by this function; kept so existing callers still work.
        wssd_lib_dir: Directory with one sub-directory per library, each
            holding ``hg18rmsk.wssd``.
        bac_analysis_lib_dir: Directory with one QC sub-directory per library.
        chr: Passed through to ``get_adjusted_depth`` (presumably the
            chromosome/contig name -- confirm against that helper).
        start: Region start, passed through to ``get_adjusted_depth``.
        end: Region end; the returned array has ``end - start`` elements.
        fn_gc_table: Second argument given to ``DenseTrackSet``.
        fn_contig: First argument given to ``DenseTrackSet`` and ``WssdFile``.

    Returns:
        numpy.ndarray: ``end - start`` summed depth values (all zeros when
        no library passes QC).
    """
    gc_content_table = DenseTrackSet(fn_contig,
                                     fn_gc_table,
                                     overwrite=False,
                                     openMode='r')
    # The table handle must be released even when a library fails midway.
    try:
        read_depth = np.zeros(end - start)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(read_depth)
        print(end - start)

        for lib in os.listdir(bac_analysis_lib_dir):
            if lib.startswith("_"):
                continue
            lib_qc_dir = "%s/%s" % (bac_analysis_lib_dir, lib)
            if not kgf.lib_pass_qc(lib_qc_dir):
                continue

            print("library %s passed qc: loading..." % (lib))
            fn_wssd = "%s/%s/hg18rmsk.wssd" % (wssd_lib_dir, lib)
            gc_correction_vect = load_gc_correction_vect(lib_qc_dir)

            print("opening wssd %s" % (fn_wssd))
            wssd = WssdFile(fn_contig,
                            fn_wssd,
                            overwrite=False,
                            openMode='r')
            try:
                print("wssd opened..")
                grpWssd = "wssd"
                grpGC = "GC_content"
                # NOTE(review): the final 0 is interpreted by
                # get_adjusted_depth, which is defined outside this view.
                read_depth += get_adjusted_depth(wssd, grpWssd,
                                                 gc_content_table, grpGC,
                                                 chr, start, end,
                                                 gc_correction_vect, 0)
            finally:
                # Close the per-library file on success and on error alike.
                wssd.tbl.close()
                del wssd
    finally:
        gc_content_table.tbl.close()
        del gc_content_table

    return read_depth
def get_passing_libs(bac_analysis_lib_dir,max_correction,overide_thresh):
    """Map each QC-passing library to its GC depth-correction vector list.

    Args:
        bac_analysis_lib_dir: Directory containing one QC sub-directory per
            library.  Entries starting with ``_`` or ``.`` are ignored.
        max_correction: Forwarded to
            ``kgf.get_GC_depth_correction_from_dir``.
        overide_thresh: Forwarded to ``kgf.lib_pass_qc`` as its second
            argument.

    Returns:
        dict: Library name -> one-element list holding the value returned by
        ``kgf.get_GC_depth_correction_from_dir`` for that library.  Empty
        when no library passes.
    """
    passing_libs = {}

    for lib in os.listdir(bac_analysis_lib_dir):
        if lib.startswith(("_", ".")):
            continue
        lib_qc_dir = "%s/%s" % (bac_analysis_lib_dir, lib)
        if not kgf.lib_pass_qc(lib_qc_dir, overide_thresh):
            continue

        # Single-argument print(...) works on both Python 2 and Python 3.
        print("lib passed: %s" % (lib))
        # The value stays wrapped in a list to preserve the returned shape.
        passing_libs[lib] = [
            kgf.get_GC_depth_correction_from_dir(lib_qc_dir, max_correction)
        ]

    return passing_libs
Esempio n. 4
0
def get_passing_libs(bac_analysis_lib_dir, max_correction, overide_thresh):
    """Collect the GC depth-correction vectors of all QC-passing libraries.

    Args:
        bac_analysis_lib_dir: Directory containing one QC sub-directory per
            library.  Names beginning with ``_`` or ``.`` are skipped.
        max_correction: Passed to ``kgf.get_GC_depth_correction_from_dir``.
        overide_thresh: Passed to ``kgf.lib_pass_qc`` as its second argument.

    Returns:
        dict: Library name -> one-element list with the result of
        ``kgf.get_GC_depth_correction_from_dir``.  Empty if nothing passes.
    """
    passing_libs = {}

    for lib in os.listdir(bac_analysis_lib_dir):
        if lib.startswith(("_", ".")):
            continue
        lib_qc_dir = "%s/%s" % (bac_analysis_lib_dir, lib)
        if kgf.lib_pass_qc(lib_qc_dir, overide_thresh):
            # print(...) with one argument is valid on Python 2 and 3 alike.
            print("lib passed: %s" % (lib))
            correction = kgf.get_GC_depth_correction_from_dir(
                lib_qc_dir, max_correction)
            # Stored as a list so the returned structure is unchanged.
            passing_libs[lib] = [correction]

    return passing_libs
Esempio n. 5
0
def analyze_coverage_to_correlation(analysis_dir, do_plot=False):
    """Tally per-library coverage/correlation values and optionally plot them.

    Walks ``<analysis_dir>/mrsfast/<indiv>/<lib>``.  Library entries whose
    names start with ``_``, ``do_`` or ``.`` are skipped.  For every other
    library ``get_lib_cov_corr`` is called once; when it returns a value the
    ``(ec, corr)`` pair is recorded for that individual and the library is
    counted as passing if ``kgf.lib_pass_qc`` accepts it.

    Args:
        analysis_dir: Root analysis directory.  The second-to-last
            ``/``-separated component is used as the pilot name in the plot.
        do_plot: When true, draw the scatter plot plus a threshold line and
            save it to ``<analysis_dir>//_all_alg_analysis/ec_vs_corr_dim0.png``.
            Defaults to ``False``; no plotting function is called then.

    Returns:
        None.
    """
    print("************************")
    print("analyze_coverage_to_correlation")
    print("************************")

    s_analysis_dir = analysis_dir.split("/")
    pilot = s_analysis_dir[len(s_analysis_dir) - 2]
    print(s_analysis_dir)

    pass_thresh = 0
    total = 0
    # Bound up front so a directory without "mrsfast" yields an empty result
    # rather than an unbound local below.
    anal_by_indivs = {}

    for alg in os.listdir(analysis_dir):
        if alg != "mrsfast":
            continue
        print(analysis_dir)
        alg_dir = "%s/%s" % (analysis_dir, alg)
        print(alg_dir)

        for indiv in os.listdir(alg_dir):
            ecs = []
            corrs = []
            anal_by_indivs[indiv] = {"ecs": ecs, "corrs": corrs}

            indiv_dir = "%s/%s" % (alg_dir, indiv)
            for lib in os.listdir(indiv_dir):
                if lib.startswith(("_", "do_", ".")):
                    continue
                total += 1
                lib_dir = "%s/%s" % (indiv_dir, lib)

                # Fetch once and reuse instead of calling the helper twice.
                cov_corr = get_lib_cov_corr(lib_dir)
                if cov_corr is None:
                    continue
                if kgf.lib_pass_qc(lib_dir):
                    pass_thresh += 1
                (ec, corr) = cov_corr
                ecs.append(ec)
                corrs.append(corr)

    # Everything below touches the plotting state, so skip it entirely when
    # plotting is disabled.
    if not do_plot:
        return

    figure(1)
    indivs = []
    for indiv in anal_by_indivs:
        indivs.append(indiv)
        ecs_corrs = anal_by_indivs[indiv]
        ecs = np.array(ecs_corrs["ecs"])
        corrs = np.array(ecs_corrs["corrs"])
        marker = '.'
        if indiv[2:4] == "19" and pilot == "Pilot2":
            marker = '^'
        plot(ecs, corrs, marker)

    # Horizontal dashed line at the QC pass threshold.
    thresh = kgf.get_lib_pass_thresh()
    plot(np.array((0, 12)), np.array((thresh, thresh)), "r--")
    indivs.append("threshold")
    if pilot == "Pilot2":
        legend(indivs, loc=4)
    xlabel("effective coverage bp")
    ylabel("correlation with bacs (X inc)")
    title("%s - correlation vs ec - %d/%d meet threshold" %
          (pilot, pass_thresh, total))
    figurename = "%s/%s" % (analysis_dir,
                            "/_all_alg_analysis/ec_vs_corr_dim0.png")

    savefig(figurename, format='png')
    print(figurename)
    close(1)
def analyze_coverage_to_correlation(analysis_dir, do_plot=False):
    """Gather (effective coverage, correlation) pairs per individual.

    Only the ``mrsfast`` entry of ``analysis_dir`` is examined.  Below it,
    each individual directory is scanned for libraries; names starting with
    ``_``, ``do_`` or ``.`` are ignored.  ``get_lib_cov_corr`` is called once
    per remaining library, and libraries for which it returns a value are
    recorded and checked with ``kgf.lib_pass_qc``.

    Args:
        analysis_dir: Root analysis directory; its second-to-last
            ``/``-separated component is used as the pilot label.
        do_plot: If true, plot the collected points with a dashed threshold
            line and save the figure as
            ``<analysis_dir>//_all_alg_analysis/ec_vs_corr_dim0.png``.
            Defaults to ``False``, in which case nothing is plotted.

    Returns:
        None.
    """
    for banner_line in ("************************",
                        "analyze_coverage_to_correlation",
                        "************************"):
        print(banner_line)

    s_analysis_dir = analysis_dir.split("/")
    pilot = s_analysis_dir[len(s_analysis_dir) - 2]
    print(s_analysis_dir)

    pass_thresh = 0
    total = 0
    # Defined before the scan so a missing "mrsfast" entry cannot leave the
    # name unbound.
    anal_by_indivs = {}

    for alg in os.listdir(analysis_dir):
        if alg != "mrsfast":
            continue
        print(analysis_dir)
        alg_dir = "%s/%s" % (analysis_dir, alg)
        print(alg_dir)

        for indiv in os.listdir(alg_dir):
            ecs = []
            corrs = []
            anal_by_indivs[indiv] = {"ecs": ecs, "corrs": corrs}

            indiv_dir = "%s/%s" % (alg_dir, indiv)
            for lib in os.listdir(indiv_dir):
                if lib.startswith(("_", "do_", ".")):
                    continue
                total += 1
                lib_dir = "%s/%s" % (indiv_dir, lib)

                # One call per library; the result is reused for unpacking.
                result = get_lib_cov_corr(lib_dir)
                if result is not None:
                    if kgf.lib_pass_qc(lib_dir):
                        pass_thresh += 1
                    (ec, corr) = result
                    ecs.append(ec)
                    corrs.append(corr)

    if do_plot:
        figure(1)
        indivs = []
        for indiv in anal_by_indivs:
            indivs.append(indiv)
            ecs_corrs = anal_by_indivs[indiv]
            ecs = np.array(ecs_corrs["ecs"])
            corrs = np.array(ecs_corrs["corrs"])
            marker = '.'
            if indiv[2:4] == "19" and pilot == "Pilot2":
                marker = '^'
            plot(ecs, corrs, marker)

        # Dashed horizontal line marking the QC pass threshold.
        thresh = kgf.get_lib_pass_thresh()
        plot(np.array((0, 12)), np.array((thresh, thresh)), "r--")
        indivs.append("threshold")
        if pilot == "Pilot2":
            legend(indivs, loc=4)
        xlabel("effective coverage bp")
        ylabel("correlation with bacs (X inc)")
        title("%s - correlation vs ec - %d/%d meet threshold"
              % (pilot, pass_thresh, total))
        figurename = "%s/%s" % (analysis_dir,
                                "/_all_alg_analysis/ec_vs_corr_dim0.png")

        savefig(figurename, format='png')
        print(figurename)
        close(1)