import numpy as np
import pysam

def plot_GC(chr, tbx_gc, cp_vect, starts, ends):
    # Write windowed copy-number variance vs. GC content to GC.txt,
    # sampling every 101st window with a 50-window flank on each side.
    cp_vect = cp_vect.astype(np.float64)
    var = get_windowed_variance(cp_vect, 50)
    with open("GC.txt", 'w') as F:
        F.write("var\tgc\n")
        # Stop 50 windows early so ends[i+50] cannot index past the array.
        for i in range(50, starts.shape[0] - 50, 101):
            s = starts[i - 50]
            e = ends[i + 50]
            gc = np.mean(np.array([float(l[3]) for l in
                                   tbx_gc.fetch(chr, s, e, parser=pysam.asTuple())]))
            if var[i] != 0:
                F.write("%f\t%f\n" % (var[i], gc))
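A hedged invocation sketch: every input below is hypothetical, and the GC track is assumed to be a bgzipped, tabix-indexed BED-like file whose fourth column holds each window's GC fraction.

import numpy as np
import pysam

tbx_gc = pysam.TabixFile("gc_track.bed.gz")    # hypothetical path to an indexed GC track
starts = np.arange(0, 10_000_000, 1000)        # hypothetical 1 kb windows
ends = starts + 1000
cp_vect = np.random.rand(starts.shape[0])      # stand-in copy-number estimates
plot_GC("chr1", tbx_gc, cp_vect, starts, ends)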
Example #2
    def init_all_dists(self):

        self.get_indiv_window_dist()
        self.csum_all = np.cumsum(self.all_cps)
        self.mu_probDensByWidth = {}
        self.ll_probDensByWidth = {}

        self.half_width = 250
        self.variance_vect = get_windowed_variance(self.all_cps, self.half_width)
        # At position k, var_left holds the variance of the 2*half_width+1
        # windows to the left of k, and var_right the variance of the
        # 2*half_width+1 windows to the right: np.roll shifts the centered
        # variance vector by half_width+1 in each direction.
        self.var_left = np.roll(self.variance_vect, self.half_width + 1)
        self.var_right = np.roll(self.variance_vect, -self.half_width - 1)

        self.get_variance_dist()
        self.null_var = np.var(self.all_cps)
        self.null_mu = np.mean(self.all_cps)
        self.initialized = True
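Every example on this page calls get_windowed_variance(vect, half_width), whose implementation is not shown; from its uses it returns, at each index, the variance of the 2*half_width+1 values centered there. A minimal NumPy sketch consistent with that contract (the cumulative-sum trick and the flank padding are assumptions, not the project's code):

import numpy as np

def get_windowed_variance(vect, half_width):
    # O(n) sliding-window variance via var = E[x^2] - E[x]^2,
    # computed from cumulative sums of x and x**2.
    w = 2 * half_width + 1
    c1 = np.cumsum(np.insert(vect, 0, 0.0))
    c2 = np.cumsum(np.insert(vect * vect, 0, 0.0))
    s1 = c1[w:] - c1[:-w]                            # windowed sum of x
    s2 = c2[w:] - c2[:-w]                            # windowed sum of x**2
    var = np.maximum(s2 / w - (s1 / w) ** 2, 0.0)    # clip tiny negative round-off
    # Pad the flanks so the output aligns index-for-index with the input.
    return np.concatenate([np.full(half_width, var[0]),
                           var,
                           np.full(half_width, var[-1])])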
Example #4
    def __init__(self,chr,cp_data, starts, ends, cutoff_scale, **kwargs):
        
        max_merge=kwargs.get("max_merge",0.5)
        use_means=kwargs.get("use_means",False)
        n_scales=kwargs.get("n_scales",51)
        #n_scales=kwargs.get("n_scales",30)
        scale_width=kwargs.get('scale_width',1)
        n_bin_smoothings=kwargs.get('n_bin_smoothings',0)
        smoothing_kernel=kwargs.get('smoothing_kernel',np.array([1,2,1]))
        self.chr = chr
        self.cutoff_scale =  cutoff_scale
        self.scales = list(np.arange(1,n_scales,scale_width))
        self.starts = starts
        self.ends = ends
        self.n_wnds = self.starts.shape[0]
        self.cp_data = cp_data
        
        self.der1=np.zeros((len(self.scales),self.n_wnds),dtype=np.float32)
        self.der2=np.zeros((len(self.scales),self.n_wnds),dtype=np.int8)
        
        self.vars = get_windowed_variance(cp_data.astype(np.float64),500) 
        self.l_vars = np.roll(self.vars,501) 
        self.r_vars = np.roll(self.vars,-501) 

        print >>stderr, "scales range from %f-%f"%(self.scales[0],self.scales[-1])
        for i in xrange(n_bin_smoothings):
            print >>stderr,"doing binomial smooth #%d"%i
            cp_data=ndi.convolve1d(cp_data,smoothing_kernel)/np.sum(smoothing_kernel)
        
        transitions_by_scale = {}
        print >>stderr, "finding contours..."
        for i_scale,scale in enumerate(self.scales):
            stderr.write("%.2f "%(scale))
            stderr.flush()
            g1=ndi.gaussian_filter1d(cp_data,scale,order=1)                            
            g2=ndi.gaussian_filter1d(cp_data,scale,order=2)
            edges,pos_edges,neg_edges = self.get_n_edges(g1,g2)
            self.der1[i_scale,:]=g1
            self.der2[i_scale,:]=pos_edges-neg_edges
            transitions_by_scale[scale]=(edges,pos_edges,neg_edges)
        stderr.write("done\n")
        
        self.contour_intersects,x_intercept_to_scale=get_contours(self.der2)    
        
        ######NOW we have all the per-scale contours
        #print contour_intersects
        edges_passing_cutoff =[]
        curr_all_edges=[]
        curr_all_edges_scales=[]
       
        #take all the edges discovered at some scale
        for scale,edges in self.contour_intersects.items():
            curr_all_edges.extend(edges)
            curr_all_edges_scales.extend([scale for i in range(len(edges))])
            if scale >=cutoff_scale:
                edges_passing_cutoff.extend(edges)
        edges_passing_cutoff=sorted(set(edges_passing_cutoff))
            
        all_edges_scales=sorted(zip(curr_all_edges,curr_all_edges_scales))
        stderr.write("hierarchically merging segments\n")
        
        t = time.time()
        segments_s, segments_e, cps = c_hierarch_merge_edges(cp_data, 
                                                        edges_passing_cutoff,
                                                        max_merge,
                                                        use_means,
                                                        self.n_wnds,
                                                        self.starts,
                                                        self.ends)
        #segments_s, segments_e, cps = hierarch_merge_edges(cp_data, 
        #                                                edges_passing_cutoff, 
        #                                                max_merge,use_means)
        self.segment_edges=(segments_s,segments_e,cps)
        print >>stderr, "hierarchical clustering completed in %fs"%(time.time()-t)  
Example #5
    def __init__(self, chr, cp_data, starts, ends, cutoff_scale, **kwargs):

        max_merge = kwargs.get("max_merge", 0.5)
        use_means = kwargs.get("use_means", False)
        n_scales = kwargs.get("n_scales", 51)
        #n_scales=kwargs.get("n_scales",30)
        scale_width = kwargs.get('scale_width', 1)
        n_bin_smoothings = kwargs.get('n_bin_smoothings', 0)
        smoothing_kernel = kwargs.get('smoothing_kernel', np.array([1, 2, 1]))
        self.chr = chr
        self.cutoff_scale = cutoff_scale
        self.scales = list(np.arange(1, n_scales, scale_width))
        self.starts = starts
        self.ends = ends
        self.n_wnds = self.starts.shape[0]
        self.cp_data = cp_data

        self.der1 = np.zeros((len(self.scales), self.n_wnds), dtype=np.float32)
        self.der2 = np.zeros((len(self.scales), self.n_wnds), dtype=np.int8)

        self.vars = get_windowed_variance(cp_data.astype(np.float64), 500)
        self.l_vars = np.roll(self.vars, 501)
        self.r_vars = np.roll(self.vars, -501)

        print("scales range from %f-%f" % (self.scales[0], self.scales[-1]),
              file=stderr)
        for i in range(n_bin_smoothings):
            print("doing binomial smooth #%d" % i, file=stderr)
            cp_data = ndi.convolve1d(
                cp_data, smoothing_kernel) / np.sum(smoothing_kernel)

        transitions_by_scale = {}
        print("finding contours...", file=stderr)
        for i_scale, scale in enumerate(self.scales):
            stderr.write("%.2f " % (scale))
            stderr.flush()
            g1 = ndi.gaussian_filter1d(cp_data, scale, order=1)
            g2 = ndi.gaussian_filter1d(cp_data, scale, order=2)
            edges, pos_edges, neg_edges = self.get_n_edges(g1, g2)
            self.der1[i_scale, :] = g1
            self.der2[i_scale, :] = pos_edges - neg_edges
            transitions_by_scale[scale] = (edges, pos_edges, neg_edges)
        stderr.write("done\n")

        self.contour_intersects, x_intercept_to_scale = get_contours(self.der2)

        ######NOW we have all the per-scale contours
        #print contour_intersects
        edges_passing_cutoff = []
        curr_all_edges = []
        curr_all_edges_scales = []

        #take all the edges discovered at some scale
        for scale, edges in self.contour_intersects.items():
            curr_all_edges.extend(edges)
            curr_all_edges_scales.extend([scale for i in range(len(edges))])
            if scale >= cutoff_scale:
                edges_passing_cutoff.extend(edges)
        edges_passing_cutoff = sorted(set(edges_passing_cutoff))

        all_edges_scales = sorted(zip(curr_all_edges, curr_all_edges_scales))
        stderr.write("hierarchically merging segments\n")

        t = time.time()
        segments_s, segments_e, cps = c_hierarch_merge_edges(
            cp_data, edges_passing_cutoff, max_merge, use_means, self.n_wnds,
            self.starts, self.ends)
        #segments_s, segments_e, cps = hierarch_merge_edges(cp_data,
        #                                                edges_passing_cutoff,
        #                                                max_merge,use_means)
        self.segment_edges = (segments_s, segments_e, cps)
        print("hierarchical clustering completed in %fs" % (time.time() - t),
              file=stderr)
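Both constructors implement the same scale-space idea: a true breakpoint produces a second-derivative zero-crossing that persists as the Gaussian scale grows, while noise-induced crossings die out, which is why only edges surviving past cutoff_scale are kept. A self-contained demo on synthetic data (not from either project):

import numpy as np
import scipy.ndimage as ndi

# Synthetic copy-number profile: a step from 2 to 3 copies at window 500.
rng = np.random.default_rng(0)
cp = np.concatenate([np.full(500, 2.0), np.full(500, 3.0)])
cp += rng.normal(0.0, 0.2, cp.shape[0])

for scale in (2.0, 8.0, 32.0):
    g2 = ndi.gaussian_filter1d(cp, scale, order=2)
    crossings = np.where(np.sign(g2[:-1]) * np.sign(g2[1:]) < 0)[0]
    # Coarser scales suppress noise crossings, leaving those near the
    # true breakpoint (~window 500).
    print("scale %5.1f -> %d crossings" % (scale, crossings.shape[0]))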