Esempio n. 1
0
    def _initialize(self, data1, data2):
        try:
            import statsmodels.api as sm
            lowess = sm.nonparametric.lowess
        except ImportError:
            print("===================================")
            print("Cannot import the module lowess from 'statsmodels', \nplease install the Python package 'statsmodels'")
            print("===================================")

        # NOTE: delta parameter is only available from statsmodels > 0.5.0
        delta = (max(data1) - min(data1)) * 0.01
        frac = 0.1
        
        if len(data1) < 100:
            frac = 1.0

        k = 0
        while k <= 10:
            k += 1
            # Input data is y/x -> needs switch
            result = lowess(numpy.array(data2), numpy.array(data1), delta=delta, frac=frac, it=10)

            if any( [math.isnan(r[1]) for r in result] ):
                print ("WARNING: lowess returned NA data points! We are trying to fix it")
                delta = delta * k
                result = lowess(numpy.array(data2), numpy.array(data1), delta=delta, frac=frac, it=10)
                frac = 1.0
            else:
                break

        return [ r[0] for r in result], [r[1] for r in result]
Esempio n. 2
0
def correct(sample,gcCount,binSize,maxN=0.1,minRD=0.0001,fVal=0.1,iVal=3):
    """Divide binned read counts by a LOWESS fit of read count against GC count.

    A bin takes part in the fit only when its N count
    (``gcCount['N' + chrom]``) is below ``binSize * maxN`` and its read count
    is above ``binSize * minRD``.  Every other bin is reported as 0.

    Args:
        sample: mapping of chromosome name to a sequence of per-bin read counts.
        gcCount: mapping holding, per chromosome, the per-bin GC counts under
            ``chrom`` and the per-bin N counts under ``'N' + chrom``.
        binSize: bin size used to scale the two thresholds.
        maxN: maximum fraction of N bases allowed in a usable bin.
        minRD: minimum read count per base required for a usable bin.
        fVal: ``f`` argument passed to ``biostat.lowess``.
        iVal: ``iter`` argument passed to ``biostat.lowess``.

    Returns:
        dict mapping each chromosome to a list with one corrected value per bin.
    """
    max_n_count = binSize * maxN
    min_read_count = binSize * minRD

    def usable_flags(chrom):
        """Return one boolean per bin of ``chrom``: does it take part in the fit?"""
        n_bins = min(len(gcCount[chrom]), len(sample[chrom]))
        return [
            gcCount['N' + chrom][i] < max_n_count
            and sample[chrom][i] > min_read_count
            for i in range(n_bins)
        ]

    chroms = list(sample)
    # Evaluate the filter once so both passes are guaranteed to agree.
    flags = dict((chrom, usable_flags(chrom)) for chrom in chroms)

    allX = []
    allY = []
    for chrom in chroms:
        for i, usable in enumerate(flags[chrom]):
            if usable:
                allX.append(gcCount[chrom][i])
                allY.append(sample[chrom][i])

    if allX:
        # ``np.float`` was only an alias of the builtin ``float`` and has been
        # removed from NumPy, so the builtin is used directly.
        fitted = biostat.lowess(np.array(allX, float), np.array(allY, float),
                                f=fVal, iter=iVal).tolist()
    else:
        # Nothing to fit: skip the lowess call on empty arrays.
        fitted = []
    # Consume the fitted values in order without the O(n) cost of pop(0).
    fitted_values = iter(fitted)

    correctedSample = dict()
    for chrom in chroms:
        corrected = []
        for i, usable in enumerate(flags[chrom]):
            if usable:
                corrected.append(sample[chrom][i] / next(fitted_values))
            else:
                corrected.append(0)
        correctedSample[chrom] = corrected

    return correctedSample
Esempio n. 3
0
def correct(sample,
            gcCount,
            binSize,
            maxN=0.1,
            minRD=0.0001,
            fVal=0.1,
            iVal=3):
    """Divide binned read counts by a LOWESS fit of read count against GC count.

    A bin takes part in the fit only when its N count
    (``gcCount['N' + chrom]``) is below ``binSize * maxN`` and its read count
    is above ``binSize * minRD``.  Every other bin is reported as 0.

    Args:
        sample: mapping of chromosome name to a sequence of per-bin read counts.
        gcCount: mapping holding, per chromosome, the per-bin GC counts under
            ``chrom`` and the per-bin N counts under ``'N' + chrom``.
        binSize: bin size used to scale the two thresholds.
        maxN: maximum fraction of N bases allowed in a usable bin.
        minRD: minimum read count per base required for a usable bin.
        fVal: ``f`` argument passed to ``biostat.lowess``.
        iVal: ``iter`` argument passed to ``biostat.lowess``.

    Returns:
        dict mapping each chromosome to a list with one corrected value per bin.
    """
    max_n_count = binSize * maxN
    min_read_count = binSize * minRD

    def usable_flags(chrom):
        """Return one boolean per bin of ``chrom``: does it take part in the fit?"""
        n_bins = min(len(gcCount[chrom]), len(sample[chrom]))
        return [
            gcCount['N' + chrom][i] < max_n_count
            and sample[chrom][i] > min_read_count
            for i in range(n_bins)
        ]

    chroms = list(sample)
    # Evaluate the filter once so both passes are guaranteed to agree.
    flags = dict((chrom, usable_flags(chrom)) for chrom in chroms)

    allX = []
    allY = []
    for chrom in chroms:
        for i, usable in enumerate(flags[chrom]):
            if usable:
                allX.append(gcCount[chrom][i])
                allY.append(sample[chrom][i])

    if allX:
        # ``np.float`` was only an alias of the builtin ``float`` and has been
        # removed from NumPy, so the builtin is used directly.
        fitted = biostat.lowess(np.array(allX, float),
                                np.array(allY, float),
                                f=fVal,
                                iter=iVal).tolist()
    else:
        # Nothing to fit: skip the lowess call on empty arrays.
        fitted = []
    # Consume the fitted values in order without the O(n) cost of pop(0).
    fitted_values = iter(fitted)

    correctedSample = dict()
    for chrom in chroms:
        corrected = []
        for i, usable in enumerate(flags[chrom]):
            if usable:
                corrected.append(sample[chrom][i] / next(fitted_values))
            else:
                corrected.append(0)
        correctedSample[chrom] = corrected

    return correctedSample
Esempio n. 4
0
    def _initialize(self, data1, data2):
        try:
            from Bio.Statistics.lowess import lowess
        except ImportError:
            print "==================================="
            print "Cannot import the module lowess from Biopython, \nplease install 'biopython' from https://pypi.python.org/pypi/biopython"
            print "==================================="

        old_settings = numpy.seterr(all='ignore')

        result = lowess(numpy.array(data1), numpy.array(data2), f=0.1, iter=3)
        if all([math.isnan(it) for it in result]):
            # Try standard paramters
            result = lowess(numpy.array(data1), numpy.array(data2))

        numpy.seterr(**old_settings)
        return data1, result
Esempio n. 5
0
    def _initialize(self, data1, data2):
        try:
            from Bio.Statistics.lowess import lowess
        except ImportError:
            print "==================================="
            print "Cannot import the module lowess from Biopython, \nplease install 'biopython' from https://pypi.python.org/pypi/biopython"
            print "==================================="

        old_settings = numpy.seterr(all='ignore')

        result = lowess(numpy.array(data1), numpy.array(data2), f=0.1, iter=3)
        if all([math.isnan(it) for it in result]):
            # Try standard paramters
            result = lowess(numpy.array(data1), numpy.array(data2))

        numpy.seterr(**old_settings)
        return data1, result
Esempio n. 6
0
 def test_Precomputed(self):
     """Compare lowess output for y = x**2 against stored reference values."""
     abscissa = array([0.0, 1.0, 2.0, 3.0, 5.0, 9.0, 11.0])
     ordinate = abscissa**2
     # Reference smooth computed beforehand
     expected = array([
         -2.96219015, 1.72680044, 6.58686813, 11.62986671,
         28.18598762, 86.85271581, 116.83893423,
     ])
     # Smooth produced by the function under test
     smoothed = lowess(abscissa, ordinate, f=2./3., iter=3)
     for reference, value in zip(expected, smoothed):
         self.assertAlmostEqual(reference, value, places=4)
Esempio n. 7
0
    def _initialize(self, data1, data2):
        try:
            import statsmodels.api as sm
            lowess = sm.nonparametric.lowess
        except ImportError:
            print("===================================")
            print(
                "Cannot import the module lowess from 'statsmodels', \nplease install the Python package 'statsmodels'"
            )
            print("===================================")

        # NOTE: delta parameter is only available from statsmodels > 0.5.0
        delta = (max(data1) - min(data1)) * 0.01
        frac = 0.1

        if len(data1) < 100:
            frac = 1.0

        k = 0
        while k <= 10:
            k += 1
            # Input data is y/x -> needs switch
            result = lowess(numpy.array(data2),
                            numpy.array(data1),
                            delta=delta,
                            frac=frac,
                            it=10)

            if any([math.isnan(r[1]) for r in result]):
                print(
                    "WARNING: lowess returned NA data points! We are trying to fix it"
                )
                delta = delta * k
                result = lowess(numpy.array(data2),
                                numpy.array(data1),
                                delta=delta,
                                frac=frac,
                                it=10)
                frac = 1.0
            else:
                break

        return [r[0] for r in result], [r[1] for r in result]
Esempio n. 8
0
 def test_Precomputed(self):
     """Compare lowess output for y = x**2 against stored reference values."""
     sample_x = array([0.0, 1.0, 2.0, 3.0, 5.0, 9.0, 11.0])
     sample_y = sample_x**2
     # Reference smooth computed beforehand
     reference_smooth = array([
         -2.96219015, 1.72680044, 6.58686813, 11.62986671,
         28.18598762, 86.85271581, 116.83893423,
     ])
     # Smooth produced by the function under test
     computed_smooth = lowess(sample_x, sample_y, f=2./3., iter=3)
     pairs = zip(reference_smooth, computed_smooth)
     for wanted, got in pairs:
         self.assertAlmostEqual(wanted, got, places=4)
Esempio n. 9
0
 def scatterLinePlot(self,title_I,xlabel_I,ylabel_I,x_data_I,y_data_I,text_labels_I=None,fit_func_I='linear',show_eqn_I=True,show_r2_I=True,filename_I=None,show_plot_I=True):
     '''Create a scatter plot of the data with a fitted line drawn over it.

     Args:
         title_I: plot title.
         xlabel_I: x axis label.
         ylabel_I: y axis label.
         x_data_I: x values.
         y_data_I: y values.
         text_labels_I: optional labels, one per data point.
         fit_func_I: 'linear' (linregress) or 'lowess'; the lowess curve is
             forced to be non-decreasing.
         show_eqn_I: annotate the fitted equation (linear fit only).
         show_r2_I: annotate r squared (linear fit only).
         filename_I: if given, the figure is saved to this file.
         show_plot_I: if true, the figure is shown.

     Raises:
         ValueError: if fit_func_I is not a supported fit.
     '''
     # Reject unknown fits before any figure is created; previously this
     # surfaced as a NameError on x2/y2 when plotting.
     if fit_func_I not in ('linear', 'lowess'):
         raise ValueError("fit_func_I must be 'linear' or 'lowess', got %r" % (fit_func_I,))
     # Only the linear fit defines these; None marks "nothing to annotate".
     slope = intercept = r2 = None
     # Create the fit:
     if fit_func_I == 'linear':
         slope, intercept, r_value, p_value, std_err = linregress(x_data_I, y_data_I)
         r2 = r_value**2 #coefficient of determination
         x2 = x_data_I
         y2 = [d*slope+intercept for d in x2]
     else:
         #lowess
         x2 = numpy.array(x_data_I)
         y2_lowess = lowess.lowess(x2,numpy.array(y_data_I),f=0.1,iter=100)
         y2 = numpy.zeros_like(y2_lowess)
         # Never let the curve drop below the value before it.
         for i,y2s in enumerate(y2_lowess):
             if i > 0 and y2s < y2[i-1]:
                 y2[i] = y2[i-1]
             else:
                 y2[i] = y2s
     # Create a Figure object.
     fig = plt.figure()
     # Create an Axes object.
     ax = fig.add_subplot(1,1,1) # one row, one column, first plot
     # Plot the data.
     ax.scatter(x_data_I, y_data_I, color="blue", marker="o")
     ax.plot(x2,y2,color='red',linestyle='-')
     # Add a title.
     ax.set_title(title_I)
     # Add some axis labels.
     ax.set_xlabel(xlabel_I)
     ax.set_ylabel(ylabel_I)
     # Label data points.
     if text_labels_I:
         for i, txt in enumerate(text_labels_I):
             ax.annotate(txt, (x_data_I[i],y_data_I[i]))
     # Show fit equation (a lowess fit has no slope/intercept to report)
     if show_eqn_I and slope is not None:
         fit_eqn = "y = " + str(slope) + "*x"
         if intercept < 0:
             fit_eqn += " " + str(intercept)
         elif intercept > 0:
             fit_eqn += " +" + str(intercept)
         ax.annotate(fit_eqn,(min(x_data_I),max(y_data_I)))
     # Show r2 value (only computed for the linear fit)
     if show_r2_I and r2 is not None:
         r2_label = "r2 = " + str(r2)
         ax.annotate(r2_label,(min(x_data_I),max(y_data_I)-0.5))
     # Produce an image.
     if filename_I:
         fig.savefig(filename_I)
     # Show the image.
     if show_plot_I:
         plt.show()
    def _initialize(self, data1, data2):
        try:
            import statsmodels.api as sm
            lowess = sm.nonparametric.lowess
        except ImportError:
            print "==================================="
            print "Cannot import the module lowess from 'statsmodels', \nplease install the Python package 'statsmodels'"
            print "==================================="

        result = lowess(numpy.array(data1), numpy.array(data2))
        return result
    def _initialize(self, data1, data2):
        try:
            import cylowess
            lowess = cylowess.lowess
        except ImportError:
            print "==================================="
            print "Cannot import the module lowess from 'cylowess', \nplease install the cylowess package according to http://slendermeans.org/lowess-speed.html (see also README)"
            print "==================================="

        delta = (max(data1) - min(data1)) * 0.01
        result = lowess(numpy.array(data1), numpy.array(data2), delta=delta)
        return result
Esempio n. 12
0
    def _initialize(self, data1, data2):
        try:
            import statsmodels.api as sm
            lowess = sm.nonparametric.lowess
        except ImportError:
            print "==================================="
            print "Cannot import the module lowess from 'statsmodels', \nplease install the Python package 'statsmodels'"
            print "==================================="

        # Input data is y/x -> needs switch
        result = lowess(numpy.array(data2), numpy.array(data1))
        return result
Esempio n. 13
0
    def _initialize(self, data1, data2):
        try:
            import cylowess
            lowess = cylowess.lowess
        except ImportError:
            print "==================================="
            print "Cannot import the module lowess from 'cylowess', \nplease install the cylowess package according to http://slendermeans.org/lowess-speed.html (see also README)"
            print "==================================="

        delta = (max(data1) - min(data1)) * 0.01
        # Input data is y/x -> needs switch
        result = lowess(numpy.array(data2), numpy.array(data1), delta=delta, frac=0.1, it=10)
        return [ r[0] for r in result], [r[1] for r in result]
Esempio n. 14
0
    def _initialize(self, data1, data2):
        try:
            import statsmodels.api as sm
            lowess = sm.nonparametric.lowess
        except ImportError:
            print("===================================")
            print("Cannot import the module lowess from 'statsmodels', \nplease install the Python package 'statsmodels'")
            print("===================================")

        # NOTE: delta parameter is only available from statsmodels > 0.5.0
        delta = (max(data1) - min(data1)) * 0.01

        # Input data is y/x -> needs switch
        result = lowess(numpy.array(data2), numpy.array(data1), delta=delta, frac=0.1, it=10)
        return [ r[0] for r in result], [r[1] for r in result]
Esempio n. 15
0
    def _initialize(self, data1, data2):
        try:
            import cylowess
            lowess = cylowess.lowess
        except ImportError:
            print "==================================="
            print "Cannot import the module lowess from 'cylowess', \nplease install the cylowess package according to http://slendermeans.org/lowess-speed.html (see also README)"
            print "==================================="

        delta = (max(data1) - min(data1)) * 0.01
        # Input data is y/x -> needs switch
        result = lowess(numpy.array(data2),
                        numpy.array(data1),
                        delta=delta,
                        frac=0.1,
                        it=10)
        return [r[0] for r in result], [r[1] for r in result]
Esempio n. 16
0
def gc_correct_lowess(gc2bin, raw_counts):
    """Return per-bin counts with the lowess trend inside each GC group removed.

    For every GC group with more than three bins, the lowess curve over the
    bin positions is subtracted from the counts and the group's average depth
    (from ``average_depth_in_gc``) is added back.  Groups of three bins or
    fewer, and groups whose fit raises ``FloatingPointError``, keep their raw
    counts.

    Args:
        gc2bin: mapping of GC content to the list of bins having that content.
        raw_counts: mapping of bin to its raw count.

    Returns:
        dict mapping every bin to its (possibly corrected) count.
    """
    corrected = {}
    for gc_content, bins in gc2bin.items():
        depths = np.array([raw_counts[bin_id] for bin_id in bins])
        # Default outcome: leave the group untouched.
        adjusted = depths
        if len(depths) > 3:
            average_depth = average_depth_in_gc(gc2bin, raw_counts, gc_content)
            positions = np.array(range(len(bins)))
            try:
                trend = lowess(positions, depths)
                adjusted = depths - (trend - average_depth)
            except FloatingPointError:
                adjusted = depths
        corrected.update(zip(bins, adjusted))
    return corrected
Esempio n. 17
0
    def _initialize(self, data1, data2):
        try:
            import statsmodels.api as sm
            lowess = sm.nonparametric.lowess
        except ImportError:
            print("===================================")
            print(
                "Cannot import the module lowess from 'statsmodels', \nplease install the Python package 'statsmodels'"
            )
            print("===================================")

        # NOTE: delta parameter is only available from statsmodels > 0.5.0
        delta = (max(data1) - min(data1)) * 0.01

        # Input data is y/x -> needs switch
        result = lowess(numpy.array(data2),
                        numpy.array(data1),
                        delta=delta,
                        frac=0.1,
                        it=10)
        return [r[0] for r in result], [r[1] for r in result]
Esempio n. 18
0
def smooth_function(zinput,smooth_method = 'lowess',span = .05):
    """Smooth a mapping of year -> value.

    Args:
        zinput: dict mapping a year to a numeric value or the string 'None'.
        smooth_method: 'lowess' or 'triangle'; any other value returns
            ``zinput`` itself, unchanged.
        span: for 'lowess' the ``f`` argument handed to Biopython's lowess;
            for 'triangle' the half width of the window (floored to an int).

    Returns:
        For 'lowess', a new dict mapping ``int(year)`` to the smoothed value,
        built from the entries whose value is not 'None'.  For 'triangle', a
        copy of ``zinput`` in which every entry with at least one usable
        value in its window is replaced by the weighted window average,
        rounded to 3 decimals.  The input dict is never modified.
    """
    if smooth_method not in ['lowess','triangle']:
        return zinput
    xarray = []
    yarray = []
    for key in zinput:
        if zinput[key]!='None':
            xarray.append(float(key))
            yarray.append(float(zinput[key]))
    from numpy import array
    if smooth_method == 'lowess':
        from Bio.Statistics.lowess import lowess
        x = array(xarray)
        y = array(yarray)
        smoothed = lowess(x,y,float(span),3)
        return dict(zip([int(p) for p in x],smoothed))

    # Triangle smoothing.
    from numpy import average
    span = int(span) #Takes the floor--so no smoothing on a span < 1.
    # Read from an untouched snapshot and write into a separate copy, so that
    # already smoothed entries never feed into later windows and the caller's
    # dict is left alone.
    original = dict(zinput)
    returnval = dict(zinput)
    for key in original:
        centre = int(key)
        values = []
        weights = []
        #if span is 2, the offsets run -2, -1, 0, 1, 2
        for key_dist in range(-span, span + 1):
            try:
                value = float(original[centre + key_dist])
            except (KeyError, TypeError, ValueError):
                # Missing year or non-numeric entry: it gets no weight.
                continue
            values.append(value)
            # Float weights: an integer weight array would truncate the
            # square roots.
            weights.append((span + 1 - abs(key_dist))**.5)
        if values:
            returnval[key] = round(average(values,weights=weights),3)
    return returnval
Esempio n. 19
0
    def plot_rain_by_year(self):
        """Show a bar chart of the yearly rain totals with average and trend.

        The data comes from ``self.get_rain_totals()``: element 0 is used as
        the years and element 1 as the rain totals.  The plot also contains a
        horizontal line at the average and a dashed lowess trend line.
        """
        # Get data
        data = self.get_rain_totals()
        # np.float was only an alias of the builtin float and has been removed
        # from NumPy.
        years = np.asarray(data[0]).astype(float)
        x_pos = np.arange(len(years))
        rain = np.asarray(data[1])

        # Plot bars
        plt.bar(x_pos, rain, align='center', alpha=0.4)
        plt.xticks(x_pos, years)
        # Booleans instead of the 'off' strings, which newer Matplotlib
        # versions no longer treat as "disabled".
        plt.tick_params(axis='x', which='both', bottom=False, top=False)
        # NOTE(review): the x axis shows years yet is labelled 'Rain' - confirm
        # whether this was meant to be the y label.
        plt.xlabel('Rain')

        # Plot average
        avg = np.average(rain)
        plt.axhline(avg)

        # Plot trend line
        trend = lowess(years, rain, f=0.5)
        plt.plot(trend, linestyle='--')

        plt.title('Total rain in London per year')
        plt.show()
Esempio n. 20
0
def get_GC_depth_correction_vect(sum_depths,n_bases,GC_width,max_correction_factor,calc_correction_factor=True,correct_range=False):
    """Compute per-GC-bin depth correction factors from a lowess fit.

    The average depth per GC bin is smoothed with ``biostats.lowess`` over the
    bins whose GC fraction lies in [0.25, 0.75].  Outside that range the curve
    is extended with straight lines whose parameters come from
    ``get_line_params`` applied to the first and last few fitted points.  The
    correction of a bin is the overall mean depth divided by the fitted depth
    (clipped to [1e-10, 1e30]).

    Args:
        sum_depths: numpy array of summed depths per GC bin; its first
            dimension must be ``2 * GC_width + 2``.
        n_bases: numpy array with the number of bases per GC bin.
        GC_width: determines the number of GC bins (see ``sum_depths``).
        max_correction_factor: corrections are clipped to
            ``[1 / max_correction_factor, max_correction_factor]``; a value
            <= 0 yields a correction of 1 everywhere.  Ignored when
            ``correct_range`` is true.
        calc_correction_factor: not used by this function.
        correct_range: when true, the clipping bound is taken from the
            correction of the first bin with a GC fraction above 0.75.

    Returns:
        Tuple ``(GCp, ave_depths, GCp2_8, lowess_depth, correction, mu)``.
    """
    assert(sum_depths.shape[0]==2*GC_width+1+1) #have to count 0 as well~ so, 41+1

    ave_depths = sum_depths/n_bases.astype(np.float64)
    GCp = np.arange(0,2*GC_width+1+1)/float((2*GC_width+1))

    # Bins without any bases give 0/0 = nan; treat them as depth 0.
    ave_depths[np.isnan(ave_depths)] = 0

    # Fit only the central GC range; the tails are extrapolated linearly below.
    central = np.logical_and(GCp>=0.25,GCp<=.75)
    ave_depths2_8 = ave_depths[central]
    GCp2_8 = GCp[central]
    GCp0_2 = GCp[GCp<0.25]
    GCp8_10 = GCp[GCp>.75]

    #now apply lowess on this
    lowess_depth = biostats.lowess(GCp2_8,ave_depths2_8,f=.15)

    # The first k+1 and the last k fitted points anchor the two tail lines.
    k=5
    y1 = lowess_depth[0:k+1]
    x1 = GCp2_8[0:k+1]

    l = lowess_depth.shape[0]
    y2 = lowess_depth[l-k:l+1]
    x2 = GCp2_8[l-k:l+1]

    # Debug output, written so that it prints the same text on Python 2 and 3.
    print(lowess_depth)
    print("fit lowess on %s" % (ave_depths2_8,))
    print(GCp0_2)
    print(GCp8_10)
    print("%s %s" % (x1, y1))
    print("%s %s" % (x2, y2))

    p1 = get_line_params(x1,y1)
    p2 = get_line_params(x2,y2)

    # Evaluate slope * x + intercept on the bins outside the fitted range.
    left_line = p1[0]*GCp0_2+p1[1]
    right_line = p2[0]*GCp8_10+p2[1]

    lowess_depth = np.r_[left_line,lowess_depth,right_line]

    mu = sum_depths.astype(np.float64).sum()/n_bases.astype(np.float64).sum()
    # Keep the fitted depth strictly positive before dividing by it.
    lowess_depth = np.clip(lowess_depth,1e-10,1e30)
    correction = mu/lowess_depth

    if(correct_range):
        GC_max = 0.75
        iGC_max = (np.where(GCp>GC_max))[0][0]
        max_correction_factor = correction[iGC_max]
        correction=np.clip(correction,1.0/max_correction_factor,max_correction_factor)
    elif(max_correction_factor<=0):
        correction=np.ones(correction.shape[0])
    else:
        correction=np.clip(correction,1.0/max_correction_factor,max_correction_factor)

    return GCp,ave_depths,GCp2_8,lowess_depth,correction,mu
Esempio n. 21
0
# Repeat the average signal once for each of the 12 periods.
for y in range(12):
    for d in range(len(aveSig)):
        index = y * len(aveSig) + d
        aveTrend[index] = aveSig[d]

#subtract the average from the signal
ltTrend = numpy.copy(dvals)
for y in range(12):
    for d in range(len(aveSig)):
        index = y * len(aveSig) + d
        ltTrend[index] = dvals[index] - aveSig[d]

#loess smoothed trend (rough stuff)
# numpy.float was only an alias of the builtin float and has been removed
# from NumPy, so the builtin is used directly.
x = numpy.array(range(0, len(dvals)), float)
y = numpy.array(ltTrend, float)
result = lowess.lowess(x, y, f=0.5 / 3., iter=2)

#residuals
resids = ltTrend - result

#plots: raw signal, repeated average, smoothed trend, residuals

pyplot.subplot(4, 1, 1)
pyplot.plot(x, dvals)
pyplot.subplot(4, 1, 2)
pyplot.plot(x, aveTrend)
pyplot.subplot(4, 1, 3)
pyplot.plot(x, result)
pyplot.subplot(4, 1, 4)
pyplot.plot(x, resids)
pyplot.show()
Esempio n. 22
0
def gen_mh_scatter(x, y, color='green', xlabel=None, ylabel=None,
                   one_line=False, fig=None, ax=None, marker='.',
                   connect=False, label=None, trendline=None, alpha=1.0,
                   mask=None, zero_lines=False, edgecolors='none', grid=None,
                   figsize=FIG_SIZE, linestyle='-'):
    """
    Draw y against x as a scatter plot (or a connected line) on an axes.

    A new figure and axes are created unless both ``fig`` and ``ax`` are
    given.  Returns the tuple ``(fig, ax)``.

    Notes:

    one_line:   False, <style, e.g. 'r--', 'r:'>; True means 'r--'.
                Draws the y = x line over the range shared by x and y.
    connect:    T/F, draw a connected line instead of a scatter
    zero_lines: T/F, draw the x = 0 and y = 0 lines
    grid:       None, 'major', 'minor', 'both'
    edgecolors: 'none', 'green' etc
    trendline:  False, 1, 2, ... (polynomial degree), 'lowess'
    mask:       optional index applied to x and y before fitting the trendline
    linestyle:  accepted but not used by this function

    """
    # some defaults if no style is provided, just True:
    if one_line is True:
        one_line = 'r--'

    if fig is None or ax is None:
        fig = plt.figure(figsize=figsize, dpi=200)
        ax = fig.add_subplot(111)

    if connect:
        # Axes.set_color_cycle was removed from Matplotlib; set_prop_cycle is
        # its replacement.  Fall back for installations that predate it.
        if hasattr(ax, 'set_prop_cycle'):
            ax.set_prop_cycle(color=[color])
        else:
            ax.set_color_cycle([color])
        ax.plot(x, y, c=color, marker=marker, label=label, alpha=alpha)
    else:
        ax.scatter(x, y, c=color, marker=marker, edgecolors=edgecolors,
                   label=label, alpha=alpha)
    if grid is not None:
        # Passed positionally: the keyword for this flag was renamed between
        # Matplotlib versions.
        ax.grid(True, which=grid)
    if xlabel:
        ax.set(xlabel=xlabel)
    if ylabel:
        ax.set(ylabel=ylabel)
    if one_line:
        lmin = max(min(x), min(y))
        lmax = min(max(x), max(y))
        ax.plot((lmin, lmax), (lmin, lmax), one_line)
    if zero_lines:
        ax.axvline(0)
        ax.axhline(0)

    if trendline:
        if mask is not None:
            x = x[mask]
            y = y[mask]
        # A trend needs at least two distinct values on each axis.
        if (len(set(x)) > 1) and (len(set(y)) > 1):
            if trendline == 'lowess':
                order = np.argsort(x)
                lx = x[order]
                ly = lowess(lx, y[order])
            else:
                coefs = np.polyfit(x, y, trendline)
                lx = np.linspace(min(x), max(x), 100)
                ly = [polyfit_apply(coefs, point) for point in lx]
            ax.plot(lx, ly, '-', color=color)

    return fig, ax
Esempio n. 23
0
 def scatterLinePlot(self,
                     title_I,
                     xlabel_I,
                     ylabel_I,
                     x_data_I,
                     y_data_I,
                     text_labels_I=None,
                     fit_func_I='linear',
                     show_eqn_I=True,
                     show_r2_I=True,
                     filename_I=None,
                     show_plot_I=True):
     '''Create a scatter plot of the data with a fitted line drawn over it.

     Args:
         title_I: plot title.
         xlabel_I: x axis label.
         ylabel_I: y axis label.
         x_data_I: x values.
         y_data_I: y values.
         text_labels_I: optional labels, one per data point.
         fit_func_I: 'linear' (linregress) or 'lowess'; the lowess curve is
             forced to be non-decreasing.
         show_eqn_I: annotate the fitted equation (linear fit only).
         show_r2_I: annotate r squared (linear fit only).
         filename_I: if given, the figure is saved to this file.
         show_plot_I: if true, the figure is shown.

     Raises:
         ValueError: if fit_func_I is not a supported fit.
     '''
     # Reject unknown fits before any figure is created; previously this
     # surfaced as a NameError on x2/y2 when plotting.
     if fit_func_I not in ('linear', 'lowess'):
         raise ValueError("fit_func_I must be 'linear' or 'lowess', got %r" %
                          (fit_func_I, ))
     # Only the linear fit defines these; None marks "nothing to annotate".
     slope = intercept = r2 = None
     # Create the fit:
     if fit_func_I == 'linear':
         slope, intercept, r_value, p_value, std_err = linregress(
             x_data_I, y_data_I)
         #coefficient of determination
         r2 = r_value**2
         x2 = x_data_I
         y2 = [d * slope + intercept for d in x2]
     else:
         #lowess
         x2 = numpy.array(x_data_I)
         y2_lowess = lowess.lowess(x2,
                                   numpy.array(y_data_I),
                                   f=0.1,
                                   iter=100)
         y2 = numpy.zeros_like(y2_lowess)
         # Never let the curve drop below the value before it.
         for i, y2s in enumerate(y2_lowess):
             if i > 0 and y2s < y2[i - 1]:
                 y2[i] = y2[i - 1]
             else:
                 y2[i] = y2s
     # Create a Figure object.
     fig = plt.figure()
     # Create an Axes object.
     ax = fig.add_subplot(1, 1, 1)  # one row, one column, first plot
     # Plot the data.
     ax.scatter(x_data_I, y_data_I, color="blue", marker="o")
     ax.plot(x2, y2, color='red', linestyle='-')
     # Add a title.
     ax.set_title(title_I)
     # Add some axis labels.
     ax.set_xlabel(xlabel_I)
     ax.set_ylabel(ylabel_I)
     # Label data points.
     if text_labels_I:
         for i, txt in enumerate(text_labels_I):
             ax.annotate(txt, (x_data_I[i], y_data_I[i]))
     # Show fit equation (a lowess fit has no slope/intercept to report)
     if show_eqn_I and slope is not None:
         fit_eqn = "y = " + str(slope) + "*x"
         if intercept < 0:
             fit_eqn += " " + str(intercept)
         elif intercept > 0:
             fit_eqn += " +" + str(intercept)
         ax.annotate(fit_eqn, (min(x_data_I), max(y_data_I)))
     # Show r2 value (only computed for the linear fit)
     if show_r2_I and r2 is not None:
         r2_label = "r2 = " + str(r2)
         ax.annotate(r2_label, (min(x_data_I), max(y_data_I) - 0.5))
     # Produce an image.
     if filename_I:
         fig.savefig(filename_I)
     # Show the image.
     if show_plot_I:
         plt.show()
def smooth_function(zinput,smooth_method = 'lowess',span = .05):
    """Smooth a mapping of year -> value.

    Entries whose value is the string 'None' are left out of the smoothing.
    The remaining entries are processed in sorted key order.

    Args:
        zinput: dict mapping a year to a numeric value or the string 'None'.
        smooth_method: 'lowess', 'rectangle' or 'triangle'; any other value
            returns ``zinput`` itself, unchanged.
        span: for 'lowess', ``span / 100`` is the ``f`` argument handed to
            Biopython's lowess; for the window methods it is the half width
            of the window, counted in data points (floored to an int).

    Returns:
        For 'lowess', a new dict mapping ``int(year)`` to the smoothed value.
        For 'rectangle' and 'triangle', a copy of ``zinput`` in which the
        weighted window average, rounded to 3 decimals, is stored under
        ``float(year)``.  The input dict is never modified.
    """
    if smooth_method not in ['lowess','triangle','rectangle']:
        return zinput
    xarray = []
    yarray = []
    # sorted() also works on Python 3, where keys() has no sort() method.
    for key in sorted(zinput):
        if zinput[key]!='None':
            xarray.append(float(key))
            yarray.append(float(zinput[key]))
    from numpy import array
    if smooth_method == 'lowess':
        from Bio.Statistics.lowess import lowess
        x = array(xarray)
        y = array(yarray)
        smoothed = lowess(x,y,float(span)/100,3)
        return dict(zip([int(p) for p in x],smoothed))

    # Window smoothing: both methods differ only in how a neighbour is weighted.
    from math import log
    from numpy import average
    span = int(span) #Takes the floor--so no smoothing on a span < 1.
    if smooth_method == 'rectangle':
        def weight_for(key_dist):
            """Every point inside the window counts the same."""
            return 1.0
    else:
        def weight_for(key_dist):
            """Weight shrinks with the log of the distance from the centre."""
            #This isn't actually triangular smoothing: the weights are dampened
            #by the logs, to keep the peaks from being too big.
            #The minimum is '2', since log(1) == 0, which is a nonsense weight.
            return log(span + 2 - abs(key_dist))

    # Work on a copy so the caller's dict is left alone.
    returnval = dict(zinput)
    n_points = len(yarray)
    for i in range(n_points):
        values = []
        weights = []
        #if span is 2, the offsets run -2, -1, 0, 1, 2
        for key_dist in range(-span, span + 1):
            workingon = i + key_dist
            # Positions beyond either end of the data get no weight.
            if 0 <= workingon < n_points:
                values.append(yarray[workingon])
                weights.append(weight_for(key_dist))
        # NOTE(review): the result is stored under float(year); for keys that
        # are not numbers this adds a new key next to the original one.
        returnval[xarray[i]] = round(average(values,weights=weights),3)
    return returnval
def null_model(
    matrix,
    positions=None,
    lengths=None,
    model="uniform",
    noisy=False,
    circ=False,
    sparsity=False,
):
    """Attempt to compute a 'null model' of the matrix given a model
    to base itself on.

    Parameters
    ----------
    matrix : numpy.ndarray
        2D contact matrix the null model is derived from.
    positions : sequence, optional
        Bin positions, forwarded to ``positions_to_contigs``,
        ``distance_diagonal_law`` and ``rippe_parameters``. Defaults to
        ``range(n)`` where ``n`` is the number of rows.
    lengths : sequence, optional
        Per-bin lengths used by the "rippe" model when ``circ`` is set.
        Defaults to ``np.diff(positions)``.
    model : str
        One of "uniform", "distance" or "rippe". Any other value leaves the
        model as a plain copy of ``matrix``.
    noisy : bool or callable
        If callable, it is applied to the model before returning. If merely
        truthy, Poisson noise (``np.random.poisson``) is applied.
    circ : bool
        Forwarded to ``rippe_parameters`` and used as the circularity term
        of the "rippe" distance.
    sparsity : bool
        If set, rescale the model by a per-bin coverage score derived from
        a smoothed column-sum profile of ``matrix``.

    Returns
    -------
    numpy.ndarray
        The null model, or whatever the noise function returns when
        ``noisy`` is set.
    """

    n, m = matrix.shape
    positions_supplied = positions is not None
    if not positions_supplied:
        positions = range(n)
    if lengths is None:
        lengths = np.diff(positions)

    N = np.copy(matrix)

    contigs = np.array(positions_to_contigs(positions))

    def is_inter(i, j):
        return contigs[i] != contigs[j]

    def mean_inter_contacts():
        # Average over every cell whose two bins lie on different contigs.
        trans_contacts = np.array([
            matrix[i, j] for i, j in itertools.product(range(n), range(m))
            if is_inter(i, j)
        ])
        return np.average(trans_contacts)

    diagonal = np.diag(matrix)

    if model == "uniform":
        if positions_supplied:
            mean_trans_contacts = mean_inter_contacts()
        else:
            # Without positions there is no contig information: use the mean
            # of all off-diagonal cells. This must be a scalar; the previous
            # expression subtracted the diagonal *vector* and produced one
            # Poisson rate per column (possibly negative).
            off_diagonal_cells = matrix.size - len(diagonal)
            if off_diagonal_cells > 0:
                mean_trans_contacts = (
                    float(matrix.sum() - diagonal.sum()) / off_diagonal_cells)
            else:
                mean_trans_contacts = 0.0

        N = np.random.poisson(lam=mean_trans_contacts, size=(n, m))
        np.fill_diagonal(N, diagonal)

    elif model == "distance":
        distances = distance_diagonal_law(matrix, positions)
        # |i - j| never exceeds n - 1, so it indexes the law directly.
        N = np.array([[distances[abs(i - j)] for i in range(n)]
                      for j in range(n)])

    elif model == "rippe":
        mean_trans_contacts = mean_inter_contacts()
        kuhn, lm, slope, d, A = rippe_parameters(matrix, positions, circ=circ)

        def jc(s, frag):
            # `circ` acts as a 0/1 switch for the circular-distance term.
            dist = s - circ * (s**2) / lengths[frag]
            computed_contacts = (0.53 * A * (kuhn**(-3.)) * (dist**slope) *
                                 np.exp((d - 2) / (dist + d)))
            # Never predict fewer contacts than the inter-contig background.
            return np.maximum(computed_contacts, mean_trans_contacts)

        for i in range(n):
            for j in range(n):
                if not is_inter(i, j) and i != j:
                    posi, posj = positions[i], positions[j]
                    N[i, j] = jc(np.abs(posi - posj) * lm / kuhn, frag=j)
                else:
                    N[i, j] = mean_trans_contacts

    if sparsity:
        contact_sum = matrix.sum(axis=0)
        n_bins = len(contact_sum)
        try:
            from Bio.Statistics import lowess

            trend = lowess.lowess(np.array(range(n_bins), dtype=np.float64),
                                  contact_sum,
                                  f=0.03)
        except ImportError:
            # Fallback when Biopython's lowess is unavailable: forward-looking
            # moving average with a window clamped to [20, 100] bins.
            expected_size = int(np.amax(contact_sum) / np.average(contact_sum))
            w = min(max(expected_size, 20), 100)
            trend = np.array([
                np.average(contact_sum[i:min(i + w, n_bins)])
                for i in range(n_bins)
            ])

        # NOTE(review): bins whose trend is below average give a negative
        # argument to sqrt and therefore NaN scores - confirm this is intended.
        cov_score = np.sqrt((trend - np.average(trend)) / np.std(trend))

        N = ((N * cov_score).T) * cov_score

    if noisy:
        # `noisy=True` used to hit an unbound `noise_function`; fall back to
        # Poisson noise, the same distribution the uniform model draws from.
        noise_function = noisy if callable(noisy) else np.random.poisson
        return noise_function(N)
    return N
	# Correct any extreme outliers caused by low read count.
	# A window with fewer than 10 reads whose bias exceeds its neighbour(s) by
	# more than 0.5 is replaced by the neighbouring value (edges) or by the
	# mean of both neighbours (interior windows).
	outliers = []
	for window in xrange(len(gc_curve)):
		if read_counts[window] < 10:
			if window == 0 and gc_curve[window] - 0.5 > gc_curve[window + 1]:
				outliers.append(window)
				gc_curve[window] = gc_curve[window + 1]
			elif window == len(gc_curve) - 1 and gc_curve[window] - 0.5 > gc_curve[window - 1]:
				outliers.append(window)
				# The last window has no right-hand neighbour; indexing
				# window + 1 here raised IndexError. Use the left neighbour,
				# which is the one the condition above compares against.
				gc_curve[window] = gc_curve[window - 1]
			elif gc_curve[window] - 0.5 > gc_curve[window - 1] and gc_curve[window] - 0.5 > gc_curve[window + 1]:
				outliers.append(window)
				gc_curve[window] = (gc_curve[window - 1] + gc_curve[window + 1])/2.
	
	# Smooth the corrected curve over an x axis normalised to [0, 1] and clamp
	# negative smoothed values to zero.
	gc_x = np.array([x / float(len(gc_curve) - 1) for x in xrange(len(gc_curve))])
	smoothed_gc_curve = lowess.lowess(gc_x, gc_curve, f = 0.1, iter = 1)
	smoothed_gc_curve = [max(0.0, x) for x in smoothed_gc_curve]
	
	outfile = open(outname, 'w')

	# Header lines start with '#'; each data row is one GC-content window.
	outfile.write('# GC Curve file combined from %s\n' %(', '.join(args.curves)))
	outfile.write('# Curve calculated from %i reads at %i locations\n' %(sum(read_counts), sum(loc_counts)))
	if len(outliers) > 0:
		outfile.write('# Windows with corrected extreme outlier GC bias: %s\n' %(', '.join(['%.*f-%.*f' %(len(str((len(gc_curve) - 1))) + 2, window * 1.0/(len(gc_curve) - 1), len(str((len(gc_curve) - 1))) + 2, min(1., (window + 1) * 1.0/(len(gc_curve) - 1) - (1. / 10 ** (len(str((len(gc_curve) - 1))) + 2)))) for window in outliers])))
	outfile.write('#\n')
	outfile.write('#GC_content\tSmoothed_GC_bias\tRaw_GC_Bias\tNo_of_reads\tNo_of_locations\n')
	for window, bias in enumerate(gc_curve):
		outfile.write('%.*f-%.*f\t%f\t%f\t%i\t%i\n' %(len(str((len(gc_curve) - 1))) + 2, window * 1.0/(len(gc_curve) - 1), len(str((len(gc_curve) - 1))) + 2, min(1., (window + 1) * 1.0/(len(gc_curve) - 1) - (1. / 10 ** (len(str((len(gc_curve) - 1))) + 2))), smoothed_gc_curve[window], bias, read_counts[window], loc_counts[window]))

	if args.plot:
		plt.clf()
Esempio n. 27
0
    def multiScatterLinePlot(self,
                             title_I,
                             xlabel_I,
                             ylabel_I,
                             x_data_I=[],
                             y_data_I=[],
                             data_labels_I=[],
                             text_labels_I=[],
                             fit_func_I='linear',
                             show_eqn_I=True,
                             show_r2_I=True,
                             filename_I=None,
                             show_plot_I=True,
                             show_legend_I=True):
        '''Create a scatter plot per data series with an optional fitted line.

        Input:
           title_I, xlabel_I, ylabel_I = plot title and axis labels
           x_data_I = [[a1,a2,a3...],[b1,b2,b3,...],...] of type float
           y_data_I = [[a1,a2,a3...],[b1,b2,b3,...],...] of type float
           data_labels_I = [a,b,...] of type string
           text_labels_I = [[a1,a2,a3...],[b1,b2,b3,...],...] of type string
           fit_func_I = 'linear' (least-squares line), 'lowess' (lowess curve
                        forced to be non-decreasing); any other value draws
                        no fitted line
           show_eqn_I, show_r2_I = annotate the linear fit equation / r2
           filename_I = if given, the figure is saved to this path
           show_plot_I = display the figure
           show_legend_I = add a legend

        The default lists are never mutated.
        '''

        # Create a Figure object with a single Axes.
        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)  # one row, one column, first plot
        # Title and axis labels do not depend on the series.
        ax.set_title(title_I)
        ax.set_xlabel(xlabel_I)
        ax.set_ylabel(ylabel_I)
        # One colour per data series.
        colors = iter(cm.rainbow(numpy.linspace(0, 1, len(x_data_I))))

        for cnt_data, y_series in enumerate(y_data_I):
            x_series = x_data_I[cnt_data]
            # Reset per series so an unsupported fit_func_I cannot re-plot a
            # stale (or undefined) fit.
            x2 = None
            y2 = None
            if fit_func_I == 'linear':
                slope, intercept, r_value, p_value, std_err = linregress(
                    x_series, y_series)
                r2 = r_value**2  # coefficient of determination
                # Evaluate the line on THIS series' x values; the previous
                # code used the whole list of series here.
                x2 = numpy.array(x_series)
                y2 = x2 * slope + intercept
            elif fit_func_I == 'lowess':
                x2 = numpy.array(x_series)
                y2_lowess = lowess.lowess(x2,
                                          numpy.array(y_series),
                                          f=0.1,
                                          iter=100)
                # Running maximum: the fitted curve is never allowed to drop.
                y2 = numpy.maximum.accumulate(y2_lowess)

            # Plot the data.
            c = next(colors)
            ax.scatter(x_series,
                       y_series,
                       color=c,
                       marker="o",
                       label=data_labels_I[cnt_data])
            if x2 is not None:
                ax.plot(x2,
                        y2,
                        linestyle='-',
                        color=c,
                        label=data_labels_I[cnt_data] + '_fitted')
            # Label data points.
            if text_labels_I:
                for i, txt in enumerate(text_labels_I[cnt_data]):
                    ax.annotate(txt, (x_series[i], y_series[i]))
            # Show fit equation (the intercept term is omitted when it is 0).
            if fit_func_I == 'linear' and show_eqn_I:
                fit_eqn = "y = " + str(slope) + "*x"
                if intercept < 0:
                    fit_eqn += " " + str(intercept)
                elif intercept > 0:
                    fit_eqn += " +" + str(intercept)
                ax.annotate(fit_eqn, (min(x_series), max(y_series)))
            # Show r2 value just below the equation.
            if fit_func_I == 'linear' and show_r2_I:
                r2_label = "r2 = " + str(r2)
                ax.annotate(r2_label, (min(x_series), max(y_series) - 0.5))

        # Show legend
        if show_legend_I:
            plt.legend(loc='best')
        # Produce an image.
        if filename_I:
            fig.savefig(filename_I)
        # Show the image.
        if show_plot_I:
            plt.show()
# Tile the average signal over the 12 consecutive periods of the series.
for y in range(0,12):
    for d in range(0, len(aveSig)):
        index = y*len(aveSig)+d
        aveTrend[index] = aveSig[d]

# Subtract the average from the signal.
ltTrend = numpy.copy(dvals)
for y in range(0,12):
    for d in range(0, len(aveSig)):
        index = y*len(aveSig)+d
        ltTrend[index] = dvals[index] - aveSig[d]

# Loess smoothed trend (rough stuff).
# The `numpy.float` alias was removed in NumPy 1.24; the builtin `float` is
# the same dtype (float64) and works on every NumPy version.
x = numpy.array(range(0,len(dvals)), float)
y = numpy.array(ltTrend, float)
result = lowess.lowess(x,y, f=0.5/3.,iter=2)

# Residuals left after removing the smoothed trend.
resids = ltTrend-result

# Plots: raw signal, tiled average, smoothed trend, residuals.
pyplot.subplot(4,1,1)
pyplot.plot(x, dvals)
pyplot.subplot(4,1,2)
pyplot.plot(x, aveTrend)
pyplot.subplot(4,1,3)
pyplot.plot(x, result)
pyplot.subplot(4,1,4)
pyplot.plot(x, resids)
pyplot.show()
Esempio n. 29
0
def get_GC_depth_correction_vect(sum_depths,
                                 n_bases,
                                 GC_width,
                                 max_correction_factor,
                                 calc_correction_factor=True,
                                 correct_range=False):
    """Compute a per-GC-bin multiplicative depth correction.

    The mean depth per GC bin is smoothed with lowess over the 0.25-0.75 GC
    range, extended linearly to both tails, and turned into the factor
    ``mu / smoothed_depth`` where ``mu`` is the overall mean depth.

    Parameters:
        sum_depths: array of summed depths, one entry per GC bin; its length
            must be ``2 * GC_width + 2``.
        n_bases: array with the number of bases per GC bin (same length).
        GC_width: half-width that fixes the number of GC bins.
        max_correction_factor: the correction is clipped to
            ``[1 / max_correction_factor, max_correction_factor]``; a value
            <= 0 disables the correction (all ones). Ignored when
            ``correct_range`` is set.
        calc_correction_factor: unused; kept for interface compatibility.
        correct_range: if true, the clip bound is taken from the correction
            at the first GC bin above 0.75 instead of the argument.

    Returns:
        (GCp, ave_depths, GCp2_8, lowess_depth, correction, mu)

    Raises:
        AssertionError: if ``sum_depths`` has the wrong length.
    """

    n_bins = 2 * GC_width + 1 + 1  # have to count 0 as well~ so, 41+1
    assert (sum_depths.shape[0] == n_bins)

    # Bins without bases give 0/0 = NaN, which is zeroed right below, so the
    # floating point warnings are expected and silenced here.
    with np.errstate(divide='ignore', invalid='ignore'):
        ave_depths = sum_depths / n_bases.astype(np.float64)
    GCp = np.arange(0, n_bins) / float((2 * GC_width + 1))

    ave_depths[np.isnan(ave_depths)] = 0

    # Restrict the lowess fit to the central GC range; the tails are
    # extrapolated linearly further down.
    central = np.logical_and(GCp >= 0.25, GCp <= .75)
    ave_depths2_8 = ave_depths[central]
    GCp2_8 = GCp[central]
    GCp0_2 = GCp[GCp < 0.25]
    GCp8_10 = GCp[GCp > .75]

    # now apply lowess on this
    lowess_depth = biostats.lowess(GCp2_8, ave_depths2_8, f=.15)

    # End segments of the smoothed curve used to fit the extrapolation lines.
    k = 5
    y1 = lowess_depth[0:k + 1]
    x1 = GCp2_8[0:k + 1]

    l = lowess_depth.shape[0]
    y2 = lowess_depth[l - k:l + 1]
    x2 = GCp2_8[l - k:l + 1]

    # Debug output.
    print(lowess_depth)
    print("fit lowess on", ave_depths2_8)
    print(GCp0_2)
    print(GCp8_10)
    print(x1, y1)  # previously printed y2 next to x1
    print(x2, y2)

    # p[0] is used as the slope and p[1] as the intercept of each line.
    p1 = get_line_params(x1, y1)
    p2 = get_line_params(x2, y2)

    left_line = p1[0] * GCp0_2 + p1[1]
    right_line = p2[0] * GCp8_10 + p2[1]

    lowess_depth = np.r_[left_line, lowess_depth, right_line]

    mu = sum_depths.astype(np.float64).sum() / n_bases.astype(np.float64).sum()
    # Keep the divisor strictly positive so the correction stays finite.
    lowess_depth = np.clip(lowess_depth, 1e-10, 1e30)
    correction = mu / lowess_depth

    if (correct_range):
        # Bound the correction by its own value at the first bin above 0.75.
        # NOTE(review): if that value is below 1 the clip bounds are inverted
        # - confirm the intended behaviour.
        GC_max = 0.75
        iGC_max = (np.where(GCp > GC_max))[0][0]
        max_correction_factor = correction[iGC_max]
        correction = np.clip(correction, 1.0 / max_correction_factor,
                             max_correction_factor)
    elif (max_correction_factor <= 0):
        correction = np.ones(correction.shape[0])
    else:
        correction = np.clip(correction, 1.0 / max_correction_factor,
                             max_correction_factor)

    return GCp, ave_depths, GCp2_8, lowess_depth, correction, mu
Esempio n. 30
0
    def multiScatterLinePlot(self,title_I,xlabel_I,ylabel_I,x_data_I=[],y_data_I=[],data_labels_I=[],text_labels_I=[],fit_func_I='linear',show_eqn_I=True,show_r2_I=True,filename_I=None,show_plot_I=True,show_legend_I=True):
        '''Create a scatter plot per data series with an optional fitted line.

        Input:
           title_I, xlabel_I, ylabel_I = plot title and axis labels
           x_data_I = [[a1,a2,a3...],[b1,b2,b3,...],...] of type float
           y_data_I = [[a1,a2,a3...],[b1,b2,b3,...],...] of type float
           data_labels_I = [a,b,...] of type string
           text_labels_I = [[a1,a2,a3...],[b1,b2,b3,...],...] of type string
           fit_func_I = 'linear' (least-squares line), 'lowess' (lowess curve
                        forced to be non-decreasing); any other value draws
                        no fitted line
           show_eqn_I, show_r2_I = annotate the linear fit equation / r2
           filename_I = if given, the figure is saved to this path
           show_plot_I = display the figure
           show_legend_I = add a legend

        The default lists are never mutated.
        '''

        # Create a Figure object with a single Axes.
        fig = plt.figure()
        ax = fig.add_subplot(1,1,1) # one row, one column, first plot
        # Title and axis labels do not depend on the series.
        ax.set_title(title_I)
        ax.set_xlabel(xlabel_I)
        ax.set_ylabel(ylabel_I)
        # One colour per data series.
        colors = iter(cm.rainbow(numpy.linspace(0,1,len(x_data_I))))

        for cnt_data, y_series in enumerate(y_data_I):
            x_series = x_data_I[cnt_data]
            # Reset per series so an unsupported fit_func_I cannot re-plot a
            # stale (or undefined) fit.
            x2 = None
            y2 = None
            if fit_func_I == 'linear':
                slope, intercept, r_value, p_value, std_err = linregress(x_series, y_series)
                r2 = r_value**2 # coefficient of determination
                # Evaluate the line on THIS series' x values; the previous
                # code used the whole list of series here.
                x2 = numpy.array(x_series)
                y2 = x2*slope + intercept
            elif fit_func_I == 'lowess':
                x2 = numpy.array(x_series)
                y2_lowess = lowess.lowess(x2,numpy.array(y_series),f=0.1,iter=100)
                # Running maximum: the fitted curve is never allowed to drop.
                y2 = numpy.maximum.accumulate(y2_lowess)

            # Plot the data.
            c = next(colors)
            ax.scatter(x_series, y_series, color=c, marker="o", label=data_labels_I[cnt_data])
            if x2 is not None:
                ax.plot(x2, y2, linestyle='-', color=c, label=data_labels_I[cnt_data]+'_fitted')
            # Label data points.
            if text_labels_I:
                for i, txt in enumerate(text_labels_I[cnt_data]):
                    ax.annotate(txt, (x_series[i], y_series[i]))
            # Show fit equation (the intercept term is omitted when it is 0).
            if fit_func_I == 'linear' and show_eqn_I:
                fit_eqn = "y = " + str(slope) + "*x"
                if intercept < 0:
                    fit_eqn += " " + str(intercept)
                elif intercept > 0:
                    fit_eqn += " +" + str(intercept)
                ax.annotate(fit_eqn, (min(x_series), max(y_series)))
            # Show r2 value just below the equation.
            if fit_func_I == 'linear' and show_r2_I:
                r2_label = "r2 = " + str(r2)
                ax.annotate(r2_label, (min(x_series), max(y_series)-0.5))

        # Show legend
        if show_legend_I:
            plt.legend(loc='best')
        # Produce an image.
        if filename_I:
            fig.savefig(filename_I)
        # Show the image.
        if show_plot_I:
            plt.show()
Esempio n. 31
0
    def fit_trajectories(self,x_I,y_I,fit_func_I='lowess',plot_textLabels_I=None,plot_fit_I=False):
        '''Fit trajectory growth rate data to a smoothing function.

        Input:
           x_I = ale_time
           y_I = growth_rate
           fit_func_I = 'spline', 'movingWindow', 'legendre' or 'lowess'
           plot_textLabels_I = unused; kept for interface compatibility
           plot_fit_I = if True, show a QC plot of the data and the fit
        Output:
           x_O = ale_time_fitted
           y_O = growth_rate_fitted
        Raises:
           ValueError if fit_func_I is not one of the supported names

        The 'spline' and 'lowess' fits are forced to be non-decreasing by
        taking the running maximum of the fitted values.
        '''

        x = x_I
        y = y_I
        if fit_func_I=='spline':
            # cubic smoothing spline (s=.025), evaluated on 500 evenly
            # spaced points spanning the data
            tck = splrep(x,y,k=3,s=.025)
            x2 = linspace(min(x),max(x),500)
            y2 = numpy.maximum.accumulate(splev(x2,tck))
        elif fit_func_I=='movingWindow':
            # moving window filter
            x2 = numpy.array(x)
            y2 = smooth(numpy.array(y),window_len=10, window='hanning')
        elif fit_func_I=='legendre':
            # legendre smoothing optimization
            # The fitter must not be bound to the name `smooth`: assigning it
            # made `smooth` local to the whole function and broke the
            # 'movingWindow' branch with UnboundLocalError.
            smoother = legendre_smooth(len(x),1,1e-4,25)
            x2 = numpy.array(x)
            y2 = smoother.fit(numpy.array(y))
        elif fit_func_I=='lowess':
            # lowess
            x2 = numpy.array(x)
            y2 = numpy.maximum.accumulate(lowess.lowess(x2,numpy.array(y),f=0.1,iter=100))
        else:
            # Previously this only printed a message and then crashed on the
            # undefined fit; fail explicitly instead.
            raise ValueError("fit function not recognized: %r" % (fit_func_I,))
        if plot_fit_I:
            ## QC plot using MatPlotLib
            # Create a Figure object.
            fig = pp.figure()
            # Create an Axes object.
            ax = fig.add_subplot(1,1,1) # one row, one column, first plot
            # Set the axis
            pp.axis([0,max(x),0,max(y)+0.1])
            # Add axis labels.
            ax.set_xlabel('Time [days]')
            ax.set_ylabel('GR [hr-1]')
            # Create the plot: raw data as blue dots, fit as a green line
            pp.plot(x,y,'b.',x2,y2,'g')
            # display the plot
            pp.show()
        # record
        x_O = x2
        y_O = y2
        return x_O, y_O