Example No. 1
def rolling_OLS(sym1="C", sym2="BAC", Frame=None, avg=10):
    # Build the default frame lazily: the original default evaluated
    # percentage(aggregate(...)) at definition time with hard-coded symbols.
    if Frame is None:
        Frame = percentage(aggregate(sym1=sym1, sym2=sym2))
    a = Frame
    print(len(a))
    a['roll_Beta'] = 0.0
    a['rolling_r2'] = 0.0

    for i in range(len(a)-avg+1):
        x = a.iloc[i:i+avg][sym1]  # extract the rolling sub-window (avg rows)
        y = a.iloc[i:i+avg][sym2]
        res = stats.linregress(x, y)  # run the regression once per window
        a.iloc[i+avg-1, a.columns.get_loc('roll_Beta')] = res[0]      # slope
        a.iloc[i+avg-1, a.columns.get_loc('rolling_r2')] = res[2]**2  # r-squared

    a = a[avg-1:]  # drop the rows that never received a full window

    a['Beta_hedged '+sym1] = a[sym1] - a['roll_Beta']*a[sym2]
    hedged = a['Beta_hedged '+sym1]
    a['ZScore'] = (hedged - np.mean(hedged)) / np.std(hedged)
    print(np.mean(hedged))
    print(np.std(hedged))

    return a
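A quick usage sketch (toy data; the percentage() and aggregate() helpers are not shown in this listing, so a random two-column frame stands in for their output):

import numpy as np
import pandas as pd
from scipy import stats

rng = np.random.default_rng(0)  # toy data only
toy = pd.DataFrame({"C": rng.normal(size=50), "BAC": rng.normal(size=50)})
out = rolling_OLS(sym1="C", sym2="BAC", Frame=toy, avg=10)
print(out[["roll_Beta", "rolling_r2", "ZScore"]].tail())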
Example No. 2
def TL_from_sample(dat_sample, analysis = 'partition', out_folder = './out_files/'):
    """Obtain the empirical and simulated TL relationship given the output file from sample_var().
    
    Here only the summary statistics are recorded for each study, instead of results from each 
    individual sample, because the analysis can be quickly re-done given the input file, without
    going through the time-consuming step of generating samples from partitions.
    The input dat_sample is in the same format as defined by get_var_sample_file().
    The output file has the following columns: 
    study, empirical b, empirical intercept, empirical R-squared, empirical p-value, mean b, intercept, R-squared from samples, 
    percentage of significant TL in samples (at alpha = 0.05), z-score between empirical and sample b, 2.5 and 97.5 percentile of sample b,
    z-score between empirical and sample intercept, 2.5 and 97.5 percentile of sample intercept.
    
    """
    study_list = sorted(np.unique(dat_sample['study']))
    for study in study_list:
        dat_study = dat_sample[dat_sample['study'] == study]
        emp_b, emp_inter, emp_r, emp_p, emp_std_err = stats.linregress(np.log(dat_study['mean']), np.log(dat_study['var']))
        b_list = []
        inter_list = []
        psig = 0
        R2_list = []
        for i_sim in dat_sample.dtype.names[5:]:
            var_sim = dat_study[i_sim][dat_study[i_sim] > 0] # Omit samples of zero variance 
            mean_list = dat_study['mean'][dat_study[i_sim] > 0]
            sim_b, sim_inter, sim_r, sim_p, sim_std_error = stats.linregress(np.log(mean_list), np.log(var_sim))
            b_list.append(sim_b)
            inter_list.append(sim_inter)
            R2_list.append(sim_r ** 2)
            if sim_p < 0.05: psig += 1
        # use float division so the significant fraction is not truncated to 0 under Python 2
        psig /= float(len(dat_sample.dtype.names[5:]))
        out_file = open(out_folder + 'TL_form_' + analysis + '.txt', 'a')
        print>>out_file, study, emp_b, emp_inter, emp_r ** 2, emp_p, np.mean(b_list), np.mean(inter_list), np.mean(R2_list), \
             psig, get_z_score(emp_b, b_list), np.percentile(b_list, 2.5), np.percentile(b_list, 97.5), get_z_score(emp_inter, inter_list), \
             np.percentile(inter_list, 2.5), np.percentile(inter_list, 97.5)
        out_file.close()
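The helper get_z_score is called above but never defined in this listing; a minimal sketch consistent with its call sites (z-score of the empirical estimate against the simulated distribution) might be:

def get_z_score(emp_value, sample_list):
    # Hypothetical helper, not from the original source: standardize the
    # empirical value against the mean and spread of the sampled values.
    return (emp_value - np.mean(sample_list)) / np.std(sample_list)

Example No. 3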
def plot_error_norms_with_h(h_vals, nrds, ind=0, att="velo_mag", xlims=None, base=10):
    """
    plot error calculated with 3 different norms as a function of 
    h for a set of model runs
    h_vals is a numpy.ndarray of grid scale values,
    nrds is a list of numa_plotting_tools.NumaRunData objects
    the FINEST GRID is the LAST nrd in nrds
    """
    fig = plt.figure(figsize=(12, 9))
    l1, l2, linf = calc_error_norms(nrds, ind, att)
    x = 1 / h_vals[:-1]
    xlog = np.log10(x)
    m1, b1 = linregress(xlog, np.log10(l1))[:2]
    m2, b2 = linregress(xlog, np.log10(l2))[:2]
    mi, bi = linregress(xlog, np.log10(linf))[:2]
    x_ = np.linspace(xlog.min(), xlog.max(), 25)
    x_10 = 10 ** x_
    lstr = "m = {:.02f}"
    plt.loglog(x, l1, "ks", label=r"$L_1$", basex=base, basey=base)
    plt.plot(x_10, 10 ** (m1 * x_ + b1), "k--", label=lstr.format(m1))
    plt.loglog(x, l2, "ro", label=r"$L_2$", basex=base, basey=base)
    plt.plot(x_10, 10 ** (m2 * x_ + b2), "r--", label=lstr.format(m2))
    plt.loglog(x, linf, "b<", label=r"$L_{\infty}$", basex=base, basey=base)
    plt.plot(x_10, 10 ** (mi * x_ + bi), "b--", label=lstr.format(mi))
    if xlims is not None:
        plt.xlim(xlims)
    plt.legend(numpoints=1)
    plt.xlabel("1/h")
    plt.ylabel("Error norm")
    return fig
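The fitted slopes m reported in the legend are the observed convergence orders; the same number can be read off any two successive grids directly (a small sketch, not part of the original listing):

def observed_order(e1, e2, h1, h2):
    # convergence order p from errors e1, e2 at grid spacings h1, h2:
    # e ~ C * h**p  =>  p = log(e1/e2) / log(h1/h2)
    return np.log(e1 / e2) / np.log(h1 / h2)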
Example No. 4
def _plot_pres_wind(pressures, winds):
    fig = plt.figure()
    ax = plt.subplot(121)
    plt.plot(pressures[:, 0], pressures[:, 1], 'b+')
    rp = stats.linregress(pressures)
    label = 'grad.: {0:.2f}\nintercept: {1:.1f}\n r$^2$: {2:.2f}'.format(rp[0], rp[1], rp[2] ** 2)
    plt.plot((880, 1040), (880 * rp[0] + rp[1], 1040 * rp[0] + rp[1]), 'r--', label=label)
    plt.ylabel('derived track pressure (hPa)')
    plt.xlabel('best track\npressure (hPa)')
    plt.legend(bbox_to_anchor=(0.9, 1.23), numpoints=1, prop={'size': 10})
    ax.set_xticks((880, 920, 960, 1000, 1040))

    ax = plt.subplot(122)
    plt.plot(winds[:, 0], winds[:, 1], 'b+')
    rw = stats.linregress(winds)
    label = 'grad.: {0:.2f}\nintercept: {1:.1f}\n r$^2$: {2:.2f}'.format(rw[0], rw[1], rw[2] ** 2)
    plt.plot((0, 160), (0 * rw[0] + rw[1], 160 * rw[0] + rw[1]), 'r--', label=label)
    plt.ylabel('derived track max. wind speed (ms$^{-1}$)')
    plt.xlabel('best track\nmax. wind speed (ms$^{-1}$)')
    plt.legend(bbox_to_anchor=(0.9, 1.23), numpoints=1, prop={'size': 10})
    ax.set_xticks((0, 40, 80, 120, 160))
    ax.yaxis.tick_right()
    ax.yaxis.set_label_position("right")

    fig.set_size_inches(6.3, 3)
    _save_figure('press_max_ws_corr_2005.png')
Example No. 5
def main():
    timestamps = []
    bottom_norms = []
    top_norms = []

    # expects norms.dat in the same directory; could be changed to a command-line arg
    with open('norms.dat', 'r') as f:
        for line in f:
            words = line.split(' ')
            timestamps.append(words[0][4:20])
            bottom_norms.append(words[1])
            top_norms.append(words[2])

    slope, intercept, r_value, p_value, std_err = stats.linregress(np.asarray(timestamps, float), np.asarray(bottom_norms, float))

    bottom_result = {
        'slope': slope,
        'intercept': intercept,
        'start_time': timestamps[0]
    }

    slope, intercept, r_value, p_value, std_err = stats.linregress(np.asarray(timestamps, float), np.asarray(top_norms, float))

    top_result = {
        'slope': slope,
        'intercept': intercept,
        'start_time': timestamps[0]
    }

    result = {
        "bottom": bottom_result,
        "top": top_result
    }

    return result
Example No. 6
def WCFitCoeff(wcut, opt):

    if np.sum(wcut) < 0.01:
        slope = 0
        intercept = 0
    else:
        water_break_index = np.argwhere(wcut > 0)[0, 0] # Get the first position where the water cut is > 0
        new_wc, new_opt = np.array(wcut[water_break_index:]), np.array(opt[water_break_index:]) * 0.0062898
        slope, intercept, r_value, p_value, slope_std_error = stats.linregress(new_opt, new_wc)

        if r_value < 0.99:
            ratio = float(len(new_wc)) / float(len(wcut))

            if ratio > 0.1:

                if ratio/2 > 0.1:
                    ration2_index = water_break_index + int(len(new_wc)/2)
                    new_wc, new_opt = np.array(wcut[ration2_index:]), np.array(opt[ration2_index:]) * 0.0062898
                    slope, intercept, r_value, p_value, slope_std_error = stats.linregress(new_opt, new_wc)

                    if r_value < 0.99:
                        r90_index = int(0.9 * len(wcut))
                        new_wc, new_opt = np.array(wcut[r90_index:]), np.array(opt[r90_index:]) * 0.0062898
                        slope, intercept, r_value, p_value, slope_std_error = stats.linregress(new_opt, new_wc)

    return slope, intercept
Example No. 7
    def model_mean_disp_by_lm(self,allgenedict):
        '''
        Modeling the mean and dispersion by linear regression
        '''
        list_k=[]
        list_dispersion=[]
        for (gid,gsk) in allgenedict.iteritems():
            nsg=len(gsk.nb_count[0])
            nsample=len(gsk.nb_count)
            if len(gsk.sgrna_kvalue)>0:
                if gsk.MAP_sgrna_dispersion_estimate is not None:
                    sg_k=[x[0] for x in gsk.sgrna_kvalue.tolist()]
                    sg_dispersion=gsk.MAP_sgrna_dispersion_estimate
                    if len(sg_k)>=nsg*nsample:
                        list_k+=sg_k[:(nsg*nsample)]
                        list_dispersion+=sg_dispersion[:(nsg*nsample)]


        k_log=np.log(list_k)
        dispersion_log=np.log(list_dispersion)
        # remove those with too low variance
        k_log2=np.array([k_log[i] for i in range(len(dispersion_log)) if dispersion_log[i]>(-1)])
        dispersion_log2=np.array([dispersion_log[i] for i in range(len(dispersion_log)) if dispersion_log[i]>(-1)])
        if len(k_log2)>20:
            (slope,intercept,r_value,p_value,std_err)=linregress(k_log2,dispersion_log2)
        else:
            (slope,intercept,r_value,p_value,std_err)=linregress(k_log,dispersion_log)
        self.lm_intercept=intercept
        self.lm_coeff=slope

        logging.info('Linear regression: y='+str(slope)+'x+'+str(intercept))
        if np.isnan(slope) or np.isnan(intercept):
            logging.error('Nan values for linear regression')
Example No. 8
def _extrapolate(x, y):
    """This is a very simple extrapolation. 
    Takes: two series of the same pandas DataTable. x is most likely the index of the DataTable
    assumption: 
        - x is are the sampling points while y is the dataset which is incomplete and needs to be extrapolated
        - the relation ship is very close to linear
    proceedure:
        - takes the fist two points of y and performs a linear fit. This fit is then used to calculate y at the very first
          x value
        - similar at the end just with the last two points.
    returns: nothing. everthing happens inplace
    """
    
    xAtYnotNan = x.values[~np.isnan(y.values)][:2]
    YnotNan = y.values[~np.isnan(y.values)][:2]
    slope, intercept, r_value, p_value, slope_std_error = stats.linregress(xAtYnotNan,YnotNan)

    fkt = lambda x: intercept + (slope * x)
    y.values[0] = fkt(x.values[0])

    xAtYnotNan = x.values[~np.isnan(y.values)][-2:]
    YnotNan = y.values[~np.isnan(y.values)][-2:]
    slope, intercept, r_value, p_value, slope_std_error = stats.linregress(xAtYnotNan,YnotNan)

    fkt = lambda x: intercept + (slope * x)
    y.values[-1] = fkt(x.values[-1])
    
    return
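A toy check of the in-place behaviour (assuming numpy, pandas and scipy.stats are imported as in the other examples):

import numpy as np
import pandas as pd

x = pd.Series([0., 1., 2., 3., 4.])
y = pd.Series([np.nan, 2., 3., 4., np.nan])
_extrapolate(x, y)
print(y.values)  # -> [1. 2. 3. 4. 5.]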
Example No. 9
def angleDetection(cx,cy,lx,ly,ux,uy,overallminx,overallmaxx,DEBUG=False):
	lineAngleList = []
	lineInterceptList = []
	lineSlopeList = []
	lineUpperInterceptList = []
	lineUpperSlopeList = []
	lineLowerInterceptList = []
	lineLowerSlopeList = []	
	# Fit a line to each point set if enough points are available
	for it in range(len(cx)):
		if len(cx[it])>0:
			lineslope, intercept, r_value, p_value, std_err = stats.linregress(cx[it],cy[it])
			plotLinearRegression(plt,overallminx,overallmaxx,lineslope, intercept,"0.5")
			lineAngle = math.atan(lineslope)
			lineAngleList.append(lineAngle)
			lineSlopeList.append(lineslope)						
			lineInterceptList.append(intercept)
			if DEBUG:
				print "Slope: %.2f, Intercept: %.2f, Angle: %.2f" % (lineslope,intercept,lineAngle)
				print "Centerx, centery: ", cx, cy
			lineslope, intercept, r_value, p_value, std_err = stats.linregress(lx[it],ly[it])
			plotLinearRegression(plt,overallminx,overallmaxx,lineslope, intercept,"0.3")					
			lineLowerSlopeList.append(lineslope)						
			lineLowerInterceptList.append(intercept)

			lineslope, intercept, r_value, p_value, std_err  = stats.linregress(ux[it],uy[it])
			plotLinearRegression(plt,overallminx,overallmaxx,lineslope, intercept,"0.3")
			lineUpperSlopeList.append(lineslope)						
			lineUpperInterceptList.append(intercept)
	
	return (lineAngleList,lineSlopeList,lineInterceptList,lineLowerSlopeList,lineLowerInterceptList,lineUpperSlopeList,lineUpperInterceptList)
Example No. 10
def movementType(mom, length, verbose):
    '''
    Here we compute the movement type as described in Sbalzarini 2005a.
    If the correlation coefficient for the diffusion-coefficient regression is low, we store 0 instead.
    If one of the regressions leading to the movement type has a correlation coefficient under 0.7,
    we add 1 to indicateur: we want to know which trajectories have this issue, and whether it
    affects only one regression or all of them.

    Returns: diffusion coefficient, movement type and adequateness of diffusion
    '''
    
    indicateur = 0
    x = np.log(range(1, int(length/3)+1))
    y = np.log(mom)
    gamma=np.zeros(shape=(len(moments),))
    for nu in moments:
        r=linregress(x, y[nu-1])
        if verbose>5:
            print "correlation coefficient", r[2]
            print 'p-value', r[3]
        gamma[nu-1]=r[0]
        if nu==2:
            if r[2]>=0.70:
                D=np.exp(r[1])/(4)
            else:
                D=0
            corr = r[2]

        if r[2]<0.70:
            indicateur += 1
    r2=linregress(moments, gamma)

    if verbose>5:
        print "slope ", r2[0], "diffusion coefficient", D 
    return indicateur, r2[0], D, corr
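The module-level moments used above is not part of this listing; given the moment-scaling-spectrum analysis of Sbalzarini 2005a, it is presumably the sequence of displacement-moment orders, e.g.:

moments = range(1, 7)  # assumed: moment orders nu = 1..6, not from the original source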
Example No. 11
def pinkNoiseCharacterize(pspectrum,normalize=True,plot=False):
    '''Compute main power spectrum characteristics'''
    if normalize:
        pspectrum = pspectrum/np.sum(pspectrum)
    
    S = entropy(pspectrum,1)
    
    x = np.arange(1,len(pspectrum)+1)
    lx = np.log10(x)
    ly = np.log10(pspectrum)
    
    c1 = (x > 0)*(x < 80)
    c2 = x >= 80
    
    fit1 = stats.linregress(lx[c1],ly[c1])
    fit2 = stats.linregress(lx[c2],ly[c2])
    
    #print fit1
    #print fit2
    
    if plot:
        # the boolean `plot` argument shadows any imported plot() function,
        # so call matplotlib explicitly (plt assumed imported as in the other examples)
        plt.plot(lx,ly)
        plt.plot(lx[c1],lx[c1]*fit1[0]+fit1[1],'r-')
        plt.plot(lx[c2],lx[c2]*fit2[0]+fit2[1],'r-')
        
    return {'S':S,'slope1':fit1[0],'slope2':fit2[0]}
Example No. 12
def plot(filename):
    data = seq.open(filename).map(parse_line)
    bfs = data.filter(_.algorithm == 'bfs')
    dfs = data.filter(_.algorithm == 'dfs')
    x = np.array(bfs.map(lambda x: x.vertexes * x.edges * x.edges).list())
    y = np.array(bfs.map(_.runtime).list())
    slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
    print(slope, intercept, r_value, p_value, std_err)
    plt.title('Numerical Performance of Edmonds-Karp')
    plt.xlabel('Input Size in VE^2')
    plt.ylabel('Running Time in Seconds')
    plt.scatter(x, y)
    plt.show()
    plt.clf()
    ff_data = dfs.map(lambda x: (x.flow, x.flow * x.edges, x.runtime)).group_by(_[0]).cache()
    plt.title('Numerical Performance of Ford-Fulkerson')
    plt.xlabel('Input Size in Ef')
    plt.ylabel('Running Time in Seconds')
    max_flow = ff_data.max_by(lambda kv: kv[0])[0]
    all_x = list()
    all_y = list()
    for k, v in ff_data:
        x = list(map(_[1], v))
        all_x.extend(x)
        y = list(map(_[2], v))
        all_y.extend(y)
        ratio = 1 - k / max_flow
        if ratio > .8:
            ratio = .8
        plt.scatter(x, y, color=str(ratio))
    x = np.array(all_x)
    y = np.array(all_y)
    slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
    print(slope, intercept, r_value, p_value, std_err)
    plt.show()
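Example No. 13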
def do_pca_analysis(profiles, lens, name='', plot=False):
	L = np.array(0.446*(lens-np.mean(lens)), dtype='float64')
	pr = []
	for i,p in enumerate(profiles):
		mask = np.isnan(p)
		p[mask] = np.interp(np.flatnonzero(mask), np.flatnonzero(~mask), p[~mask])
		av, va = moving_average(np.log(p+0.001), 46, 100)
		pr.append(av)
	y = np.array(pr)
	pca = PCA(n_components=2)
	pca.fit(y)
	print pca.explained_variance_ratio_
	yp = pca.transform(y)
	m,b,r,p,_ = stats.linregress(L, yp[:,0])
	p1 = [p]
	r1 = [r]
	for _ in xrange(300):
		sample = np.random.choice(L.shape[0], L.shape[0], replace=True)
		# index with the bootstrap sample itself; ~sample is a bitwise NOT, not a mask
		m,b,r,p,_ = stats.linregress(L[sample], yp[sample,0])
		p1.append(p)
		r1.append(r)
	m,b,r,p,_ = stats.linregress(L, yp[:,1])
	p2 = [p]
	r2 = [r]
	for _ in xrange(300):
		sample = np.random.choice(L.shape[0], L.shape[0], replace=True)
		# index with the bootstrap sample itself; ~sample is a bitwise NOT, not a mask
		m,b,r,p,_ = stats.linregress(L[sample], yp[sample,1])
		p2.append(p)
		r2.append(r)
	if plot:
		plot_pca(y, pca, yp, L, name)
	return r1, p1, r2, p2, L.shape[0], name, np.std(L)
Example No. 14
def doAnalysis(canvas):
    # linearly spaced sample indices vs. the recorded times
    xi = arange(0,26)
    y = canvas.data.times
    # run the regression once and return the full result tuple
    return stats.linregress(xi,y)
Example No. 15
def calculate_breaking_points(quant_list):
    MAX_ERROR = 5
    RANGE = 5
    CENTER = 50
    BRAKE_POINTS = dict()
    # right to left window
    for x_iter in range(100, CENTER, -1):
        x_proj = range(x_iter - RANGE, x_iter)
        # pylab.plot(x_proj, quant[x-RANGE:x], 'k',alpha=0.5)
        y_subset = quant_list[x_iter - RANGE:x_iter]
        slope, intercept, r_value, p_value, std_err = stats.linregress(x_proj, y_subset) 
        g, l = calculate_error(slope, intercept, x_proj, y_subset)
        # print x-RANGE,x, slope, intercept, y[0], f(x, slope, intercept), l,r
        if l > MAX_ERROR:
            BRAKE_POINTS[x_iter - RANGE / 2] = {"error":l, "slope":slope, "offset":intercept}
    # left to right window
    for x_iter in range(0, CENTER, 1):
        x_proj = range(x_iter, x_iter + RANGE)
        # pylab.plot(x_proj, quant_list[x_iter:x_iter + RANGE], 'k', alpha=0.3)
        y_subset = quant_list[x_iter:x_iter + RANGE]
        slope, intercept, r_value, p_value, std_err = stats.linregress(x_proj, y_subset) 
        g, l = calculate_error(slope, intercept, x_proj, y_subset)
        # print x,x+RANGE, slope, intercept, y[0], f(x, slope, intercept), l,r
        if l > MAX_ERROR:
            # guard against overwriting an existing break point at this window's midpoint
            if (x_iter + RANGE / 2) not in BRAKE_POINTS.keys():
                BRAKE_POINTS[x_iter + (RANGE / 2)] = {"error":l, "slope":slope, "offset":intercept}
        # pylab.plot([x_iter, x_iter + RANGE, ], [ f(x_iter, slope, intercept), f(x_iter + RANGE, slope, intercept)], "b", alpha=0.3)
    return BRAKE_POINTS
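calculate_error is not defined in this listing; judging from the call sites it measures how well each window's data matches the fitted line and returns a pair of error values. A hypothetical reconstruction (names and error measures assumed, not from the original source):

def calculate_error(slope, intercept, x_proj, y_subset):
    # hypothetical helper: mean and maximum absolute deviation from the fitted line
    line = slope * np.asarray(x_proj) + intercept
    resid = np.abs(np.asarray(y_subset, dtype=float) - line)
    return resid.mean(), resid.max()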
Example No. 16
    def do_stats(df):
        """do linregress and add to df"""
        try: 
            from scipy.stats import linregress
        except ImportError:
            
            thetime = strftime("%H:%M:%S", localtime())
            print('%s: sort type not available in this version of corpkit.' % thetime)
            return False

        indices = list(df.index)
        first_year = list(df.index)[0]
        try:
            x = [int(y) - int(first_year) for y in indices]
        except ValueError:
            x = list(range(len(indices)))
        
        statfields = ['slope', 'intercept', 'r', 'p', 'stderr']

        stats = []
        if isinstance(df, Series):
            y = list(df.values)
            sl = Series(list(linregress(x, y)), index=statfields)

        else:    
            for entry in list(df.columns):
                y = list(df[entry])
                stats.append(list(linregress(x, y)))
            sl = DataFrame(list(zip(*stats)), index=statfields, columns=list(df.columns))
        df = df.append(sl)
        
        # drop infinites and nans
        df = df.replace([np.inf, -np.inf], np.nan)
        df = df.fillna(0.0)
        return df
Example No. 17
def analyse(sample_raw_data, analysis, id_list):
    """This function use all seven points.
    """
    x = [0, 60, 120, 180, 240, 300, 360]
    for name, data in sample_raw_data.items():
        item = [id_list[name],
                name, 
                0, 0, 0,
                0, 0, 0, 0,
                0, 0, 0, 0,
                0, 0, 0, 0
                ]
        fifty = data['50uM']
        twenty = data['20uM']
        ten = data['10uM']
        if 'OVRFLW' in fifty:
            continue
        ref1 = data['ref1']
        ref2 = data['ref2']
        ref3 = data['ref3']
        ref4 = data['ref4']
        ref5 = data['ref5']
        ref6 = data['ref6']
        ref = list()
        for i in range(7):
            to_mean = [ref1[i], ref2[i], ref3[i], ref4[i], ref5[i], ref6[i]]
            ref.append(sum(to_mean)/6)

        slope, intercept, r_value, _, _ = linregress(x, fifty)
        item[5] = slope
        item[9] = intercept
        item[13] = r_value ** 2
        slope, intercept, r_value, _, _ = linregress(x, twenty)
        item[6] = slope
        item[10] = intercept
        item[14] = r_value ** 2
        slope, intercept, r_value, _, _ = linregress(x, ten)
        item[7] = slope
        item[11] = intercept
        item[15] = r_value ** 2
        slope, intercept, r_value, _, _ = linregress(x, ref)
        item[8] = slope
        item[12] = intercept
        item[16] = r_value ** 2

        item[2] = item[5] / item[8]
        item[3] = item[6] / item[8]
        item[4] = item[7] / item[8]

        item.extend(fifty)
        item.extend(twenty)
        item.extend(ten)
        item.extend(ref)
        item.extend(ref1)
        item.extend(ref2)
        item.extend(ref3)
        item.extend(ref4)
        item.extend(ref5)
        item.extend(ref6)
        analysis.append(item)
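Example No. 18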
def resid_plotter(cax, x, y, DL, logFlag, figtitle, figname, subf):
    print "...plotting..."
    #    if logFlag:
    #        x = np.log10(x)
    #        y = np.log10(y)
    xlabel = "Modeled Hg"
    ylabel = "Observed Hg"

    minx = np.min(x)
    miny = np.min(y)
    maxx = np.max(x)
    maxy = np.max(y)

    # override to specify axis limits
    minx = 0.001
    miny = minx
    maxx = 10.0
    maxy = maxx

    # limits
    minxy = np.min([minx, miny])
    maxxy = np.max([maxx, maxy])

    DLinds = np.nonzero(DL == 1)
    Detectinds = np.nonzero(DL == 0)
    plt.hold(True)
    mksize = 4.5
    # plot one to one line
    plt.plot([minxy, maxxy], [minxy, maxxy], "black")

    # plot up the detects solid
    plotx = x[Detectinds]
    ploty = y[Detectinds]
    plt.plot(plotx, ploty, "bo", markerfacecolor="blue", markersize=mksize, markeredgecolor="black")
    # plot up the nondetects white
    plotx = x[DLinds]
    ploty = y[DLinds]
    plt.plot(plotx, ploty, "bo", markerfacecolor="white", markersize=mksize, markeredgecolor="black")
    plt.xlim([minxy, maxxy])
    plt.ylim([minxy, maxxy])

    if logFlag:
        plt.yscale("log")
        plt.xscale("log")

    plt.xlabel(xlabel)
    plt.ylabel(ylabel)

    ticknames = ["0.001", "0.01", "0.1", "1.0", "10.0"]
    plt.setp(ax1, xticklabels=ticknames)
    plt.setp(ax1, yticklabels=ticknames)

    plt.text(0.0015, 5, "{0}".format(figtitle))
    plt.savefig("{0}/{1}.pdf".format(subf, figname))
    plt.close("all")
    if logFlag:
        slope, intercept, r_value, p_value, std_err = stats.linregress(np.log10(plotx), np.log10(ploty))
    else:
        slope, intercept, r_value, p_value, std_err = stats.linregress(plotx, ploty)
    return r_value ** 2
Example No. 19
def tagstrendToTaxoLineChart(dataframe, title, dates, split, colourDict, taxonomies, emptyOther):
    style = createTagsPlotStyle(dataframe, colourDict)
    line_chart = pygal.Line(x_label_rotation=20, style=style)
    line_chart.title = title
    line_chart.x_labels = dates
    xi = numpy.arange(split)
    for taxonomy in taxonomies:
        taxoStyle = createTagsPlotStyle(dataframe, colourDict, taxonomy)
        taxo_line_chart = pygal.Line(x_label_rotation=20, style=taxoStyle)
        taxo_line_chart.title = title + ': ' + taxonomy
        taxo_line_chart.x_labels = dates
        for it in dataframe.iterrows():
            if it[0].startswith(taxonomy):
                slope, intercept, r_value, p_value, std_err = stats.linregress(xi, it[1])
                line = slope * xi + intercept
                taxo_line_chart.add(re.sub(taxonomy + ':', '', it[0]), line, show_dots=False)
                dataframe = dataframe.drop([it[0]])
        taxo_line_chart.render_to_file('plot/' + taxonomy + '_trend.svg')

    if not emptyOther:
        taxoStyle = createTagsPlotStyle(dataframe, colourDict)
        taxo_line_chart = pygal.Line(x_label_rotation=20, style=taxoStyle)
        taxo_line_chart.title = title + ': other'
        taxo_line_chart.x_labels = dates
        for it in dataframe.iterrows():
            slope, intercept, r_value, p_value, std_err = stats.linregress(xi, it[1])
            line = slope * xi + intercept
            taxo_line_chart.add(it[0], line, show_dots=False)
        taxo_line_chart.render_to_file('plot/other_trend.svg')
Example No. 20
def calc_dp():
    folders = [fname for fname in os.walk('.').next()[1] if fname[0] == 'd']
    HOMO = []
    LUMO = []
    ab = []
    d = [float(f[1:]) for f in folders]
    home = os.getcwd()
    print home
    print "------------------------DFT Results------------------------"
    for folder in folders:
        os.chdir(home+'/'+folder)
        this_homo, this_lumo, this_ab = getHLA()
        HOMO.append(this_homo)
        LUMO.append(this_lumo)
        ab.append(this_ab)
    HOMO_p = (np.array(HOMO)+np.array(ab)).tolist()
    LUMO_p = (np.array(LUMO)+np.array(ab)).tolist()
    print "Deform\tHOMO\tLUMO\tab\tHOMO+ab\tLUMO+ab"
    for row in zip(d, HOMO, LUMO, ab, HOMO_p, LUMO_p):
        print "%7.4f\t%7.4f\t%7.4f\t%7.4f\t%7.4f\t%7.4f" % row
    print "--------------------------Fitting--------------------------"
    Ev, intercept, r_value, p_value, std_err_v = linregress(d, HOMO_p)
    Ec, intercept, r_value, p_value, std_err_c = linregress(d, LUMO_p)

    print "Ev = %5.3f (error = %5.3f)" % (Ev, std_err_v)
    print "Ec = %5.3f (error = %5.3f)" % (Ec, std_err_c)
    print "--------------------------End------------------------------\n"
    os.chdir(home)
    str_name = home.split(os.sep)[-1]
    struct_name = home.split(os.sep)[-2]

    return struct_name, str_name, Ev, Ec, std_err_v, std_err_c
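Example No. 21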
def forecast_from_trend_line(xs, yrs, forecast_yrs, forecast_periods, trend_function):
    """
    Forecast data by using the specified trend function. Trend functions are the same functions offered in Excel
    for adding trend lines to a plot.
    """
    if trend_function == 1: # Linear trend (y = ax + B)
        slope, intercept, _, _, _ = stats.linregress(yrs, xs)
        y = slope * forecast_yrs + intercept
    elif trend_function == 2: # 2nd degree polynomial trend (p(x) = p[0]*x**2 + p[1]*x + p[2])
        z = np.polyfit(yrs, xs, 2)
        y = np.polyval(z, forecast_yrs)
    elif trend_function == 3: # 3rd degree polynomial trend (p(x) = p[0]*x**3 + p[1]*x**2 + p[2]*x + p[3])
        z = np.polyfit(yrs, xs, 3)
        y = np.polyval(z, forecast_yrs)
    elif trend_function == 4: # Logarithmic trend (y = A + B log x)
        slope, intercept, _, _, _ = stats.linregress(np.log(yrs), xs)
        y = intercept + slope * np.log(forecast_yrs)
    elif trend_function == 5: # Exponential trend (y = Ae^(Bx))
        slope, intercept, _, _, _ = stats.linregress(yrs, np.log(xs))
        y = np.exp(intercept) * np.exp(slope * forecast_yrs)
    elif trend_function == 6: # Power function trend (y = Ax^B)
        slope, intercept, _, _, _ = stats.linregress(np.log(yrs), np.log(xs))
        y = np.exp(intercept) * np.power(forecast_yrs, slope)
    elif trend_function == 7: # Exponential smoothing with a dampened trend
        xs_fit_opt = exp_smooth.calc_variable_arrays(.98, xs, forecast_periods)
        y = exp_smooth.exp_smooth_forecast(xs_fit_opt, True)[-forecast_periods:]
    else: # Consumption forecasting with elasticity and income
        y = 8

    # Mask any negative, zero, infinity, or n/a values before returning
    y = np.ma.masked_less_equal(y, 0)
    y = np.ma.fix_invalid(y)
    return y
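A quick sanity check of the linear option (trend_function=1) on a noiseless line; only numpy and scipy.stats are exercised here, the exp_smooth module is needed only for option 7:

import numpy as np
yrs = np.arange(2000, 2010)
xs = 3.0 * (yrs - 2000) + 5.0  # exact line, so the fit is recovered exactly
print(forecast_from_trend_line(xs, yrs, np.array([2010, 2011]), 2, 1))
# -> approximately [35. 38.]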
Example No. 22
def test_linear_regression_full():
    # Requires scipy, which isn't always available.
    # Not good for coverage but scipy is hard to install correctly on travis-ci
    try:
        from scipy.stats import linregress

        timesteps = numpy.arange(0, 100, dtype=numpy.uint8)
        k = numpy.random.rand(100)
        b = numpy.random.rand(k.shape[0])
        # shape is (timesteps.shape, k.shape)
        values = numpy.outer(timesteps, k) + b + numpy.random.normal(
            size=(timesteps.shape[0], k.shape[0]), scale=0.1)
        values = values.reshape(
            (100, 10, 10))  # divide the second dimension into two parts

        slopes, intercepts, r2vals, pvals = linear_regression(timesteps, values,
                                                              full=True)

        s, i, r, p = linregress(timesteps, values[:, 0, 0])[:4]
        assert numpy.allclose((s, i, r ** 2, p),
                              (slopes[0, 0], intercepts[0, 0], r2vals[0, 0],
                               pvals[0, 0]))

        s, i, r, p = linregress(timesteps, values[:, 2, 0])[:4]
        assert numpy.allclose((s, i, r ** 2, p),
                              (slopes[2, 0], intercepts[2, 0], r2vals[2, 0],
                               pvals[2, 0]))

    except ImportError:
        print('WARNING: scipy not available for testing')
Example No. 23
def OffsetPlot():
    import pylab as P
    import scipy.stats as S
    
    offsp = S.spearmanr(data[:,dict['medianOffset']], telfocusCorrected)
    offreg = S.linregress(data[:,dict['medianOffset']], telfocusCorrected)
    offreg2 = S.linregress(data[:,dict['medianOffset']], telfocusOld)
    min = -50.
    max = 50.
    
    print '\nOffset Spearman rank-order:', offsp
    print 'Offset fit:', offreg
    print 'and For unCorrected data:', offreg2
    
    P.plot(data[:,dict['medianOffset']], telfocusCorrected, 'bo', label = 'Data')
    P.plot([min,max], [min*offreg[0] + offreg[1], max*offreg[0] + offreg[1]], 
           'r-', label ='Linear Fit (Corrected)', lw = 2.0)
    P.plot([min,max], [min*offreg2[0] + offreg2[1], max*offreg2[0] + offreg2[1]], 
           'g--', label ='Linear Fit (UnCorrected)', lw = 1.5)
    P.axhline(medianNew, color ='b')
    P.xlim(min, max)
    P.xlabel('Median Offset (telescope units)')
    P.ylabel('Temperature Corrected Telescope Focus + Median Offset')
    P.legend(shadow=True)
    P.savefig('offsetCorrelation.png')
    P.close()
Example No. 24
    def findscparam(self):
        if  not self.setparam:
            return
        if self.ivdata[:, 0][0]>self.ivdata[:, 0][1]:
            volt = np.flipud(self.ivdata[:, 0])
            curr = np.flipud(self.ivdata[:, 1])
        else:
            volt = self.ivdata[:, 0]
            curr = self.ivdata[:, 1]
#        finding last data position before zero crossing
        zero_crossing=np.where(np.diff(np.sign(curr)))[0][0]
#        creating function for data interpolation
        data_interpld = interpolate.interp1d(volt, curr,  kind='cubic')
#        approximate Voc value by linear interpolation
        slope = (curr[zero_crossing +1] - curr[zero_crossing])/(volt[zero_crossing + 1]-volt[zero_crossing])
        intercept = curr[zero_crossing] - slope*volt[zero_crossing]
#        slope,  intercept,  r_value,  p_value,  std_err = stats.linregress(volt[zero_crossing:zero_crossing+1],  curr[zero_crossing:zero_crossing+1])
        voc = - intercept/slope
        isc = data_interpld(0)
#        finding max power point
        voltnew = np.arange(0, volt[zero_crossing+1],  0.001)
        maxscpower = max(np.abs(np.multiply(voltnew,  data_interpld(voltnew))))
        maxscpower_voltposition = np.argmax(np.abs(np.multiply(voltnew, data_interpld(voltnew))))
        fillfactor = np.abs(maxscpower/(voc*isc))
        effic = maxscpower*1000/(self.sampleparameters[2]*self.sampleparameters[1])
#        finding r_s and r_shunt graphically --- approximate method
        rsh_slope,  intercept,  r_value,  p_value,  std_err = stats.linregress(voltnew[0:int(maxscpower_voltposition*0.8)], data_interpld(voltnew[0:int(maxscpower_voltposition*0.8)]))
        rshunt = np.abs(1/rsh_slope)
        rs_slope,  intercept,  r_value,  p_value,  std_err = stats.linregress(voltnew[-50:-1], data_interpld(voltnew[-50:-1]))
        rseries = np.abs(1/rs_slope)
        return [isc,  voc,  fillfactor,  maxscpower,  effic, rshunt,  rseries]
Example No. 25
def measure_okr(h, t, falls, minPPoints=30, minSPoints=3, minP=0.1, figNum=None):
    pursuitVel = []
    saccadeVel = []
    i = 1
    if figNum is not None:
        figure(figNum)
    while i < len(falls):
        pStart = falls[i-1]['start']+falls[i-1]['length']+2
        pEnd = falls[i]['start']
        sStart = falls[i]['start']
        sEnd = falls[i]['start']+falls[i]['length']
        if pEnd - pStart < minPPoints or sEnd - sStart < minSPoints:
            i += 1
            continue
        pr = linregress(t[pStart:pEnd],h[pStart:pEnd])
        sr = linregress(t[sStart:sEnd],h[sStart:sEnd])
        if (pr[3] <= minP and sr[3] <= minP):
            saccadeVel += [sr[0],]
            pursuitVel += [pr[0],]
            if figNum is not None:
                ts = array([t[sStart],t[sEnd]])
                ys = ts * sr[0] + sr[1]
                plot(ts,ys,c='g',linewidth=2)
                ts = array([t[pStart],t[pEnd]])
                ys = ts * pr[0] + pr[1]
                plot(ts,ys,c='r',linewidth=2)
        i += 1
    if figNum is not None:
        plot(t,h,c='k')
    return pursuitVel, saccadeVel
Example No. 26
def do_pca_analysis(profiles, lens, name=""):
    L = np.array(0.446 * (lens - np.mean(lens)), dtype="float64")
    profiles_smooth_l = []
    for i, p in enumerate(profiles):
        mask = np.isnan(p)
        p[mask] = np.interp(np.flatnonzero(mask), np.flatnonzero(~mask), p[~mask])
        average, va = scalingBicoidFinalReally.moving_average(np.log(p + 0.001), 46, 100)
        profiles_smooth_l.append(average)
    profiles_a = np.array(profiles_smooth_l)
    pca = PCA(n_components=2)
    pca.fit(profiles_a)
    print pca.explained_variance_ratio_
    profiles_transformed_a = pca.transform(profiles_a)
    m, b, r, p, _ = stats.linregress(L, profiles_transformed_a[:, 0])
    p1 = [p]
    r1 = [r]
    for _ in xrange(1000):
        sample = np.random.choice(L.shape[0], L.shape[0], replace=True)
        # index with the bootstrap sample itself; ~sample is a bitwise NOT, not a mask
        m, b, r, p, _ = stats.linregress(L[sample], profiles_transformed_a[sample, 0])
        p1.append(p)
        r1.append(r)
    m, b, r, p, _ = stats.linregress(L, profiles_transformed_a[:, 1])
    p2 = [p]
    r2 = [r]
    for _ in xrange(1000):
        sample = np.random.choice(L.shape[0], L.shape[0], replace=True)
        # index with the bootstrap sample itself; ~sample is a bitwise NOT, not a mask
        m, b, r, p, _ = stats.linregress(L[sample], profiles_transformed_a[sample, 1])
        p2.append(p)
        r2.append(r)
    plot_pca(profiles_a, pca, profiles_transformed_a, L, name)
    more_stats_d = {"norm_sigma_l": np.std(lens) / np.mean(lens)}
    return pca, (r1, p1, r2, p2, L.shape[0], name, np.std(L), more_stats_d)
Example No. 27
        def get_Slope(start_at,finish_at,sample,aflow):
            length = len(aflow[start_at:finish_at])
            length20 = int(length*0.2)+start_at
            length80 = int(length*0.8)+start_at

            x2 = sample[start_at:length20]
            y2 = aflow[start_at:length20]
            # call linear regression for that data set
            slope, intercept, r_value, p_value, std_err = stats.linregress(x2,y2)
            angle_1 = math.degrees(math.atan(slope))
            self.ax1.plot(x2,y2,'.-r')
     
            
            x2 = sample[length20:length80]
            y2 = aflow[length20:length80]
            self.ax1.plot(x2,y2,'.-g')
            # call linear regression for that data set
            slope, intercept, r_value, p_value, std_err = stats.linregress(x2,y2)
            angle_2 = math.degrees(math.atan(slope))

            x2 = sample[length80:finish_at]
            y2 = aflow[length80:finish_at]
            self.ax1.plot(x2,y2,'.-b')
            # call linear regression for that data set
            slope, intercept, r_value, p_value, std_err = stats.linregress(x2,y2)
            angle_3 = math.degrees(math.atan(slope))
            return angle_1, angle_2, angle_3
Example No. 28
def _fit_align(xcoefs, ycoefs, misll, fitll, relx, rely):
    """fitting stage of procedure align_correlate
    """

    acoefx, bcoefx = None, None
    acoefy, bcoefy = None, None
    xfit, yfit = None, None

    # inter-/extra-polation
    if misll:
        # find linear fit
        from scipy.stats import linregress
            
        if xcoefs is None:
            acoefx, bcoefx = linregress(fitll, relx[fitll])[:2]      
        else:
            acoefx, bcoefx = xcoefs

        if ycoefs is None:
            acoefy, bcoefy = linregress(fitll, rely[fitll])[:2]                
        else:
            acoefy, bcoefy = ycoefs
   
        # calculate offsets for layers in misll
        for i in misll:
            relx[i] = acoefx*i + bcoefx
            rely[i] = acoefy*i + bcoefy

    return relx, rely, acoefx, bcoefx, acoefy, bcoefy
Example No. 29
def linear_regress(data, log=True, clip=None, r2=0.8, **kwargs):
    """Fit a 1st order polynomial by doing first order polynomial fit."""
    ys = pd.DataFrame(data)
    values = pd.DataFrame(index=['slope', 'intercept', 'good'])
    fits = {}
    for col in ys:
        # reset per column; otherwise a single good fit marks every later column good
        good = False
        if clip:
            y = ys[col].dropna()
            limit = np.arange(1,np.min(((1+clip),len(y.index))))
            y = ys.loc[limit,[col]][col]
            x = pd.Series(y.index.values, index=y.index, dtype=np.float64)
        else:
            y = ys[col].dropna()
            x = pd.Series(y.index.values, index=y.index, dtype=np.float64)
        if log:
            slope, intercept, r, p, stderr = \
                    stats.linregress(np.log(x), np.log(y))
            if r**2 > r2:
                good = True
            values[col] = [slope, np.exp(intercept), good]
            fits[col] = x.apply(lambda x: np.exp(intercept)*x**slope)
        else:
            slope, intercept, r, p, stderr = \
                    stats.linregress(x, y)
            if r**2 > r2:
                good = True
            values[col] = [slope, intercept, good]
            # linear fit: evaluate intercept + slope*x (the power-law form belongs to the log branch)
            fits[col] = x.apply(lambda v: intercept + slope * v)
    values = values.T
    fits = pd.concat(fits, axis=1)
    return (values,fits)
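Example No. 30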
def plot_planar_pos_error_sensitivity():
    error, avg, max = load_error_data("errorFinal.dat")
    error_frac = float(1)/error

    fig = plt.figure()
    fig.set_size_inches(fig_size, (float(6)/8)*fig_size)
    plt.grid(True)
    #plt.title("Farfield error due to planar uncertainty")
    plt.xlabel("Planar position error [$\lambda$]")
    plt.ylabel("Farfield error")
    plt.xlim(np.min(error_frac), np.max(error_frac))
    plt.plot(error_frac, avg)
    plt.plot(error_frac, max)
    plt.legend(["avg", "max"], loc='upper left')

    # Calculate error sensitivity equations
    slope, intercept, r_value, p_value, std_error = stats.linregress(error_frac, avg)
    print("Avg")
    print(slope)
    print(intercept)
    slope, intercept, r_value, p_value, std_error = stats.linregress(error_frac, max)
    print("Max")
    print(slope)
    print(intercept)
    x = np.linspace(0, 0.25, 50)
Example No. 31
def derive_EBM_II(T, N, xCO2):
    
    n_years = T.size
    
    #--------------------------------------------------------------------------------------------------
    # 0. set param to the EBM-1 values
    #--------------------------------------------------------------------------------------------------
    
    EBM_0 = derive_EBM_I(T, N, xCO2)
    
    forcage = FORCING(typ='abrupt', xCO2_infty=xCO2)

    datas_EBM = analytical_EBM(EBM_0, forcage, n_years)
    T0_EBM = datas_EBM['T0']
    T_EBM = datas_EBM['T']
    H_EBM = datas_EBM['H']
    
    n_iters = 10
    for iter in range(n_iters):
        
        X = np.c_[T, H_EBM[1:]]
        regr = linear_model.LinearRegression()
        regr.fit(X, N)
        forc = regr.intercept_
        lbda = - regr.coef_[0]
        epsi = 1 - regr.coef_[1]
        
        # print('=====>')       
        # print(forc)
        # print(lbda)
        # print(epsi)
        # print('=====>')       

        T_eq = forc / lbda
        
        t_i   = 80
        x_    = np.arange(t_i, n_years)
        y_    = np.log(1 - T[t_i-1:n_years-1] / T_eq)
        slope, intercept, r_value, p_value, std_err = stats.linregress(x_, y_)    
        tau_s = -1.0 / slope
        a_s   = np.exp(intercept)
        
        a_f = 1 - a_s
        t_i = 6
        t_  = np.arange(1, t_i)
        tau = t_ / (np.log(a_f) - np.log(1 - T[0:t_i-1]/T_eq - a_s*np.exp(-t_/tau_s)))
        tau_f = np.mean(tau)
        
        c   = lbda / (a_f / tau_f + a_s / tau_s)
        c_0 = lbda*(a_f*tau_f + a_s*tau_s) - c
        gam = c_0 / (a_s*tau_f + a_f*tau_s)

        # print(c)
        # print(c_0)
        
        c_0 = c_0 / epsi
        gam = gam / epsi
        
        myEBM = EBM(F=forc, lbda=lbda, c=c, c_0=c_0, gam=gam, epsi=epsi, xCO2=xCO2)
        
        datas_EBM = analytical_EBM(myEBM, forcage, n_years)
        T0_EBM = datas_EBM['T0']
        T_EBM = datas_EBM['T']
        H_EBM = datas_EBM['H']

    output = EBM(F=forc, lbda=lbda, c=c, c_0=c_0, gam=gam, epsi=epsi, xCO2=xCO2)

    return output
Example No. 32
   <Condition 4> Check the detailed regression-model results: use the summary() function
'''

from scipy import stats
import pandas as pd
import statsmodels.formula.api as sm
import matplotlib.pyplot as plt
from pylab import plot, legend, show
score_iq  = pd.read_csv("C:/ITWILL/4_Python-II/data/score_iq.csv")
score_iq.info()

# 1
y = score_iq.score
x = score_iq.academy
# 2
model = stats.linregress(x,y)
model
''' LinregressResult(slope=4.847829398324446  <slope>,
                     intercept=68.23926884996192  <intercept>,
                     rvalue=0.8962646792534938  <explanatory power>,
                     pvalue=4.036716755167992e-54  <p-value>,
                     stderr=0.1971936807753301  <standard error>)      '''
y_pred = x * model.slope + model.intercept
# 3
plt.plot(x, y, 'bo', label='x,y scatter')
plt.plot(x, y_pred, 'r.-', label='y pred')
legend(loc='best')
plt.show()
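Example No. 33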


# 1
X = X[np.logical_not(np.isnan(y)).ravel(), :]
DX = DX[np.logical_not(np.isnan(y))]
y = y[np.logical_not(np.isnan(y))]
assert X.shape == (80, 261212)

X = X[DX != 2]
y = y[DX != 2]
DX = DX[DX != 2]
assert X.shape == (62, 261212)

lr = linear_model.Ridge(alpha=0.5)

# cross_val_predict returns an array of the same size as `y` where each entry
# is a prediction obtained by cross validation:
pred = sklearn.cross_validation.cross_val_predict(lr, X, y, cv=n_folds)
slope, intercept, r_value, p_value, std_err = stats.linregress(y, pred)

plt.plot(y, pred, 'o', label='original data')
plt.plot(y, intercept + slope * y, 'r', label='fitted line')
plt.xlabel("MAASC score")
plt.ylabel("Predicted score using MRI-based features")
plt.legend()
plt.show()

plt.figure()
plt.grid()
plt.title("R2 = %.02f and p = %.01e" % (r_value, p_value), fontsize=12)
plt.plot(y[DX == 1], pred[DX == 1], 'o', label="ASD")
plt.plot(y[DX == 3], pred[DX == 3], 'o', label="SCZ")
plt.plot(y, intercept + slope * y, 'r', color="black")
plt.xlabel("MAASC score")
Example No. 34
File: p13.py Project: tutuhuang/iem
def plotter(fdict):
    """ Go """
    import matplotlib
    matplotlib.use('agg')
    import matplotlib.pyplot as plt
    pgconn = get_dbconn('coop')
    cursor = pgconn.cursor(cursor_factory=psycopg2.extras.DictCursor)
    ctx = get_autoplot_context(fdict, get_description())
    which = ctx['which']
    station = ctx['station']
    network = "%sCLIMATE" % (station[:2], )
    nt = NetworkTable(network)

    table = "alldata_%s" % (station[:2], )

    cursor.execute(
        """
    select year, extract(doy from day) as d from
        (select day, year, rank() OVER (PARTITION by year ORDER by avg DESC)
        from
            (select day, year, avg((high+low)/2.) OVER
            (ORDER by day ASC rows 91 preceding) from """ + table + """
            where station = %s and day > '1893-01-01') as foo)
            as foo2 where rank = 1
            ORDER by year ASC
    """, (station, ))
    years = []
    maxsday = []
    today = datetime.date.today()
    delta = 0 if which == 'end_summer' else 91
    for row in cursor:
        if row['year'] == today.year and row['d'] < 270:
            continue
        maxsday.append(row['d'] - delta)
        years.append(row['year'])

    df = pd.DataFrame(dict(year=pd.Series(years), doy=pd.Series(maxsday)))
    maxsday = np.array(maxsday)

    (fig, ax) = plt.subplots(1, 1)
    ax.scatter(years, maxsday)
    ax.grid(True)
    ax.set_ylabel("%s Date" % ('End' if delta == 0 else 'Start', ))
    ax.set_title(("%s [%s] %s\n"
                  "%s Date of Warmest (Avg Temp) 91 Day Period") %
                 (nt.sts[station]['name'], station, PDICT.get(which),
                  'End' if delta == 0 else 'Start'))

    yticks = []
    yticklabels = []
    for i in np.arange(min(maxsday) - 5, max(maxsday) + 5, 1):
        ts = datetime.datetime(2000, 1, 1) + datetime.timedelta(days=i)
        if ts.day in [1, 8, 15, 22, 29]:
            yticks.append(i)
            yticklabels.append(ts.strftime("%-d %b"))
    ax.set_yticks(yticks)
    ax.set_yticklabels(yticklabels)

    h_slope, intercept, r_value, _, _ = stats.linregress(years, maxsday)
    ax.plot(years, h_slope * np.array(years) + intercept, lw=2, color='r')

    avgd = datetime.datetime(
        2000, 1, 1) + datetime.timedelta(days=int(np.average(maxsday)))
    ax.text(0.1,
            0.03,
            "Avg Date: %s, slope: %.2f days/century, R$^2$=%.2f" %
            (avgd.strftime("%-d %b"), h_slope * 100., r_value**2),
            transform=ax.transAxes,
            va='bottom')
    ax.set_xlim(min(years) - 1, max(years) + 1)
    ax.set_ylim(min(maxsday) - 5, max(maxsday) + 5)

    return fig, df
Example No. 35
def order_slope(single_powers: np.ndarray) -> float:
    ordered_powers = np.sort(single_powers[single_powers > 0])
    return linregress(np.arange(len(ordered_powers)),
                      np.log(ordered_powers))[0]
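A toy check (usage assumed, not from the original source): geometrically increasing powers give a constant log-slope.

import numpy as np
powers = 2.0 ** np.arange(8)   # 1, 2, 4, ..., 128
print(order_slope(powers))     # ~log(2) = 0.693...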
Example No. 36
 significanceRange = round(len(yValueArr) / 8)
 significantTrendCount = 0
 significantTrendArray = []
 for n in range(trendLen):
     currentVal = trendArray[n - 1]
     nextVal = trendArray[n]
     if (currentVal != nextVal or (currentVal == nextVal and n == (trendLen - 1))):
         if (n == (trendLen - 1)):
             endIndex = n + 1
         else:
             endIndex = n
         trendLength = endIndex - startIndex + 1
         if trendLength > significanceRange:
             xRange = pd.Series(numericXValueArr).loc[startIndex:endIndex]
             yRange = pd.Series(yValueArr).loc[startIndex:endIndex]
             result = linregress(xRange, yRange)
             intercept = round(result[1], 2)
             slope = round(result[0], 2)
             trendRange = {"Length": (endIndex - startIndex + 1), "direction": currentVal,
                           "start": startIndex, "end": endIndex, "slope": slope, "intercept":intercept}
             significantTrendArray.append(trendRange)
             significantTrendCount += 1
             startIndex = n
 # sort the trend dictionaries by length
 if (significantTrendCount > 1):
     # normalize trend slopes to get magnitudes for multi-trend charts
     slopes = np.array([trend['slope'] for trend in significantTrendArray]).reshape(-1, 1)
     scaler = preprocessing.MinMaxScaler()
     scaler.fit(slopes)
     scaledSlopes = scaler.transform(slopes)
     print(significantTrendArray)
Example No. 37
        #std = 0
        avgAccum = np.append(avgAccum, avg)
        peakAccum = np.append(peakAccum, peak)
        marker = '^'
    else:
        avgAccum = np.append(avgAccum, avg)
        peakAccum = np.append(peakAccum, peak)
        marker = 'o'

    ax.errorbar(peak,
                avg,
                xerr=error,
                yerr=std,
                marker=marker,
                color=color,
                capsize=3)
    #ax.scatter(peak, avg)
    ax.annotate(mol, (peak + 0.02, avg + 3))

    #index +=1

slope, intercept, r_value, p_value, std_error = linregress(peakAccum, avgAccum)
x = np.linspace(np.min(peakAccum), np.max(peakAccum), 100)
y = slope * x + intercept

ax.plot(x, y, label="r = %.3f" % r_value, color='k')

ax.legend(loc=1)

fig.savefig('figures/peak_v_cnc_scount.png', format='png')
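Example No. 38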
def do_linear_regression(X, Y):
    # Fill in the implementation here.
    slope, intercept, r_value, p_value, std_err = linregress(X, Y)
    return slope, intercept
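A quick check on a noiseless line y = 2x + 1; the fit recovers slope and intercept exactly:

print(do_linear_regression([0, 1, 2, 3, 4], [1, 3, 5, 7, 9]))  # -> (2.0, 1.0)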
Example No. 39
    def test(self, input, target, iprint=1, filename=None):
        """
        Calculates output and parameters of regression.

        :Parameters:
            input : 2-D array
                Array of input patterns
            target : 2-D array
                Array of network targets
            iprint : {0, 1, 2}, optional
                Verbosity level: 0 -- print nothing, 1 -- print regression
                parameters for each output node (default), 2 -- print
                additionaly general network info and all targets vs. outputs
            filename : str
                Path to the file where printed messages are redirected
                Default is None

        :Returns:
            out : tuple
                *(output, regress)* tuple where: *output* is an array of network
                answers on input patterns and *regress* contains regression
                parameters for each output node. These parameters are: *slope,
                intercept, r-value, p-value, stderr-of-slope, stderr-of-estimate*.

        :Examples:
            >>> from ffnet import mlgraph, ffnet
            >>> from numpy.random import rand
            >>> conec = mlgraph((3,3,2))
            >>> net = ffnet(conec)
            >>> input = rand(50,3); target = rand(50,2)
            >>> output, regress = net.test(input, target)
            Testing results for 50 testing cases:
            OUTPUT 1 (node nr 8):
            Regression line parameters:
            slope         = -0.000649
            intercept     =  0.741282
            r-value       = -0.021853
            p-value       =  0.880267
            slope stderr  =  0.004287
            estim. stderr =  0.009146
            .
            OUTPUT 2 (node nr 7):
            Regression line parameters:
            slope         =  0.005536
            intercept     =  0.198818
            r-value       =  0.285037
            p-value       =  0.044816
            slope stderr  =  0.002687
            estim. stderr =  0.005853

            Exemplary plot:

            .. plot::
                :include-source:

                from ffnet import mlgraph, ffnet
                from numpy.random import rand
                from numpy import linspace
                import pylab

                # Create and train net on random data
                conec = mlgraph((3,10,2))
                net = ffnet(conec)
                input = rand(50,3); target = rand(50,2)
                net.train_tnc(input, target, maxfun = 400)
                output, regress = net.test(input, target, iprint = 0)

                # Plot results for first output
                pylab.plot(target.T[0], output.T[0], 'o',
                                        label='targets vs. outputs')
                slope = regress[0][0]; intercept = regress[0][1]
                x = linspace(0,1)
                y = slope * x + intercept
                pylab.plot(x, y, linewidth = 2, label = 'regression line')
                pylab.legend()
                pylab.show()
        """
        # Check if we dump stdout to the file
        if filename:
            import sys
            file = open(filename, 'w')
            saveout = sys.stdout
            sys.stdout = file
        # Print network info
        if iprint == 2:
            print(self)
            print('')
        # Test data and get output
        input, target = self._testdata(input, target)
        nump = len(input)
        output = self(input)  #array([self(inp) for inp in input])
        # Calculate regression info
        from scipy.stats import linregress
        numo = len(self.outno)
        target = target.transpose()
        output = output.transpose()
        regress = []
        if iprint: print("Testing results for %i testing cases:" % nump)
        for o in range(numo):
            if iprint:
                print("OUTPUT %i (node nr %i):" % (o + 1, self.outno[o]))
            if iprint == 2:
                print("Targets vs. outputs:")
                for p in range(nump):
                    print("%4i % 13.6f % 13.6f" %
                          (p + 1, target[o, p], output[o, p]))
            x = target[o]
            y = output[o]
            r = linregress(x, y)
            # linregress returns the stderr of the slope rather than of the estimate,
            # even though the docs suggest otherwise; compute the estimate's stderr manually
            sstd = r[-1]
            estd = sstd * sqrt(((x - x.mean())**2).sum())
            r += (estd, )
            if iprint:
                print("Regression line parameters:")
                print("slope         = % f" % r[0])
                print("intercept     = % f" % r[1])
                print("r-value       = % f" % r[2])
                print("p-value       = % f" % r[3])
                print("slope stderr  = % f" % r[4])
                print("estim. stderr = % f" % r[5])
            regress.append(r)
            if iprint: print('')
        # Close file and restore stdout
        if filename:
            file.close()
            sys.stdout = saveout

        return output.transpose(), regress
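Example No. 40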
def plot_abundance_correlation_all(data_dir):
    files = glob.glob(
        '{}/images_table_mix_*_results_abundance.csv'.format(data_dir))
    fig_abundance = plt.figure(0)
    fig_fp = plt.figure(1)
    fig_abundance.set_size_inches(cm_to_inches(4.25), cm_to_inches(4.25))
    fig_fp.set_size_inches(cm_to_inches(4), cm_to_inches(3))
    plt.figure(0)
    color_list = [
        'darkviolet', 'navy', 'fuchsia', 'red', 'limegreen', 'gold',
        'darkorange', 'dodgerblue'
    ]
    for i in range(len(files)):
        filename = files[i]
        sum_tab = pd.read_csv(filename)
        mix_id = int(
            re.sub('mix_', '',
                   re.search('mix_[0-9]*', filename).group(0)))
        input_tab_filename = '{}/hiprfish_1023_mix_{}.csv'.format(
            data_dir, str(mix_id))
        input_tab = pd.read_csv(input_tab_filename)
        abundance = sum_tab.drop(columns=['Barcodes'])
        mean_absolute_abundance = abundance.sum(axis=1)
        ul_absolute_abundance = np.percentile(abundance.values, 75, axis=1)
        ll_absolute_abundance = np.percentile(abundance.values, 25, axis=1)
        sum_tab['MeasuredAbundance'] = mean_absolute_abundance / np.sum(
            mean_absolute_abundance)
        sum_tab['ULAbundance'] = ul_absolute_abundance / np.sum(
            mean_absolute_abundance)
        sum_tab['LLAbundance'] = ll_absolute_abundance / np.sum(
            mean_absolute_abundance)
        sum_tab = sum_tab.merge(input_tab, how='left', on='Barcodes').fillna(0)
        sum_tab_fp = sum_tab.loc[sum_tab.Concentration.values == 0]
        plt.figure(0)
        plt.plot(sum_tab.Concentration.values * 1000,
                 sum_tab.MeasuredAbundance.values * 1000,
                 '.',
                 markersize=4,
                 alpha=0.5,
                 color=color_list[i],
                 markeredgewidth=0)
        sum_tab_trim = sum_tab[sum_tab.Concentration != 0]
        slope, intercept, r_value, p_value, std_err = linregress(
            sum_tab_trim.Concentration.values,
            sum_tab_trim.MeasuredAbundance.values)
        gross_error_rate = sum_tab.loc[sum_tab.Concentration.values ==
                                       0].MeasuredAbundance.sum()
        plt.figure(1)
        plt.hist(sum_tab_fp.MeasuredAbundance.values * 1000,
                 bins=100,
                 alpha=0.2)
    plt.figure(0)
    plt.xlabel(r'Input$\times 10^{3}$', fontsize=8, color='black')
    plt.ylabel(r'Measured$\times 10^{3}$',
               fontsize=8,
               color='black',
               labelpad=1)
    plt.tick_params(direction='in',
                    width=0.5,
                    length=2,
                    labelsize=8,
                    labelcolor='black',
                    color='black')
    lim_max = np.maximum(np.max(sum_tab.Concentration),
                         np.max(sum_tab.MeasuredAbundance)) * 1.05
    abundance_correlation_filename = '{}/abundance_correlation_all.pdf'.format(
        data_dir)
    abundance_fp_filename = '{}/abundance_false_positive_histogram.pdf'.format(
        data_dir)
    plt.plot([0, 17.5], [0, 17.5],
             '--',
             color='black',
             alpha=0.8,
             linewidth=0.5)
    plt.xlim(-0.5, 17.5)
    plt.ylim(-0.5, 17.5)
    plt.subplots_adjust(left=0.22, bottom=0.2, right=0.99, top=0.99)
    plt.gca().set_aspect('equal')  # use the current axes; plt.axes() creates a new one in recent matplotlib
    plt.savefig(abundance_correlation_filename, dpi=300, transparent=True)
    plt.figure(1)
    plt.yscale('log')
    plt.xlabel(r'Measured Abundance$\times 10^{3}$', fontsize=8)
    plt.ylabel('Frequency', fontsize=8)
    plt.tick_params(direction='in', width=0.5, length=2, labelsize=8)
    plt.subplots_adjust(left=0.22, right=0.95, top=0.9, bottom=0.2)
    plt.savefig(abundance_fp_filename, dpi=300, transparent=True)
    plt.close()
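The snippet calls a cm_to_inches helper that is not shown; a minimal sketch, assuming it is a plain unit conversion (1 inch = 2.54 cm):

def cm_to_inches(length_cm):
    # Matplotlib figure sizes are specified in inches.
    return length_cm / 2.54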
Example No. 41
fourteen_days_index = datetime_list.index(
    closest_date_with_data)  #get index of our "14 days ago" date

last_14_days = daily_confirmed[(fourteen_days_index - len(daily_confirmed)):]
last_14_days_datetimes = datetime_list[(fourteen_days_index -
                                        len(daily_confirmed)):]
trend = [0]
for i in range(len(last_14_days)):
    if i > 0:
        if last_14_days[i] == 0:
            pass
        else:
            trend.append(last_14_days[i] - last_14_days[i - 1])

x_vals = [i for i in range(len(last_14_days))]
slope, intercept, r_value, p_value, std_err = linregress(x_vals, last_14_days)

if slope > 0:
    trend_color = '#faafaf'
elif slope <= 0:
    trend_color = '#affaaf'

#calculate the simple moving average with a window size of 5
#DAILY CASES
daily_confirmed = np.array(daily_confirmed)
confirmed_moving = list(moving_average(daily_confirmed))
leading_zeroes = [0, 0, 0, 0]
confirmed_moving = leading_zeroes + confirmed_moving
san_diego_data["Confirmed_Moving"] = confirmed_moving
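moving_average is not defined in this excerpt; given the four leading zeroes prepended above, it presumably returns a "valid" window-5 average that is four elements shorter than its input. A minimal sketch under that assumption:

import numpy as np

def moving_average(values, window=5):
    # 'valid' convolution: the output has len(values) - window + 1 points,
    # which is why four zeroes are prepended to restore the original length.
    return np.convolve(values, np.ones(window) / window, mode='valid')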

#CUMULATIVE CASES
Example No. 42
	def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
		if not os.path.isdir(inputDir):
			raise Exception("variantDir does not currently exist as a directory")

		if not os.path.exists(plotOutDir):
			os.mkdir(plotOutDir)

		ap = AnalysisPaths(inputDir, variant_plot = True)

		if ap.n_generation == 1:
			print "Need more data to create addedMass"
			return

		allScatter = plt.figure()
		allScatter.set_figwidth(11)
		allScatter.set_figheight(6)

		plt.style.use('seaborn-deep')
		color_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']

		title_list = [r"Glucose minimal, $\tau = $44 min", r"Glucose minimal anaerobic, $\tau = $100 min", r"Glucose minimal + 20 amino acids, $\tau = $25 min"]

		for varIdx in ap.get_variants():

			if varIdx == 0:
				plotIdx = 1
				gen = [2,3]
			elif varIdx == 1:
				plotIdx = 0
				gen = [2,3]
			elif varIdx == 2:
				plotIdx = 2
				gen = [2,3]
			else:
				continue

			initial_masses = np.zeros(0)
			final_masses = np.zeros(0)

			all_cells = ap.get_cells(generation=gen, variant=[varIdx])
			if len(all_cells) == 0:
				continue

			fail = 0
			for simDir in all_cells:
				try:
					simOutDir = os.path.join(simDir, "simOut")
					mass = TableReader(os.path.join(simOutDir, "Mass"))
					cellMass = mass.readColumn("dryMass")

					initial_masses = np.hstack((initial_masses, cellMass[0]))
					final_masses = np.hstack((final_masses, cellMass[-1]))
				except Exception as e:
					print(e)
					fail += 1

			added_masses = final_masses - initial_masses

			scaled_initial_masses = initial_masses / initial_masses.mean()
			scaled_added_masses = added_masses / added_masses.mean()

			nbins = 5

			n, xbin = np.histogram(scaled_initial_masses, bins=nbins)
			sy, xbin = np.histogram(scaled_initial_masses, bins=nbins, weights=scaled_added_masses)
			sy2, xbin = np.histogram(scaled_initial_masses, bins=nbins, weights=scaled_added_masses*scaled_added_masses)
			mean = sy / n
			std = np.sqrt(sy2/(n-1) - n*mean*mean/(n-1))

			slope, intercept, r_value, p_value, std_err = linregress(scaled_initial_masses, scaled_added_masses)

			# plot all scatter plots
			plt.figure(allScatter.number)
			ax = plt.subplot2grid((1,3), (0,plotIdx))
			ax.plot(scaled_initial_masses, scaled_added_masses, '.', color = "black", alpha = 0.2, zorder=1, markeredgewidth = 0.0)
			ax.errorbar(((xbin[1:] + xbin[:-1])/2), mean, yerr=std, color = "black", linewidth=1, zorder=2)
			ax.plot(scaled_initial_masses, slope * scaled_initial_masses + intercept, color = "blue")

			ax.set_title(
				title_list[varIdx] + ", n=%d" % ((len(all_cells) - fail), ) + "\n" +
				r"$m_{add}$=%.3f$\times$$m_{init}$ + %.3f" % (slope,intercept) + "\n" +
				"r-value=%0.2g" % r_value + "\n" +
				"p-value=%0.2g" % p_value,
				fontsize=FONT_SIZE)

			ax.set_xlim([INIT_MASS_LOWER_LIM, INIT_MASS_UPPER_LIM])
			ax.set_ylim([ADDED_MASS_LOWER_LIM, ADDED_MASS_UPPER_LIM])
			ax.get_yaxis().get_major_formatter().set_useOffset(False)
			ax.get_xaxis().get_major_formatter().set_useOffset(False)

			if varIdx == 1:
				ax.set_ylabel("Normed added mass", fontsize=FONT_SIZE)
			ax.set_xlabel("Normed initial mass", fontsize=FONT_SIZE)

			plt.subplots_adjust(bottom = 0.2)

			whitePadSparklineAxis(ax)

			for tick in ax.yaxis.get_major_ticks():
				tick.label.set_fontsize(FONT_SIZE)
			for tick in ax.xaxis.get_major_ticks():
				tick.label.set_fontsize(FONT_SIZE)

			# plot stripped figure
			fig = plt.figure()
			fig.set_figwidth(3)
			fig.set_figheight(2)
			ax = plt.subplot2grid((1,1), (0,0))
			ax.plot(scaled_initial_masses, scaled_added_masses, '.', color = color_cycle[0], alpha = 0.25, ms=6, zorder=1, markeredgewidth = 0.0, clip_on=False)
			ax.plot(scaled_initial_masses, slope * scaled_initial_masses + intercept, color = 'k')

			ax.set_xlim([INIT_MASS_LOWER_LIM, INIT_MASS_UPPER_LIM])
			ax.set_ylim([ADDED_MASS_LOWER_LIM, ADDED_MASS_UPPER_LIM])

			ax.get_yaxis().get_major_formatter().set_useOffset(False)
			ax.get_xaxis().get_major_formatter().set_useOffset(False)

			whitePadSparklineAxis(ax)

			ax.tick_params(which='both', bottom=True, left=True,
				top=False, right=False, labelbottom=True, labelleft=True,
				labelsize=FONT_SIZE)

			ax.set_xlabel("")
			ax.set_ylabel("")

			plt.tight_layout()
			exportFigure(plt, plotOutDir, plotOutFileName + str(varIdx) + "_stripped", metadata, transparent = True)

		plt.figure(allScatter.number)
		exportFigure(plt, plotOutDir, plotOutFileName, metadata)

		plt.close("all")
Example No. 43
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

M = 10000
N = 52 * 5
betaHat = np.empty(M)
Rsqrd = np.empty(M)

for i in range(M):
    y = np.cumsum(np.random.normal(size=N))
    x = np.cumsum(np.random.normal(size=N))
    reg = stats.linregress(x, y)
    betaHat[i] = reg.slope
    Rsqrd[i] = reg.rvalue**2

plt.hist(betaHat, bins=100)
plt.title('Sampling Distribution of Beta')
plt.xlabel("Value")
plt.ylabel("Frequency")
plt.savefig("images/Spurious-Beta-Histogram.png")
plt.clf()

plt.hist(Rsqrd, bins=100)
plt.title('Sampling Distribution of R-Squared')
plt.xlabel("Value")
plt.ylabel("Frequency")
plt.savefig("images/Spurious-Rsqrd-Histogram.png")
Example No. 44
    def __call__(
        self,
        screen_object: Union['Screen', Any],
        mode: Literal["mean", "pointmutant"] = 'pointmutant',
        min_score: Optional[float] = None,
        max_score: Optional[float] = None,
        replicate: int = -1,
        replicate_second_object: int = -1,
        output_file: Union[None, str, Path] = None,
        **kwargs: Any,
    ) -> None:
        """
        Generate a scatter plot between object and a second object of the
        same class.

        Parameters
        ----------
        screen_object : object from class *Screen* to do the scatter with

        mode : str, default 'pointmutant'
            Set to "mean" to compare the mean score at each position
            instead of individual point mutants.

        min_score : float, default None
            Change values below a minimum score to be that score.
            i.e., setting min_score = -1 will change any value smaller
            than -1 to -1.

        max_score : float, default None
            Change values above a maximum score to be that score.
            i.e., setting max_score = 1 will change any value greater
            than 1 to 1.

        replicate : int, default -1
            Set the replicate to plot. By default, the mean is plotted.
            The first replicate starts at index 0.
            If there is only one replicate, leave this parameter
            untouched.

        replicate_second_object : int, default -1
            Set the replicate of the second object to plot. By default,
            the mean is plotted. The first replicate starts at index 0.
            If there is only one replicate, leave this parameter
            untouched.

        output_file : str, default None
            If you want to export the generated graph, add the path and name
            of the file. Example: 'path/filename.png' or 'path/filename.svg'.

        **kwargs : other keyword arguments
        """
        temp_kwargs = self._update_kwargs(kwargs)
        self.graph_parameters()

        # Choose mode:
        if mode.lower() == 'pointmutant':
            df_output: DataFrame = process_by_pointmutant(
                self.dataframes.df_notstopcodons_limit_score(
                    min_score, max_score)[replicate],
                screen_object.dataframes.df_notstopcodons_limit_score(
                    min_score, max_score)[replicate_second_object])
        else:
            df_output = process_mean_residue(
                self.dataframes.df_notstopcodons_limit_score(
                    min_score, max_score)[replicate],
                screen_object.dataframes.df_notstopcodons_limit_score(
                    min_score, max_score)[replicate_second_object])

        # create figure
        self.fig, self.ax_object = plt.subplots(figsize=temp_kwargs['figsize'])

        # Scatter data points
        plt.scatter(df_output['dataset_1'],
                    df_output['dataset_2'],
                    c='k',
                    s=8,
                    alpha=0.5,
                    rasterized=True,
                    label='_nolegend_')

        # correlation
        _, _, r_value, _, _ = linregress(df_output['dataset_1'],
                                         df_output['dataset_2'])

        # fit and graph line
        fit = np.polyfit(df_output['dataset_1'], df_output['dataset_2'], 1)
        plt.plot(np.unique(df_output['dataset_1']),
                 np.poly1d(fit)(np.unique(df_output['dataset_1'])),
                 color='r',
                 linewidth=1,
                 label="$R^2$ = {}".format(str(round(r_value**2, 2))))

        self._tune_plot(temp_kwargs)
        self._save_work(output_file, temp_kwargs)

        if temp_kwargs['show']:
            plt.show()
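A hypothetical usage sketch; the attribute name scatter and the two Screen instances are assumptions for illustration, not taken from the source:

# Assuming screen_a and screen_b are Screen objects with matching mutations,
# and that this callable is exposed as the `scatter` attribute:
screen_a.scatter(
    screen_b,
    mode='mean',                     # compare positional means
    min_score=-2,
    max_score=2,
    output_file='scatter_mean.png',  # hypothetical output path
)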
Example No. 45
    def determine_zone_slope(self, dbz_3d, clutter, angles, azim_zone,
                             gate_zone):
        """
        We will only use CONVOL scans for determining slopes
        The following property can be used to exclude clutter from slope.
        self.convol_clutter
        """

        azim_region = self.config['precip']['azim_region']
        gate_region = self.config['precip']['gate_region']

        min_azim = azim_zone * azim_region
        max_azim = (azim_zone + 1) * azim_region

        min_gate = gate_zone * gate_region
        max_gate = (gate_zone + 1) * gate_region

        angle_list = []
        dbz_list = []
        height_list = []

        # Above this threshold, we consider it rain.
        # (Defined for reference; not used in this excerpt.)
        max_dbz_per_km = -5.0

        for x in range(0, len(angles)):
            angle = angles[x]
            zone_data = dbz_3d[x, min_azim:max_azim, min_gate:max_gate]
            # Should be more OOP
            zone_clutter = clutter[x, min_azim:max_azim, min_gate:max_gate]
            zone_clutter_bool = zone_clutter.astype(bool)

            if zone_data.shape != zone_clutter.shape:
                raise ValueError("Incompatible zone shapes.")

            zone_flat = list(zone_data.flatten())

            zone_clutter_flat = list(zone_clutter_bool.flatten())

            num_cells = len(zone_flat)
            num_clutter = len(zone_clutter_flat)

            if num_cells != num_clutter:
                raise ValueError("Incompatible list lengths.")

            for y in range(0, num_cells):
                if not zone_clutter_flat[y]:
                    dbz = zone_flat[y]
                    if not isinstance(dbz, np.ma.core.MaskedConstant):
                        angle_list.append(angle)
                        dbz_list.append(dbz)
                        # Making trig approximation h = x*tan(theta)
                        # Note, distance must be in km, and theta must
                        # be converted to radians.
                        # TODO: Use builtin pyart methods.
                        rad_angle = math.radians(angle)
                        # Using midpoint approximation
                        midpoint = int((min_gate + max_gate) / 2.0)
                        # Convert to kilometers
                        distance = midpoint * self.grid_info.gate_step * 0.001
                        height = distance * math.tan(rad_angle)
                        height_list.append(height)

        if len(angle_list) != len(dbz_list):
            raise ValueError("Zone slope error")

        # If there is only data from one elevation angle, we cannot
        # compute the slope.

        angle_set = set(angle_list)
        if len(angle_set) < 2:
            return np.nan
        else:
            slope, intercept, r_value, p_value, std_err = stats.linregress(
                height_list, dbz_list)
            return slope
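As a sanity check on the h = x tan(theta) approximation above: tan(0.5 deg) is about 0.0087, so at a 100 km midpoint distance a 0.5 deg beam sits roughly 0.87 km above the radar. Beam refraction and Earth curvature are ignored, which is what the TODO about pyart's built-in methods points at.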
Example No. 46
        data[barcode][n] = data[barcode][n] / sample_sum

for barcode in data:
    data[barcode] = np.log2(data[barcode])

# x vals (# of generations) for slope calculations
time_points = [0.0, 1.74, 3.71, 0, 1.75, 3.37, 0., 2., 4.]

# calculate slope for every allele for each experiment
barcode_slopes = dict(zip(data.keys(), np.zeros((len(data.keys()), 3))))

for barcode in data:
    for n in range(0, 3):
        yvals = data[barcode][(n * 3):(n * 3 + 3)]
        xvals = time_points[(n * 3):(n * 3 + 3)]
        line = stats.linregress(xvals, yvals)
        barcode_slopes[barcode][n] = line[0]

#print barcode_slopes


def get_fitness(barcode_slopes):
    # get avg wt slope for each experiment
    wt_slopes = np.zeros((1, 3))
    wt_count = 0
    for barcode in barcode_slopes:
        if allele_map[barcode][1] == 'WT':
            wt_slopes += barcode_slopes[barcode]
            wt_count += 1

    wt_slopes = wt_slopes / float(wt_count)
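    # The source is truncated here; a plausible continuation (an assumption,
    # not the author's code) would express fitness relative to the WT mean:
    fitness = {}
    for barcode in barcode_slopes:
        # per-experiment fitness as the slope difference vs. the WT average
        fitness[barcode] = barcode_slopes[barcode] - wt_slopes
    return fitness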
Example No. 47
def get_trendlines_regression(signal: list, **kwargs) -> dict:
    """Get Trendlines Regression

    A regression-only based method of generating trendlines (w/o use of local minima and maxima).

    Arguments:
        signal {list} -- signal of which to find a trend (can be anything)

    Optional Args:
        iterations {int} -- number of types through trendline creation with "divisors"
                            (default: {15})
        threshold {float} -- acceptable ratio a trendline can be off and still counted in current
                             plot (default: {0.1})
        dates {list} -- typically DataFrame.index (default: {None})
        indicator {str} -- for plot name, indicator trend analyzed (default: {''})
        plot_output {bool} -- (default: {True})
        name {str} -- (default: {''})
        views {str} -- (default: {''})

    Returns:
        dict -- trendline content
    """
    # NOTE: DIVISORS is assumed to be a module-level constant; the config
    # lookup below may override it. If neither exists, the len(DIVISORS)
    # calls further down raise a NameError.
    config_path = os.path.join("resources", "config.json")
    if os.path.exists(config_path):
        with open(config_path, 'r') as cpf:
            c_data = json.load(cpf)

        ranges = c_data.get('trendlines', {}).get('divisors',
                                                  {}).get('ranges', [])
        ranged = 0
        for rg in ranges:
            if len(signal) > rg:
                ranged += 1

        divs = c_data.get('trendlines', {}).get('divisors', {}).get('divisors')
        if divs is not None:
            if len(divs) > ranged:
                DIVISORS = divs[ranged]

    iterations = kwargs.get('iterations', len(DIVISORS))
    threshold = kwargs.get('threshold', 0.1)
    dates = kwargs.get('dates')
    indicator = kwargs.get('indicator', '')
    plot_output = kwargs.get('plot_output', True)
    name = kwargs.get('name', '')
    views = kwargs.get('views', '')

    indexes = list(range(len(signal)))

    if iterations > len(DIVISORS):
        iterations = len(DIVISORS)
    divisors = DIVISORS[0:iterations]

    lines = []
    x_s = []
    t_line_content = []
    line_id = 0

    y_max = max(signal) - min(signal)
    x_max = len(signal)
    scale_change = float(x_max) / float(y_max)

    for div in divisors:
        period = int(len(signal) / div)
        for i in range(div):
            for k in range(2):

                data = dict()
                if i == div - 1:
                    data['value'] = signal[period * i:len(signal)].copy()
                    data['x'] = indexes[period * i:len(signal)].copy()
                else:
                    data['value'] = signal[period * i:period * (i + 1)].copy()
                    data['x'] = indexes[period * i:period * (i + 1)].copy()

                data = pd.DataFrame.from_dict(data)

                while len(data['x']) > 4:
                    reg = linregress(data['x'], data['value'])
                    if k == 0:
                        data = data.loc[data['value'] > reg[0] * data['x'] +
                                        reg[1]]
                    else:
                        data = data.loc[data['value'] < reg[0] * data['x'] +
                                        reg[1]]

                reg = linregress(data['x'], data['value'])
                content = {'slope': reg[0], 'intercept': reg[1]}
                content['angle'] = np.arctan(
                    reg[0] * scale_change) / np.pi * 180.0
                if reg[0] < 0.0:
                    content['angle'] = 180.0 + \
                        (np.arctan(reg[0] * scale_change) / np.pi * 180.0)

                line = []
                for ind in indexes:
                    line.append(reg[0] * ind + reg[1])

                x_line = indexes.copy()

                line_corrected, x_corrected = filter_nearest_to_signal(
                    signal, x_line, line)

                if len(x_corrected) > 0:
                    content['length'] = len(x_corrected)
                    content['id'] = line_id
                    line_id += 1

                    lines.append(line_corrected.copy())
                    x_s.append(x_corrected.copy())
                    t_line_content.append(content)
                    # lines.append(line)
                    # x_s.append(x_line)

        for i in range(period, len(signal), 2):
            for k in range(2):

                data = dict()
                data['value'] = signal[i - period:i].copy()
                data['x'] = indexes[i - period:i].copy()

                data = pd.DataFrame.from_dict(data)

                while len(data['x']) > 4:
                    reg = linregress(data['x'], data['value'])
                    if k == 0:
                        data = data.loc[data['value'] > reg[0] * data['x'] +
                                        reg[1]]
                    else:
                        data = data.loc[data['value'] < reg[0] * data['x'] +
                                        reg[1]]

                reg = linregress(data['x'], data['value'])
                content = {'slope': reg[0], 'intercept': reg[1]}
                content['angle'] = np.arctan(
                    reg[0] * scale_change) / np.pi * 180.0
                if reg[0] < 0.0:
                    content['angle'] = 180.0 + \
                        (np.arctan(reg[0] * scale_change) / np.pi * 180.0)

                line = []
                for ind in indexes:
                    line.append(reg[0] * ind + reg[1])

                x_line = indexes.copy()

                line_corrected, x_corrected = filter_nearest_to_signal(
                    signal, x_line, line, threshold=threshold)

                if len(x_corrected) > 0:
                    content['length'] = len(x_corrected)
                    content['id'] = line_id
                    line_id += 1

                    lines.append(line_corrected.copy())
                    x_s.append(x_corrected.copy())
                    t_line_content.append(content)

    # handle over load of lines (consolidate)
    # Idea: bucket sort t_line_content by 'slope', within each bucket then consolidate similar
    # intercepts, both by line extension/combination and on slope averaging. Track line 'id' list
    # so that the corrections can be made for plots and x_plots
    t_line_content, lines, x_s = consolidate_lines(t_line_content, lines, x_s,
                                                   signal)

    t_line_content, lines, x_s = consolidate_lines(t_line_content,
                                                   lines,
                                                   x_s,
                                                   signal,
                                                   thresh=0.2)

    t_line_content, lines, x_s = consolidate_lines(t_line_content,
                                                   lines,
                                                   x_s,
                                                   signal,
                                                   thresh=0.3)

    plots = []
    x_plots = []
    plots.append(signal)
    x_plots.append(list(range(len(signal))))
    plots.extend(lines)
    x_plots.extend(x_s)

    if dates is not None:
        new_xs = []
        for xps in x_plots:
            nxs = [dates[i] for i in xps]
            new_xs.append(nxs)

        x_plots = new_xs

    title = f"{indicator.capitalize()} Trendlines"
    if plot_output:
        generic_plotting(plots, x=x_plots, title=title)
    else:
        filename = os.path.join(name, views,
                                f"{indicator}_trendlines_{name}.png")
        generic_plotting(plots,
                         x=x_plots,
                         title=title,
                         filename=filename,
                         saveFig=True)

    # NOTE: as excerpted, nothing is ever added to `trends`; the computed
    # trendlines live in t_line_content, lines, and x_s above.
    trends = dict()
    return trends
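The core device above is iterative trimming: fit a regression, keep only the points on one side of the fit, and refit until few points remain, so the line converges to the signal's upper or lower envelope. A minimal standalone sketch of that idea (the helper name is illustrative, not from the source):

import numpy as np
from scipy.stats import linregress

def envelope_line(x, y, upper=True, min_points=4):
    # Repeatedly fit, then keep only points on one side of the fit; this
    # mirrors the `while len(data['x']) > 4` loops in the function above.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    reg = linregress(x, y)
    while len(x) > min_points:
        reg = linregress(x, y)
        resid = y - (reg.slope * x + reg.intercept)
        keep = resid > 0 if upper else resid < 0
        if not keep.any() or keep.all():
            break
        x, y = x[keep], y[keep]
    return reg.slope, reg.intercept

With upper=False the same loop tracks support lines instead of resistance lines.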
Example No. 48
        err = float(f.readline().strip())
    data = numpy.genfromtxt(file_name, skip_header=1, delimiter=",")
    return data, err
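The opening of read() is cut off above; from the body, it presumably looked something like this (the file-naming scheme is a guess):

import numpy

def read(name):
    file_name = "{}.csv".format(name)  # hypothetical naming scheme
    with open(file_name) as f:
        err = float(f.readline().strip())
    data = numpy.genfromtxt(file_name, skip_header=1, delimiter=",")
    return data, err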


import sys
try:
    name = sys.argv[1]
except IndexError:
    name = "KCl"

plt.style.use("science")  # style.use returns None, so there is nothing to assign
plt.figure(figsize=(2.8, 2.1), facecolor="w")
# plt.axvline(x=0, ls="--", color="#00ffee")
data, err = read(name)
# data[-1, -1] *= 1.5
plt.errorbar(data[:, 0], data[:, 1], yerr=err / 2, fmt="o")
log_x = numpy.log(data[:, 0])
log_y = numpy.log(data[:, 1])
p = linregress(log_x, log_y)
print(p)
xx = numpy.logspace(-4.2, -1.5)
yy = 0.0014 * xx**-0.587  # hand-tuned guide line; compare the fitted slope printed above
plt.plot(xx, yy, "--")
# plt.xscale("log")
plt.xlabel("KCl concentration (mol/L)")
plt.ylabel("Rectification")
plt.xscale("log")
plt.yscale("log")
plt.savefig("../img/rect_{}_conc.svg".format(name))
 def _getValues(self, index_returns: Series = Series(), portfolio_returns: Series = Series()):
     # The empty-Series defaults are placeholders; linregress needs at least
     # two paired observations, so callers must pass real return series.
     return stats.linregress(index_returns, portfolio_returns)
Example No. 50
# In[13]:

# Sort by x axis
x_axis, y_axis = (list(t) for t in zip(
    *sorted(zip(trimmed_lengths, trimmed_colonies))))

# In[14]:

# Gather Data
get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats

slope, intercept, r_value, p_value, std_err = stats.linregress(x_axis, y_axis)
print("slope: " + str(round(slope, 4)))
print("r value: " + str(round(r_value, 4)))
print("r squared value: " + str(round(r_value**2, 4)))
print("p value: " + str(round(p_value, 8)))
print("std err: " + str(round(std_err, 3)))
plt.scatter(x_axis, y_axis)
plt.plot(np.unique(x_axis),
         np.poly1d(np.polyfit(x_axis, y_axis, 1))(np.unique(x_axis)))
plt.title('Transformation Efficiency in Aquarium from Integrant Length')
plt.xlabel('Plasmid Length')
plt.ylabel('Colonies Produced')
plt.show()

# In[ ]:
Example No. 51
def _slope(ts): # original (James?)
    x = np.arange(len(ts))
    log_ts = np.log(ts)
    slope, intercept, r_value, p_value, std_err = stats.linregress(x, log_ts)
    # NOTE: `slope` is a daily log return, and the missing "- 1" leaves this
    # value offset by ~100 relative to the corrected versions further down.
    annualized_slope = ((1 + slope)**250) * 100
    return annualized_slope * (r_value ** 2)
Example No. 52
def get_lines_from_period(fund: pd.DataFrame, kargs: list, interval: int,
                          **kwargs) -> list:
    """Get Lines from Period

    Arguments:
        fund {pd.DataFrame} -- fund dataset
        kargs {list} -- mins and maxes of x and y lists
        interval {int} -- period of time for a lookback of a trend

    Optional Args:
        vq {dict} -- volatility quotient, used to determine if a trendline is still valid at the
                     end of the period by providing a volatility threshold (default: {0.06})

    Returns:
        tuple -- (X, Y) lists of x indexes and trendline values for the period
    """
    vq = kwargs.get('vq', 0.06)

    EXTENSION = interval
    BREAK_LOOP = 50
    cycles = int(np.floor(len(fund['Close']) / interval))
    mins_y = kargs[1]
    mins_x = kargs[0]
    maxes_y = kargs[3]
    maxes_x = kargs[2]
    X = []
    Y = []

    for cycle in range(cycles):
        start = cycle * interval
        end = start + interval
        data = fund['Close'][start:end].copy()

        x = list(range(start, end))
        reg = linregress(x=x, y=data)
        use_min = reg[0] >= 0

        count = 0
        st_count = count
        if use_min:
            while (count < len(mins_x)) and (mins_x[count] < start):
                count += 1
                st_count = count

            end_count = st_count
            while (count < len(mins_x)) and (mins_x[count] < end):
                count += 1
                end_count = count

            datay = mins_y[st_count:end_count].copy()
            datax = mins_x[st_count:end_count].copy()
            dataz = {}
            dataz['x'] = datax
            dataz['y'] = datay
            dataw = pd.DataFrame.from_dict(dataz)
            # set_index returns a new frame, so this call is a no-op (and the
            # 'x' column is still needed by linregress below).
            dataw.set_index('x')
            datav = dataw.copy()

            stop_loop = 0
            while ((len(dataw['x']) > 0) and
                   (reg[0] > 0.0)) and (stop_loop < BREAK_LOOP):
                reg = linregress(x=dataw['x'], y=dataw['y'])
                datav = dataw.copy()
                dataw = dataw.loc[dataw['y'] < reg[0] * dataw['x'] + reg[1]]
                stop_loop += 1

            if reg[0] < 0.0:
                dataw = datav.copy()
                if len(dataw) >= 2:
                    reg = linregress(x=dataw['x'], y=dataw['y'])

        else:
            while (count < len(maxes_x)) and (maxes_x[count] < start):
                count += 1
                st_count = count

            end_count = st_count
            while (count < len(maxes_x)) and (maxes_x[count] < end):
                count += 1
                end_count = count

            datay = maxes_y[st_count:end_count].copy()
            datax = maxes_x[st_count:end_count].copy()
            dataz = {}
            dataz['x'] = datax
            dataz['y'] = datay
            dataw = pd.DataFrame.from_dict(dataz)
            dataw.set_index('x')  # no-op, as noted above
            datav = dataw.copy()

            stop_loop = 0
            while ((len(dataw['x']) > 0) and
                   (reg[0] < 0.0)) and (stop_loop < BREAK_LOOP):
                reg = linregress(x=dataw['x'], y=dataw['y'])
                datav = dataw.copy()
                dataw = dataw.loc[dataw['y'] > reg[0] * dataw['x'] + reg[1]]
                stop_loop += 1

            if reg[0] > 0.0:
                dataw = datav.copy()
                if len(dataw) >= 2:
                    reg = linregress(x=dataw['x'], y=dataw['y'])

        end = line_extender(fund, list(range(start, end)), reg)
        if end != 0:
            max_range = [start, end]

            if max_range[1] > len(fund['Close']):
                max_range[1] = len(fund['Close'])
            if interval > 100:
                max_range[1] = len(fund['Close'])
            if end + EXTENSION > int(0.9 * float(len(fund['Close']))):
                max_range[1] = len(fund['Close'])

            max_range[1] = line_reducer(fund, max_range[1], reg, threshold=vq)

            datax = list(range(max_range[0], max_range[1]))
            datay = [reg[0] * float(x) + reg[1] for x in datax]

            if (len(datay) > 0) and (not math.isnan(datay[0])):
                X.append(datax)
                Y.append(datay)

    return X, Y
Example No. 53
def slope(ts): ## new version
    x = np.arange(len(ts))  
    log_ts = np.log(ts)  
    slope, intercept, r_value, p_value, std_err = stats.linregress(x, log_ts)  
    annualized_slope = (np.power(np.exp(slope), 250) - 1) * 100 
    return annualized_slope * (r_value ** 2)
Example No. 54
    def compute(self, drift_tube_length=90.33, neutral_mass=28.013):
        """compute the ccs values based on the multi-field parameters
        """
        # ========================
        # given parameters
        # ========================
        # mass: scalar
        # drift_tube_length (cm): scalar
        # temperatures, T(C): array --> T(K) = T(C)+273.15
        T_K = np.array(self.temperatures) + 273.15
        # pressures, P(torr): array --> P(Pa) = P(torr)/760*101325
        P_torr = np.array(self.pressures)
        P_Pa = P_torr / 760 * 101325
        # voltage_cell, Vcell: array --> E = Vcell / drift_tube_length
        Vcell = np.array(self.voltages)
        E = Vcell / drift_tube_length
        inv_E = 1.0 / (E * 100.0)  # 1/E in m/V; computed but unused below
        # arrival_time (ms): array
        arrival_sec = np.array(self.arrival_time) / 1000
        # neutral_mass = 28.013 (N2 by default)
        # ========================
        # constant parameters
        # ========================
        e = 1.6021766208E-19  # elementary charge (C)
        charge_state = 1
        boltzmann_constant = 1.38064852E-23
        N0 = 101325 / boltzmann_constant / 273.15  # N0_(m-3)
        # ========================
        # computed parameters by given
        # ========================
        # P/V = P(torr) / Vcell
        self._p_v = P_torr / Vcell
        # E/N (Td) = E / P(torr) / 0.3535
        E_N = (E / P_torr) / 0.3535
        mass_in_kg = self.mass * 1.66054E-27
        neutral_mass_in_kg = neutral_mass * 1.66054E-27
        reduced_mass_in_kg = (mass_in_kg * neutral_mass_in_kg /
                              (mass_in_kg + neutral_mass_in_kg))
        # ========================

        slope, intercept, r_value, p_value, std_err = linregress(
            self._p_v, arrival_sec)
        # drift_time (sec) = arrival_sec - intercept
        drift_time = arrival_sec - intercept

        # compute CCS by Mason-Schamp Equation
        # ccs = 3 * e / 16 / N0 * np.sqrt(2 * np.pi / reduced_mass_in_kg / boltzmann_constant / T_K) \
        # * drift_time * 760 * T_K * Vcell / (drift_tube_length / 100)**2 / P_torr / 273.15 * 1E20
        K0 = drift_tube_length * drift_tube_length / slope * 273.15 / 760 / np.mean(
            T_K)
        ccs = 3 * e / 16 / N0 / K0 / 0.0001 * np.sqrt(
            2 * np.pi /
            (boltzmann_constant * reduced_mass_in_kg * np.mean(T_K))) * 1e20
        properties = {
            'slope': slope,
            'intercept': intercept,
            'r2': r_value**2,
            'p_value': p_value,
            'k0': K0,
            'ccs': ccs
        }
        for p in properties:
            self._metadata[p] = properties[p]
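For reference, both the commented-out expression and the K0 route used above are forms of the Mason-Schamp equation. In terms of the reduced mobility K0:

$$\Omega = \frac{3e}{16\,N_0 K_0}\sqrt{\frac{2\pi}{\mu k_B T}}$$

where mu is the ion-neutral reduced mass, T the drift-gas temperature, and N0 the gas number density at standard conditions. The factors 0.0001 and 1e20 in the code convert K0 from cm^2/(V s) to m^2/(V s) and the resulting cross section from m^2 to angstrom^2.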
Example No. 55
def momentum_func(self, price_array):
    r = np.log(price_array)
    slope, _, rvalue, _, _ = linregress(np.arange(len(r)), r)
    # As in _slope above, (1 + slope)**252 compounds a daily log return as if
    # it were a simple return and omits the "- 1"; exp(slope)**252 - 1 would
    # be the exact annualization.
    annualized = (1 + slope)**252
    return annualized * (rvalue**2)
Example No. 56
def slope_v(ts): # new (Vladimir)
    x = np.arange(len(ts))
    log_ts = np.log(ts) 
    slope, intercept, r_value, p_value, std_err = stats.linregress(x, log_ts)
    annualized_slope = ((1 + slope)**250 -1.0) * 100 
    return annualized_slope * (r_value ** 2) 
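The three momentum helpers in this collection (_slope, slope, slope_v) differ only in how they annualize the fitted daily log-price slope. A quick hedged comparison on a synthetic series growing a steady 0.1% per day:

import numpy as np
from scipy import stats

prices = 100 * np.exp(np.cumsum(np.full(250, 0.001)))  # 0.1% daily log return
x = np.arange(len(prices))
slope, _, r_value, _, _ = stats.linregress(x, np.log(prices))

print(((1 + slope) ** 250) * 100)                # _slope: ~128.4, missing the "- 1"
print((np.power(np.exp(slope), 250) - 1) * 100)  # slope: ~28.4% annualized
print(((1 + slope) ** 250 - 1.0) * 100)          # slope_v: ~28.4%, since exp(s) is ~ 1 + s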
Example No. 57
 def linear_trend(self):
     self.regression = stats.linregress(self.time,self.magnitude)
     self.features['linear trend'] = self.regression.slope 
Example No. 58
    # make those sets of the same size in case one is bigger than other
    normalized_length = min(len(company_data), len(market_data))
    company_data = company_data[:normalized_length]
    market_data = market_data[:normalized_length]

    # extract 'return' rows
    company_return = [row[2] for row in company_data]
    market_return = [row[2] for row in market_data]

    # extract estimation period: all observations that were earlier than the event window
    comp_est_period = company_return[EVENT_WINDOW:]
    market_est_period = market_return[EVENT_WINDOW:]

    # calculate linear regression over the estimation period
    beta, alpha, r_value, p_value, std_err = linregress(
        market_est_period, comp_est_period)

    # extrapolate the regression into the event window:
    # put market_data through the regression values and
    # calculate expected company return
    company_expected_return = []
    for idx, _ in enumerate(company_return[:EVENT_WINDOW]):  # only the index is used
        exp_val = market_return[idx] * beta + alpha
        company_expected_return.append(exp_val)

    # put data into numpy format
    company_expected_return_event_window = np.array(company_expected_return)
    company_return_event_window = np.array(company_return[:EVENT_WINDOW])

    # calculate abnormal return
    abnormal_return = company_return_event_window - company_expected_return_event_window
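    # The excerpt stops at daily abnormal returns; the usual next step in an
    # event study (an assumption here, not shown in the source) is to
    # cumulate them over the event window:
    cumulative_abnormal_return = np.cumsum(abnormal_return)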
Example No. 59
     elif tag in ADJ: adjs.append(w)
 wLen.append(len(words))
 vLen.append(len(verbs))
 nLen.append(len(nouns))
 advLen.append(len(advs))
 adjLen.append(len(adjs))


plotData0 = [(wLen, vLen), (wLen, nLen), (wLen, adjLen)]
yaxisLabels = ['V x 1000', 'N x 1000', 'ADJ x 1000']
plt.figure(figsize=(7.5,7.5))


for (pane, data) in enumerate(plotData0):
 X, Y = data[0], data[1]
 slope, intercept = stats.linregress(X, Y)[0:2]
 rX = slope*array(X) + intercept  # `array` implies a prior `from numpy import array`
 plt.subplot(2, 2, pane+1)
 plt.scatter(X, Y)
 plt.plot(X, rX, 'r',
  label='slope={},\nintercept={}'.format(
  round(slope,2),
  round(intercept,2)))
 plt.ylim(plt.xlim())
 wTicks = [int(tk/1000) for tk in plt.gca().get_xticks()]
 plt.gca().set_xticklabels(wTicks)
 plt.gca().set_yticklabels(wTicks)
 offset = (plt.gca().get_xticks()[1]-plt.gca().get_xticks()[0])/10
 for pt in range(len(X)):
  plt.annotate(str(pt), # scifiCorpus[i]
   xy=(X[pt], Y[pt]),
Example No. 60
print(x11.shape)
print(y11.shape)
x11a = np.array([x11, y11])
print(x11a.shape, 'x11a.shape')
x11b = x11a[:, ~np.isnan(x11a).any(axis=0)]
print(x11b.shape, 'X11b.shape')
print(x11b, 'X11b')
x1 = x11b[0]
y1 = x11b[1]
print(x1, 'x1')
print(y1, 'y1')

x1y1 = np.vstack([x1, y1])
z1 = gaussian_kde(x1y1)(x1y1)

slope1, intercept1, r_value1, p_value1, std_err1 = stats.linregress(x1, y1)
line1 = slope1 * x1 + intercept1
correlate_annual = stats.pearsonr(x1, y1)

#Regression ~ using bootstrap
#ind=np.where((gc_data_ammonia_annual!=0)&(sites_ammonia_AM!=0))
#print (ind)
regres_annual = rma(x1, y1, (len(x1)), 1000)  # rma: presumably a custom reduced-major-axis regression with bootstrap
print('slope annual: ', regres_annual[0])
print('Intercept annual: ', regres_annual[1])
print('slope error annual: ', regres_annual[2])
print('Intercept error annual: ', regres_annual[3])

#plotting scatter plot
title_list1 = 'GC and IASI NH$_3$ Column (Annual)'
fig1 = plt.figure(facecolor='White', figsize=[11, 11])