def probplot(x, sparams=(), dist='norm', fit=1, plot=None):
    """Return (osm, osr){,(scale,loc,r)} for a probability plot of `x`.

    (osm, osr) are the order statistic medians and the ordered response
    data, so that plot(osm, osr) is a probability plot.

    Parameters:
        x       -- sample data (any sequence accepted by len() and sort()).
        sparams -- shape parameter(s) passed positionally to the
                   distribution's ppf after the probabilities.  None, a
                   scalar or any iterable is accepted.
        dist    -- name of an attribute of `distributions` that has a `ppf`.
        fit     -- if true, also return (slope, intercept, r) of the
                   least-squares line through the points.
        plot    -- optional object with plot/title/xlabel/ylabel methods;
                   expand_limits and addtext are used when they work.

    Returns:
        (osm, osr), (slope, intercept, r) if `fit` is true, otherwise
        just (osm, osr).

    Raises:
        ValueError -- unknown distribution, or a ppf whose explicit
                      signature does not match `sparams`.
    """
    N = len(x)
    # Approximate medians of the uniform order statistics.
    Ui = zeros(N) * 1.0
    Ui[-1] = 0.5 ** (1.0 / N)
    Ui[0] = 1 - Ui[-1]
    i = arange(2, N)
    Ui[1:-1] = (i - 0.3175) / (N + 0.365)

    # Plain attribute lookup replaces eval() on the caller-supplied name:
    # same result for a distribution name, no arbitrary code execution.
    try:
        ppf_func = getattr(distributions, dist).ppf
    except AttributeError:
        raise ValueError("%s is not a valid distribution with a ppf." % dist)

    if sparams is None:
        sparams = ()
    if isscalar(sparams):
        sparams = (sparams,)
    if not isinstance(sparams, tuple):
        sparams = tuple(sparams)

    _check_ppf_signature(ppf_func, sparams)

    osm = ppf_func(Ui, *sparams)
    osr = sort(x)
    if fit or (plot is not None):
        # The fitted line is needed for the plot even when fit is false.
        slope, intercept, r, prob, sterrest = stats.linregress(osm, osr)
    if plot is not None:
        # Optional legacy backend: call its limits() when it is installed.
        # Best-effort, so ordinary failures are ignored on purpose.
        try:
            import scipy.xplt as xplt
            xplt.limits()
        except Exception:
            pass
        plot.plot(osm, osr, 'o', osm, slope * osm + intercept)
        plot.title('Probability Plot')
        plot.xlabel('Order Statistic Medians')
        plot.ylabel('Ordered Values')
        try:
            plot.expand_limits(5)
        except Exception:
            pass  # not every plotting object offers expand_limits
        xmin, xmax = amin(osm), amax(osm)
        ymin, ymax = amin(x), amax(x)
        pos = xmin + 0.70 * (xmax - xmin), ymin + 0.01 * (ymax - ymin)
        try:
            # The label says r squared, so show r**2 (the original showed r).
            plot.addtext("r^2^=%1.4f" % (r ** 2), xy=pos, tosys=1)
        except Exception:
            pass  # not every plotting object offers addtext
    if fit:
        return (osm, osr), (slope, intercept, r)
    else:
        return osm, osr


def _check_ppf_signature(ppf_func, sparams):
    """Validate `sparams` against the explicit signature of `ppf_func`, if it has one."""
    # getargspec was removed from recent Pythons; both functions return
    # (args, varargs, keywords, defaults, ...) in the first four slots.
    getspec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec
    try:
        spec = getspec(ppf_func)
    except TypeError:
        return  # callable cannot be introspected; let the ppf validate
    arg_names, varargs, varkw = spec[0], spec[1], spec[2]
    defaults = spec[3] or ()
    if varargs is not None or varkw is not None:
        # A *args/**kwds signature hides the real parameters, so there is
        # nothing to check here; the ppf rejects bad arguments itself.
        return
    has_loc_scale = (
        len(arg_names) >= 2 and len(defaults) >= 2
        and arg_names[-2] == 'loc' and arg_names[-1] == 'scale'
        and defaults[-2] == 0.0 and defaults[-1] == 1.0
    )
    if not has_loc_scale:
        raise ValueError("Function does not have default location "
                         "and scale parameters\n  that are 0.0 and 1.0 "
                         "respectively.")
    nargs = len(arg_names)
    if (len(sparams) < nargs - len(defaults) - 1) or (len(sparams) > nargs - 3):
        raise ValueError("Incorrect number of shape parameters.")
def probplot(x, sparams=(), dist='norm', fit=1, plot=None):
    """Return (osm, osr){,(scale,loc,r)} for a probability plot of `x`.

    (osm, osr) are the order statistic medians and the ordered response
    data, so that plot(osm, osr) is a probability plot.

    Parameters:
        x       -- sample data (any sequence accepted by len() and sort()).
        sparams -- shape parameter(s) passed positionally to the
                   distribution's ppf after the probabilities.  None, a
                   scalar or any iterable is accepted.
        dist    -- name of an attribute of `distributions` that has a `ppf`.
        fit     -- if true, also return (slope, intercept, r) of the
                   least-squares line through the points.
        plot    -- optional object with plot, title, xlabel, ylabel and
                   text methods; when given, the points and the fitted
                   line are drawn on it.

    Returns:
        (osm, osr), (slope, intercept, r) if `fit` is true, otherwise
        just (osm, osr).

    Raises:
        ValueError -- `dist` does not name a distribution with a ppf.
    """
    N = len(x)
    # Approximate medians of the uniform order statistics.
    Ui = zeros(N) * 1.0
    Ui[-1] = 0.5 ** (1.0 / N)
    Ui[0] = 1 - Ui[-1]
    i = arange(2, N)
    Ui[1:-1] = (i - 0.3175) / (N + 0.365)

    # Plain attribute lookup replaces eval() on the caller-supplied name:
    # same result for a distribution name, no arbitrary code execution.
    try:
        ppf_func = getattr(distributions, dist).ppf
    except AttributeError:
        raise ValueError("%s is not a valid distribution with a ppf." % dist)

    if sparams is None:
        sparams = ()
    if isscalar(sparams):
        sparams = (sparams,)
    # The builtin `tuple` works on Python 2 and 3; types.TupleType does not.
    if not isinstance(sparams, tuple):
        sparams = tuple(sparams)

    # The ppf signature is deliberately not validated here; a wrong number
    # of shape parameters is reported by the ppf call itself.
    osm = ppf_func(Ui, *sparams)
    osr = sort(x)
    if fit or (plot is not None):
        # The fitted line is needed for the plot even when fit is false.
        slope, intercept, r, prob, sterrest = stats.linregress(osm, osr)
    if plot is not None:
        plot.plot(osm, osr, 'o', osm, slope * osm + intercept)
        plot.title('Probability Plot')
        plot.xlabel('Order Statistic Medians')
        plot.ylabel('Ordered Values')
        xmin, xmax = amin(osm), amax(osm)
        ymin, ymax = amin(x), amax(x)
        # Annotation goes near the lower right of the data range.
        posx = xmin + 0.70 * (xmax - xmin)
        posy = ymin + 0.01 * (ymax - ymin)
        # The label says r squared, so show r**2 (the original showed r).
        plot.text(posx, posy, "r^2=%1.4f" % (r ** 2))
    if fit:
        return (osm, osr), (slope, intercept, r)
    else:
        return osm, osr
def calculate_test(self):
    """Regress the experimental variable on every data column and report each fit.

    Works on a copy of ``self.data`` to which ``self.samples`` is added as
    column ``self.var``.  For every other column, the result of
    ``stats.linregress(column, self.var column)`` is written to
    ``self.results`` and printed.

    Failures are reported on stdout instead of being raised, as before, but
    only ordinary exceptions are trapped (KeyboardInterrupt and SystemExit
    now propagate) and the cause is included in the message.
    """
    try:
        df = self.data.copy()
        df[self.var] = self.samples
        for column in df.columns:
            if column == self.var:
                continue  # do not regress the variable on itself
            reg = stats.linregress(df[column], df[self.var])
            report = "\nLinear Regression to " + column + ": " + str(reg) + "\n"
            self.results.write(report)
            print(report)
    except Exception as exc:
        print("Error in regression: %s" % exc)
def linearFit(f0s):
    """
    Thin wrapper around stats.linregress for an F0 track.

    Input:  list of (time, F0) tuples
    Output: (slope, intercept) of the regression; times are taken relative
            to the first entry's time before fitting

    NOTE(review): the F0 values are passed as the first argument to
    linregress and the relative times as the second -- confirm that this
    ordering is what callers expect.
    """
    from stats import linregress

    pitches = []
    elapsed = []
    for stamp, pitch in f0s:
        pitches.append(pitch)
        elapsed.append(stamp - f0s[0][0])  # time since the first sample

    # Only the first two of the five returned values are used.
    slope, intercept, _unused_a, _unused_b, _unused_c = linregress(pitches, elapsed)
    return slope, intercept
def estimatePoissonYoung(principalAxis, stress=0, plot=False, cutoff=0.):
    """Estimate Poisson's ratio and Young's modulus for a given principal strain axis.

    For each base direction the homogenized strain is the slope of a linear
    regression of particle displacement on particle coordinate along that
    direction (data from utils.coordsAndDisplacements).  With the strained
    axis denoted 0, Poisson's ratio is -0.5*(e1 + e2)/e0 and Young's modulus
    is stress/e0; with the default stress of 0 the modulus is 0.

    principalAxis -- index (0, 1 or 2) of the strained axis.
    stress        -- applied stress used for Young's modulus.
    plot          -- if true, show the raw points and fitted lines with pylab.
    cutoff        -- if > 0, only a smaller box, utils.fractionalBox(
                     fraction=1 - cutoff), of the specimen is considered.

    Returns the tuple (Poisson's ratio, Young's modulus).
    Raises ImportError when the stats module is not installed.
    """
    import pylab
    import numpy
    try:
        import stats
    except ImportError:
        raise ImportError(
            "Unable to import stats; install the python-stats package.")
    from woo import utils

    axes = (0, 1, 2)
    names = 'xyz'
    fits = []  # per axis: (slope, intercept, min coordinate, max coordinate)
    clipped = cutoff > 0
    if clipped:
        cut = utils.fractionalBox(fraction=1 - cutoff)

    for axis in axes:
        if clipped:
            coords, disps = utils.coordsAndDisplacements(axis, Aabb=cut)
        else:
            coords, disps = utils.coordsAndDisplacements(axis)
        # Only the first two regression outputs are kept.
        slope, intercept = stats.linregress(coords, disps)[0:2]
        fits.append((slope, intercept, min(coords), max(coords)))
        if plot:
            pylab.plot(coords, disps, '.',
                       label=r'$\Delta %s(%s)$' % (names[axis], names[axis]))

    if plot:
        for axis in axes:
            slope, intercept, low, high = fits[axis]
            span = high - low
            # Draw each fitted line 20% beyond the data range on both sides.
            xs = numpy.linspace(low - .2 * span, high + .2 * span)
            ys = [slope * xx + intercept for xx in xs]
            pylab.plot(xs, ys,
                       label=r'$\widehat{\Delta %s}(%s)$' % (names[axis], names[axis]))
        pylab.legend(loc='upper left')
        pylab.xlabel(r'$x,\;y,\;z$')
        pylab.ylabel(r'$\Delta x,\;\Delta y,\; \Delta z$')
        pylab.show()

    first_other = (principalAxis + 1) % 3
    second_other = (principalAxis + 2) % 3
    transverse_strain = .5 * (fits[first_other][0] + fits[second_other][0])
    principal_strain = fits[principalAxis][0]
    return -transverse_strain / principal_strain, stress / principal_strain
def tilt(fft):
    """
    Compute the spectral tilt of one FFT frame.

    Input:  a single (time, list of energies) tuple
    Output: (slope, intercept) of a linear regression of energy on bin
            position over energies[6:48] -- per the original author's note
            this is the 500 Hz to 4 KHz range, assuming CD-quality audio
    """
    from stats import linregress

    energies = fft[1]
    band = energies[6:48]
    positions = range(len(band))  # 0-based position within the band
    # Only the first two of the five returned values are used.
    slope, intercept, _unused_a, _unused_b, _unused_c = linregress(positions, band)
    return slope, intercept
def calculate_test(self):
    """Regress the experimental variable on all data values pooled together.

    Sorts a copy of ``self.data`` by index, then pairs every value of row
    ``i`` with ``int(self.samples[i])`` and runs a single
    ``stats.linregress(values, variables)``.  The result is written to
    ``self.results`` and printed.

    Failures are reported on stdout instead of being raised, as before, but
    only ordinary exceptions are trapped (KeyboardInterrupt and SystemExit
    now propagate) and the cause is included in the message.

    NOTE(review): samples are matched to rows by position *after* the sort,
    so this presumably expects self.samples in index-sorted order -- confirm
    against the caller.
    """
    try:
        values = []
        variables = []
        df = self.data.copy().sort_index()
        for i, index in enumerate(df.index):
            # One sample label applies to every value in the row.
            label = int(self.samples[i])
            for value in df.loc[index]:
                values.append(value)
                variables.append(label)
        reg = stats.linregress(values, variables)
        report = "\nAll samples to exp. variables " + str(reg) + "\n"
        self.results.write(report)
        print(report)
    except Exception as exc:
        print("Error in regression: %s" % exc)
def estimatePoissonYoung(principalAxis,stress=0,plot=False,cutoff=0.):
    """Estimate Poisson's ratio and Young's modulus for a given principal strain axis.

    For each base direction the homogenized strain is the slope of a linear
    regression of particle displacement on particle coordinate along that
    direction (data from utils.coordsAndDisplacements).  With the strained
    axis denoted 0, Poisson's ratio is -0.5*(e1 + e2)/e0 and Young's modulus
    is stress/e0; with the default stress of 0 the modulus is 0.

    principalAxis -- index (0, 1 or 2) of the strained axis.
    stress        -- applied stress used for Young's modulus.
    plot          -- if true, show the raw points and fitted lines with pylab.
    cutoff        -- if > 0, only a smaller box, utils.fractionalBox(
                     fraction=1 - cutoff), of the specimen is considered.

    Returns the tuple (Poisson's ratio, Young's modulus).
    Raises ImportError when the stats module is not installed.
    """
    import pylab
    import numpy
    try:
        import stats
    except ImportError:
        raise ImportError("Unable to import stats; install the python-stats package.")
    from woo import utils

    axis_names = 'xyz'
    regressions = []  # per axis: (slope, intercept, min coordinate, max coordinate)
    use_cut = cutoff > 0
    if use_cut:
        cut = utils.fractionalBox(fraction=1 - cutoff)

    for axis in range(3):
        if use_cut:
            positions, shifts = utils.coordsAndDisplacements(axis, Aabb=cut)
        else:
            positions, shifts = utils.coordsAndDisplacements(axis)
        # Only the first two regression outputs are kept.
        tangent, offset = stats.linregress(positions, shifts)[0:2]
        regressions.append((tangent, offset, min(positions), max(positions)))
        if plot:
            name = axis_names[axis]
            pylab.plot(positions, shifts, '.', label=r'$\Delta %s(%s)$' % (name, name))

    if plot:
        for axis in range(3):
            tangent, offset, lower, upper = regressions[axis]
            extent = upper - lower
            # Draw each fitted line 20% beyond the data range on both sides.
            grid = numpy.linspace(lower - .2 * extent, upper + .2 * extent)
            line = [tangent * point + offset for point in grid]
            name = axis_names[axis]
            pylab.plot(grid, line, label=r'$\widehat{\Delta %s}(%s)$' % (name, name))
        pylab.legend(loc='upper left')
        pylab.xlabel(r'$x,\;y,\;z$')
        pylab.ylabel(r'$\Delta x,\;\Delta y,\; \Delta z$')
        pylab.show()

    other_a = (principalAxis + 1) % 3
    other_b = (principalAxis + 2) % 3
    mean_transverse = .5 * (regressions[other_a][0] + regressions[other_b][0])
    principal = regressions[principalAxis][0]
    return -mean_transverse / principal, stress / principal
def probplot(x, sparams=(), dist='norm', fit=True, plot=None):
    """
    Calculate quantiles for a probability plot of sample data against a
    specified theoretical distribution.

    `probplot` optionally calculates a best-fit line for the data and plots
    the results using Matplotlib or a given plot function.

    Parameters
    ----------
    x : array_like
        Sample/response data from which `probplot` creates the plot.
    sparams : tuple, optional
        Distribution-specific parameters, passed positionally to the
        distribution's ``ppf`` after the probabilities.  ``None``, a
        scalar or any iterable is also accepted.
    dist : str, optional
        Distribution function name. The default is 'norm' for a normal
        probability plot.
    fit : bool, optional
        Fit a least-squares regression (best-fit) line to the sample data
        if True (default).
    plot : object, optional
        If given, plots the quantiles and least squares fit.
        `plot` is an object with methods "plot", "title", "xlabel",
        "ylabel" and "text". The matplotlib.pyplot module or a Matplotlib
        axes object can be used, or a custom object with the same methods.
        By default, no plot is created.

    Returns
    -------
    (osm, osr) : tuple of ndarrays
        Tuple of theoretical quantiles (osm, or order statistic medians)
        and ordered responses (osr).
    (slope, intercept, r) : tuple of floats, optional
        Tuple containing the result of the least-squares fit. `r` is the
        square root of the coefficient of determination. This tuple is
        returned only if `fit` is true; with ``fit=False`` the fit is
        still computed when `plot` is given, but it is not returned.

    Raises
    ------
    ValueError
        If `dist` does not name a distribution with a ``ppf``.

    Notes
    -----
    Even if `plot` is given, the figure is not shown or saved by
    `probplot`; ``plot.show()`` or ``plot.savefig('figname.png')`` should
    be used after calling `probplot`.

    Examples
    --------
    >>> import numpy as np
    >>> import matplotlib.pyplot as plt
    >>> import scipy.stats as stats
    >>> nsample = 100
    >>> np.random.seed(7654321)

    A t distribution with small degrees of freedom:

    >>> ax1 = plt.subplot(221)
    >>> x = stats.t.rvs(3, size=nsample)
    >>> res = stats.probplot(x, plot=plt)

    A t distribution with larger degrees of freedom:

    >>> ax2 = plt.subplot(222)
    >>> x = stats.t.rvs(25, size=nsample)
    >>> res = stats.probplot(x, plot=plt)

    A mixture of 2 normal distributions with broadcasting:

    >>> ax3 = plt.subplot(223)
    >>> x = stats.norm.rvs(loc=[0,5], scale=[1,1.5],
    ...                    size=(nsample//2,2)).ravel()
    >>> res = stats.probplot(x, plot=plt)

    A standard normal distribution:

    >>> ax4 = plt.subplot(224)
    >>> x = stats.norm.rvs(loc=0, scale=1, size=nsample)
    >>> res = stats.probplot(x, plot=plt)

    """
    N = len(x)
    # Approximate medians of the uniform order statistics.
    Ui = zeros(N) * 1.0
    Ui[-1] = 0.5 ** (1.0 / N)
    Ui[0] = 1 - Ui[-1]
    i = arange(2, N)
    Ui[1:-1] = (i - 0.3175) / (N + 0.365)

    # Plain attribute lookup replaces eval() on the caller-supplied name:
    # same result for a distribution name, no arbitrary code execution.
    try:
        ppf_func = getattr(distributions, dist).ppf
    except AttributeError:
        raise ValueError("%s is not a valid distribution with a ppf." % dist)

    if sparams is None:
        sparams = ()
    if isscalar(sparams):
        sparams = (sparams,)
    # The builtin `tuple` works on Python 2 and 3; types.TupleType does not.
    if not isinstance(sparams, tuple):
        sparams = tuple(sparams)

    # The ppf signature is deliberately not validated here; a wrong number
    # of parameters is reported by the ppf call itself.
    osm = ppf_func(Ui, *sparams)
    osr = sort(x)
    if fit or (plot is not None):
        # The fitted line is needed for the plot even when fit is False.
        slope, intercept, r, prob, sterrest = stats.linregress(osm, osr)

    if plot is not None:
        plot.plot(osm, osr, 'o', osm, slope * osm + intercept)
        plot.title('Probability Plot')
        plot.xlabel('Quantiles')
        plot.ylabel('Ordered Values')

        xmin = amin(osm)
        xmax = amax(osm)
        ymin = amin(x)
        ymax = amax(x)
        # Annotation goes near the lower right of the data range.
        posx = xmin + 0.70 * (xmax - xmin)
        posy = ymin + 0.01 * (ymax - ymin)
        # The label says r squared, so show r**2 (the original showed r).
        plot.text(posx, posy, "r^2=%1.4f" % (r ** 2))

    if fit:
        return (osm, osr), (slope, intercept, r)
    else:
        return osm, osr
# Printout of the correlation and inferential routines of `stats`.
# Each routine is called on two pairs of inputs (l/m and a/b, plus pb/apb
# for the point-biserial case); presumably these are list and array
# versions of the same data -- confirm where they are defined.

# Two blank separator lines.  The original `print(print)` printed the repr
# of the built-in print function, a botched conversion of two bare
# Python 2 `print` statements.
print()
print()
print('pearsonr:')
print(stats.pearsonr(l,m))
print(stats.pearsonr(a,b))
print('spearmanr:')
print(stats.spearmanr(l,m))
print(stats.spearmanr(a,b))
print('pointbiserialr:')
print(stats.pointbiserialr(pb,l))
print(stats.pointbiserialr(apb,a))
print('kendalltau:')
print(stats.kendalltau(l,m))
print(stats.kendalltau(a,b))
print('linregress:')
print(stats.linregress(l,m))
print(stats.linregress(a,b))

print('\nINFERENTIAL')
print('ttest_1samp:')
print(stats.ttest_1samp(l,12))
print(stats.ttest_1samp(a,12))
print('ttest_ind:')
print(stats.ttest_ind(l,m))
print(stats.ttest_ind(a,b))
print('ttest_rel:')
print(stats.ttest_rel(l,m))
print(stats.ttest_rel(a,b))
print('chisquare:')
print(stats.chisquare(l))
print(stats.chisquare(a))
print('ks_2samp:')
# Printout of the correlation and inferential routines of `stats`.
# Each routine is called on two pairs of inputs (l/m and a/b, plus pb/apb
# for the point-biserial case); presumably these are list and array
# versions of the same data -- confirm where they are defined.
#
# The original used Python 2 print statements, a SyntaxError on Python 3.
# The parenthesised single-argument form prints the same text on both;
# print('') is used for blank lines because print() would show "()" on
# Python 2.
print('')
print('')
print('pearsonr:')
print(stats.pearsonr(l,m))
print(stats.pearsonr(a,b))
print('spearmanr:')
print(stats.spearmanr(l,m))
print(stats.spearmanr(a,b))
print('pointbiserialr:')
print(stats.pointbiserialr(pb,l))
print(stats.pointbiserialr(apb,a))
print('kendalltau:')
print(stats.kendalltau(l,m))
print(stats.kendalltau(a,b))
print('linregress:')
print(stats.linregress(l,m))
print(stats.linregress(a,b))

print('\nINFERENTIAL')
print('ttest_1samp:')
print(stats.ttest_1samp(l,12))
print(stats.ttest_1samp(a,12))
print('ttest_ind:')
print(stats.ttest_ind(l,m))
print(stats.ttest_ind(a,b))
print('ttest_rel:')
print(stats.ttest_rel(l,m))
print(stats.ttest_rel(a,b))
print('chisquare:')
print(stats.chisquare(l))
print(stats.chisquare(a))
# Printout of the correlation and inferential routines of `stats`.
# Each routine is called on two pairs of inputs (l/m and a/b, plus pb/apb
# for the point-biserial case); presumably these are list and array
# versions of the same data -- confirm where they are defined.
#
# The original used Python 2 print statements, a SyntaxError on Python 3.
# The parenthesised single-argument form prints the same text on both;
# print('') is used for blank lines because print() would show "()" on
# Python 2.
print('')
print('')
print('pearsonr:')
print(stats.pearsonr(l, m))
print(stats.pearsonr(a, b))
print('spearmanr:')
print(stats.spearmanr(l, m))
print(stats.spearmanr(a, b))
print('pointbiserialr:')
print(stats.pointbiserialr(pb, l))
print(stats.pointbiserialr(apb, a))
print('kendalltau:')
print(stats.kendalltau(l, m))
print(stats.kendalltau(a, b))
print('linregress:')
print(stats.linregress(l, m))
print(stats.linregress(a, b))

print('\nINFERENTIAL')
print('ttest_1samp:')
print(stats.ttest_1samp(l, 12))
print(stats.ttest_1samp(a, 12))
print('ttest_ind:')
print(stats.ttest_ind(l, m))
print(stats.ttest_ind(a, b))
print('ttest_rel:')
print(stats.ttest_rel(l, m))
print(stats.ttest_rel(a, b))
print('chisquare:')
print(stats.chisquare(l))
print(stats.chisquare(a))
# Printout of the `stats` correlation and inferential routines.
# stats.paired(l,l) stays disabled, as in the original listing.
#
# Each table row is (heading, name of the stats function, argument tuples).
# The heading is printed first, then the function is looked up and called
# once per argument tuple.  Rows without a function print only the heading.
print()
print()
for _heading, _func_name, _arg_sets in (
    ('pearsonr:', 'pearsonr', ((l, m), (l, l))),
    ('spearmanr:', None, ()),
    ('pointbiserialr:', 'pointbiserialr', ((pb, l), (pb, l))),
    ('kendalltau:', 'kendalltau', ((l, m), (l, l))),
    ('linregress:', 'linregress', ((l, m), (l, l))),
    ('\nINFERENTIAL', None, ()),
    ('ttest_1samp:', 'ttest_1samp', ((l, 12), (l, 12))),
    ('ttest_ind:', 'ttest_ind', ((l, m), (l, l))),
    ('chisquare:', 'chisquare', ((l,), (l,))),
    ('ks_2samp:', 'ks_2samp', ((l, m), (l, l))),
):
    print(_heading)
    for _args in _arg_sets:
        # Lookup happens here, right before the call, as in the flat listing.
        print(getattr(stats, _func_name)(*_args))
# Difference between receive and send timestamp for each of the n samples.
diff_timestamps = [recv_timestamps[i] - send_timestamps[i] for i in range(n)]

# Fit a straight line to the differences against the sample index and
# subtract it, leaving the residuals (presumably to remove a linear clock
# drift between sender and receiver -- confirm).
slope, intercept, _, _, _ = stats.linregress(range(n), diff_timestamps)
corrected = [delay - (slope * i + intercept)
             for i, delay in enumerate(diff_timestamps)]

# Shift the residuals so that the smallest one becomes zero.
diff_min = numpy.min(corrected)
skewed = [x - diff_min for x in corrected]

# One value per line.  The parenthesised form prints the same text on
# Python 2 and 3; the original print statement was Python 2 only.
for x in skewed:
    print(x)
# Worked examples checked by eye: each 'SHOULD BE' line prints the value
# quoted from the textbook, followed by what `stats` computes for the same
# data.  All data are stored as floats.

print('\n\nPoint-Biserial r')
gender = [1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0]
score = [35.0, 38.0, 41.0, 40.0, 60.0, 65.0, 65.0, 68.0, 68.0, 64.0]
print('\nSHOULD BE +0.981257 (N=10) ... Basic Stats 1st ed, p.197')
print(stats.pointbiserialr(gender,score))

print('\n\nLinear Regression')
x = [1.0, 1.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0]
y = [2.0, 4.0, 4.0, 6.0, 2.0, 4.0, 7.0, 8.0, 6.0, 8.0, 7.0]
print('\nSHOULD BE 1.44, 1.47, 0.736, ???, 1.42 (N=11)... Basic Stats 1st ed, p.211-2')
print(stats.linregress(x,y))

print('\n\nChi-Square')
fo = [10.0, 40.0]
print('\nSHOULD BE 18.0, <<<0.01 (df=1) ... Basic Stats 1st ed. p.457')
print(stats.chisquare(fo))
print('\nSHOULD BE 5.556, 0.01<p<0.05 (df=1) ... Basic Stats 1st ed. p.460')
print(stats.chisquare(fo,[5,45]))

print('\n\nMann Whitney U')
red = [540.0, 480.0, 600.0, 590.0, 605.0]
black = [760.0, 890.0, 1105.0, 595.0, 940.0]
print('\nSHOULD BE 2.0, 0.01<p<0.05 (N=5,5) ... Basic Stats 1st ed, p.473-4')