def test_betai(self):
    np.random.seed(12345)
    for i in range(10):
        a = np.random.rand() * 5.
        b = np.random.rand() * 200.
        assert_equal(stats.betai(a, b, 0.), 0.)
        assert_equal(stats.betai(a, b, 1.), 1.)
        assert_equal(stats.mstats.betai(a, b, 0.), 0.)
        assert_equal(stats.mstats.betai(a, b, 1.), 1.)
        x = np.random.rand()
        assert_almost_equal(stats.betai(a, b, x),
                            stats.mstats.betai(a, b, x), decimal=13)
def test_betai(self): """ test incomplete beta function """ for i in range(10): a = np.random.rand()*5. b = np.random.rand()*200. assert_equal(stats.betai(a,b,0.),0.) assert_equal(stats.betai(a,b,1.),1.) assert_equal(stats.mstats.betai(a,b,0.),0.) assert_equal(stats.mstats.betai(a,b,1.),1.) for i in range(10): a = np.random.rand()*5. b = np.random.rand()*200. x = np.random.rand() assert_equal(stats.betai(a,b,x),stats.mstats.betai(a,b,x))
def test_betai(self):
    np.random.seed(12345)
    for i in range(10):
        a = np.random.rand() * 5.
        b = np.random.rand() * 200.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=DeprecationWarning)
            assert_equal(stats.betai(a, b, 0.), 0.)
            assert_equal(stats.betai(a, b, 1.), 1.)
            assert_equal(stats.mstats.betai(a, b, 0.), 0.)
            assert_equal(stats.mstats.betai(a, b, 1.), 1.)
            x = np.random.rand()
            assert_almost_equal(stats.betai(a, b, x),
                                stats.mstats.betai(a, b, x), decimal=13)
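# Note on the tests above: stats.betai was deprecated and later removed from
# scipy.stats; the maintained equivalent is the regularized incomplete beta
# function scipy.special.betainc, which takes the same (a, b, x) arguments.
# A minimal sketch of the correspondence the tests exercise:
import numpy as np
from scipy.special import betainc

np.random.seed(12345)
a = np.random.rand() * 5.
b = np.random.rand() * 200.
assert betainc(a, b, 0.) == 0.
assert betainc(a, b, 1.) == 1.
print(betainc(a, b, 0.3))  # the value stats.betai(a, b, 0.3) used to return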
def get_pearsons_ps(pcorrel):
    from scipy.stats import betai
    df = pcorrel.shape[0]
    ix, iy = np.diag_indices_from(pcorrel)
    pcorrel[ix, iy] -= 1e-7
    t_sq = pcorrel**2 * (df / ((1.0 - pcorrel) * (1.0 + pcorrel)))
    return betai(0.5 * df, 0.5, df / (df + t_sq))
def pearsonr(x, y):
    """
    generalized from scipy.stats.pearsonr
    """
    # x and y should have same length.
    x_shape = x.shape
    if len(x_shape) > 1:
        x = x.reshape((x_shape[0], prod(x_shape[1:])))
    x = np.asarray(x)
    y = np.asarray(y)
    n = len(x)
    mx = x.mean(0)
    my = y.mean(0)
    xm, ym = x - mx, y - my
    r_num = n * np.dot(xm.T, ym)
    r_den = n * np.sqrt(np.outer(ss(xm), ss(ym, 0)))
    r = (r_num / r_den)

    # Presumably, if r > 1, then it is only some small artifact of floating
    # point arithmetic.
    r = np.minimum(r, 1.0)
    df = n - 2

    # Use a small floating point value to prevent divide-by-zero nonsense
    # fixme: TINY is probably not the right value and this is probably not
    # the way to be robust. The scheme used in spearmanr is probably better.
    TINY = 1.0e-20
    t = r * np.sqrt(df / ((1.0 - r + TINY) * (1.0 + r + TINY)))
    prob = betai(0.5 * df, 0.5, df / (df + t * t))
    return r, prob
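# The r-to-p conversion above -- t = r*sqrt(df/(1-r^2)), then
# betai(df/2, 1/2, df/(df + t^2)) -- is the standard two-sided p-value for a
# Pearson correlation. A self-contained check against scipy.stats.pearsonr,
# with scipy.special.betainc standing in for the removed stats.betai:
import numpy as np
from scipy import stats
from scipy.special import betainc

rng = np.random.RandomState(0)
x, y = rng.randn(50), rng.randn(50)
r, p_ref = stats.pearsonr(x, y)
df = len(x) - 2
t2 = r * r * df / (1.0 - r * r)        # squared t statistic
p = betainc(0.5 * df, 0.5, df / (df + t2))
assert abs(p - p_ref) < 1e-8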
def p_adj_map_from_scores(r, n=3539):
    '''Creates a p map with adjusted p values from scores (correlations)'''
    from scipy.stats import betai
    df = n - 2
    t_squared = r * r * (df / ((1.0 - r) * (1.0 + r)))
    prob = betai(0.5 * df, 0.5, df / (df + t_squared))
    return fdrcorrection0(prob)
def pearson_corr(x, field):
    """Pearson correlation with 2-sided t-test

    Parameters:
        x: ndarray
            A 1D array time series.
        field: ndarray
            A 3D array of field values. The first dimension of the array
            needs to be time.

    Returns:
        (ndarray, ndarray)
            Two ndarrays. A 2D array of Pearson correlation values and a
            2D array of p-values.

    Notes:
        The p-values returned by this function are from a two-sided
        Student's t-distribution. The test is against the null hypothesis
        that the correlation is not significantly different from 0.
        This function could use some more work.
    """
    field = field.copy()
    f_oldshape = field.shape
    field.shape = (f_oldshape[0], f_oldshape[1] * f_oldshape[2])
    n = len(x)
    df = n - 2
    r = ((x[:, np.newaxis] * field).sum(axis=0) -
         n * x.mean() * field.mean(axis=0)) / (
            np.sqrt(np.sum(x**2) - n * x.mean()**2) *
            np.sqrt(np.sum(field**2, axis=0) - n * field.mean(axis=0)**2))
    t = r * np.sqrt(df / (1 - r**2))
    p = stats.betai(0.5 * df, 0.5, df / (df + t * t))
    r.shape = (f_oldshape[1], f_oldshape[2])
    p.shape = r.shape
    return r, p
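# A usage sketch for pearson_corr with hypothetical shapes: a length-24 time
# series against a (24, 10, 12) field. stats.betai must be available in the
# snippet's namespace (scipy.special.betainc in modern SciPy):
import numpy as np

rng = np.random.RandomState(1)
x = rng.randn(24)
field = rng.randn(24, 10, 12)
r, p = pearson_corr(x, field)
print(r.shape, p.shape)   # (10, 12) and (10, 12)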
def adjust_r(r, n=3539, **fdr_params):
    from statsmodels.sandbox.stats.multicomp import fdrcorrection0
    from scipy.stats import betai
    df = n - 2
    t_squared = r * r * (df / ((1.0 - r) * (1.0 + r)))
    prob = betai(0.5 * df, 0.5, df / (df + t_squared))
    return fdrcorrection0(prob)
def peak2sigma(psdpeak, n0):
    """
    translates a psd peak height into a multi-trial NULL-hypothesis
    probability

    NOTE: dstarr replaces '0' with 0.000001 to catch floating-point
    accuracy bugs which I otherwise stumble into.
    """
    # Student's-T
    prob0 = betai(0.5 * n0 - 2., 0.5, (n0 - 1.) / (n0 - 1. + 2. * psdpeak))
    if (0.5 * n0 - 2. <= 0.000001):
        lprob0 = 0.
    elif ((n0 - 1.) / (n0 - 1. + 2. * psdpeak) <= 0.000001):
        lprob0 = -999.
    elif (prob0 == 0):
        lprob0 = (0.5 * n0 - 2.) * log((n0 - 1.) / (n0 - 1. + 2. * psdpeak)) \
                 - log(0.5 * n0 - 2.) - betaln(0.5 * n0 - 2., 0.5)
    else:
        lprob0 = log(prob0)

    # ballpark number of independent frequencies
    # (Horne and Baliunas, eq. 13)
    horne = int(-6.362 + 1.193 * n0 + 0.00098 * n0**2.)
    if (horne <= 0):
        horne = 5

    if (lprob0 > log(1.e-4) and prob0 > 0):
        # trials correction, monitoring numerical precision
        lprob = log(1. - exp(horne * log(1 - prob0)))
    elif (lprob0 + log(horne) > log(1.e-4) and prob0 > 0):
        lprob = log(1. - exp(-horne * prob0))
    else:
        lprob = log(horne) + lprob0

    sigma = lprob2sigma(lprob)
    return sigma
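# peak2sigma relies on a helper lprob2sigma, not shown in this excerpt, that
# converts a natural-log probability into Gaussian sigmas. A minimal sketch
# of one plausible implementation (hypothetical, two-sided convention):
import numpy as np
from scipy.special import ndtri

def lprob2sigma(lprob):
    # sigma at which the two-sided Gaussian tail probability equals exp(lprob)
    return ndtri(1.0 - 0.5 * np.exp(lprob))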
def p_map_from_scores(r, n=3539):
    '''Creates a p map from scores (correlations)'''
    from scipy.stats import betai
    df = n - 2
    t_squared = r * r * (df / ((1.0 - r) * (1.0 + r)))
    prob = betai(0.5 * df, 0.5, df / (df + t_squared))
    return prob
def pearsonp(r, n):
    from scipy.stats import betai
    if abs(r) == 1:
        return 0
    else:
        df = n - 2
        t_squared = r * r * (df / ((1.0 - r) * (1.0 + r)))
        return betai(0.5 * df, 0.5, df / (df + t_squared))
def p_from_r(r, n):
    r = max(min(r, 1.0), -1.0)
    df = n - 2
    if abs(r) == 1.0:
        prob = 0.0
    else:
        t_squared = r * r * (df / ((1.0 - r) * (1.0 + r)))
        prob = stats.betai(0.5 * df, 0.5, df / (df + t_squared))
    return prob
def corrcoef(matrix):
    r = np.corrcoef(matrix)
    rf = r[np.triu_indices(r.shape[0], 1)]
    df = matrix.shape[1] - 2
    ts = rf * rf * (df / (1 - rf * rf))
    pf = betai(0.5 * df, 0.5, df / (df + ts))
    p = np.zeros(shape=r.shape)
    p[np.triu_indices(p.shape[0], 1)] = pf
    p[np.tril_indices(p.shape[0], -1)] = pf
    p[np.diag_indices(p.shape[0])] = np.ones(p.shape[0])
    return r, p
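# Usage sketch for corrcoef above: rows are variables and columns are
# observations, mirroring np.corrcoef; betai must be in scope, e.g. bound to
# scipy.special.betainc (the replacement for the removed stats.betai):
import numpy as np
from scipy.special import betainc as betai

data = np.random.RandomState(2).randn(4, 30)  # 4 variables, 30 observations
r, p = corrcoef(data)
print(r.shape, p.shape)                       # both (4, 4)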
def p_map_from_predictions(preds_pc, data_to_map):
    '''Creates a p map from predictions'''
    from sklearn.preprocessing import StandardScaler
    from scipy.stats import betai
    mx = StandardScaler().fit_transform(preds_pc)
    my = StandardScaler().fit_transform(data_to_map)
    n = mx.shape[0]
    # float division guards against Python 2 integer division
    r = (1.0 / (n - 1)) * (mx * my).sum(axis=0)
    df = n - 2
    t_squared = r * r * (df / ((1.0 - r) * (1.0 + r)))
    prob = betai(0.5 * df, 0.5, df / (df + t_squared))
    return prob
def corrcoef_matrix(matrix):
    # Code originating from http://stackoverflow.com/a/24547964
    # by http://stackoverflow.com/users/2455058/jingchao
    r = np.corrcoef(matrix)
    rf = r[np.triu_indices(r.shape[0], 1)]
    df = matrix.shape[1] - 2
    ts = rf * rf * (df / (1 - rf * rf))
    pf = betai(0.5 * df, 0.5, df / (df + ts))
    p = np.zeros(shape=r.shape)
    p[np.triu_indices(p.shape[0], 1)] = pf
    p[np.tril_indices(p.shape[0], -1)] = pf
    p[np.diag_indices(p.shape[0])] = np.ones(p.shape[0])
    return r, p
def check_sample_mean(sm, v, n, popmean):
    # from stats.stats.ttest_1samp(a, popmean):
    # Calculates the t-obtained for the independent samples T-test on ONE group
    # of scores a, given a population mean.
    #
    # Returns: t-value, two-tailed prob
    df = n - 1
    svar = ((n - 1) * v) / float(df)  # looks redundant
    t = (sm - popmean) / np.sqrt(svar * (1.0 / n))
    prob = stats.betai(0.5 * df, 0.5, df / (df + t * t))

    # return t,prob
    npt.assert_(prob > 0.01, "mean fail, t,prob = %f, %f, m, sm=%f,%f" %
                (t, prob, popmean, sm))
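# The betai expression in check_sample_mean equals the two-sided tail
# probability of Student's t with df degrees of freedom. A quick cross-check
# of that identity using modern SciPy:
from scipy import stats
from scipy.special import betainc

t, df = 1.7, 9
p_beta = betainc(0.5 * df, 0.5, df / (df + t * t))
p_tail = 2 * stats.t.sf(abs(t), df)
assert abs(p_beta - p_tail) < 1e-10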
def correlation_matrix_vector(matrix, vector):
    '''Correlate each column of `matrix` (shape T, N) with `vector`
    (shape T,), returning r and p arrays of length N.'''
    nt = matrix.shape[0]  # time dimension
    data1_norm = (matrix - matrix.mean(axis=0)) / matrix.std(axis=0)
    data2_norm = (vector - vector.mean()) / vector.std()
    r = np.sum(np.swapaxes(data1_norm, 0, 1) * data2_norm / float(nt), axis=1)
    df = nt - 2  # degrees of freedom
    t_squared = r * r * (df / ((1.0 - r) * (1.0 + r)))
    p = betai(0.5 * df, 0.5, df / (df + t_squared))
    return r, p
def corrcoef(matrix):
    """
    Received code from following link:
    http://stackoverflow.com/questions/24432101/correlation-coefficients-and-p-values-for-all-pairs-of-rows-of-a-matrix
    """
    r = np.corrcoef(matrix)
    rf = r[np.triu_indices(r.shape[0], 1)]
    df = matrix.shape[1] - 2
    ts = rf * rf * (df / (1 - rf * rf))
    pf = betai(0.5 * df, 0.5, df / (df + ts))
    p = np.zeros(shape=r.shape)
    p[np.triu_indices(p.shape[0], 1)] = pf
    p[np.tril_indices(p.shape[0], -1)] = pf
    p[np.diag_indices(p.shape[0])] = np.ones(p.shape[0])
    return r, p
def chi2sigma(chi0, chi1, nu0, nharm):
    from scipy.stats import betai
    from scipy.special import betaln
    nu1 = nu0 - 2. * nharm
    dfn = nu0 - nu1
    dfd = nu1
    sigma = 0.
    if (dfn > 0 and dfd > 0 and chi0 > chi1):
        fstat = (chi0 / chi1 - 1.) * dfd / dfn
        prob = betai(dfd / 2., dfn / 2., dfd / (dfd + dfn * fstat))
        if (dfd <= 0 or dfn <= 0):
            lprob = 0.
        elif (chi1 == 0):
            lprob = -999.
        elif (prob == 0):
            lprob = 0.5 * dfd * log(dfd / (dfd + dfn * fstat)) \
                    - log(dfd / 2.) - betaln(dfd / 2., dfn / 2.)
        else:
            lprob = log(prob)
        sigma = lprob2sigma(lprob)
    return sigma
def check_sample_mean(sm, v, n, popmean):
    """
    from stats.stats.ttest_1samp(a, popmean):
    Calculates the t-obtained for the independent samples T-test on ONE group
    of scores a, given a population mean.

    Returns: t-value, two-tailed prob
    """
    ## a = asarray(a)
    ## x = np.mean(a)
    ## v = np.var(a, ddof=1)
    ## n = len(a)

    df = n - 1
    svar = ((n - 1) * v) / float(df)  # looks redundant
    t = (sm - popmean) / np.sqrt(svar * (1.0 / n))
    prob = stats.betai(0.5 * df, 0.5, df / (df + t * t))

    # return t,prob
    assert prob > 0.01, 'mean fail, t,prob = %f, %f, m,sm=%f,%f' % \
        (t, prob, popmean, sm)
def check_sample_mean(sm, v, n, popmean):
    """
    from stats.stats.ttest_1samp(a, popmean):
    Calculates the t-obtained for the independent samples T-test on ONE group
    of scores a, given a population mean.

    Returns: t-value, two-tailed prob
    """
    ## a = asarray(a)
    ## x = np.mean(a)
    ## v = np.var(a, ddof=1)
    ## n = len(a)

    df = n - 1
    svar = ((n - 1) * v) / float(df)  # looks redundant
    t = (sm - popmean) / np.sqrt(svar * (1.0 / n))
    prob = stats.betai(0.5 * df, 0.5, df / (df + t * t))

    # return t,prob
    npt.assert_(prob > 0.01, 'mean fail, t,prob = %f, %f, m,sm=%f,%f' %
                (t, prob, popmean, sm))
def reciprocity(G, nbunch=None, weight=None):
    if nbunch is not None:
        nodes = np.sort(G.nodes())
        nbunch = np.sort(nbunch)
        fnodes = np.setdiff1d(nodes, nbunch)
        nodelist = np.append(nbunch, fnodes)
    else:
        nbunch = G.nodes()
        nodelist = G.nodes()
        fnodes = list()
    W = np.array(nx.to_numpy_matrix(G, nodelist=nodelist))
    indices = np.diag_indices_from(W)
    W[indices] = 0.
    if weight is None:
        W = 1. * (W > 0)
    l = float(W.sum())
    n = len(nbunch)
    m = len(fnodes)
    df = n * (n - 1 + 2 * m)
    # this is to take into account that the maximal number of observable
    # links is lower than (n+m)*(n+m-1)
    a = l / df
    # this is to set the terms corresponding to unobserved exposures to zero
    W[n:, n:] = a
    l2 = (W**2).sum()
    omega = l2 / l
    rho = (W * W.T).sum() / l
    rho = (rho - a) / (omega - a)
    rho = max(min(rho, 1.0), -1.0)
    if abs(rho) == 1.0:
        prob = 0.0
    else:
        t_squared = rho * rho * (df / ((1.0 - rho) * (1.0 + rho)))
        prob = betai(0.5 * df, 0.5, df / (df + t_squared))
    return rho, prob
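# Usage sketch for reciprocity (assumes an older networkx that still provides
# nx.to_numpy_matrix, with np and betai -- e.g. scipy.special.betainc --
# already in scope):
import networkx as nx

G = nx.gnp_random_graph(20, 0.2, seed=42, directed=True)
rho, prob = reciprocity(G)
print(rho, prob)   # correlation-style reciprocity and its p-value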
def genGBM(self, alpha, beta, mu, sigma):
    """This function produces a time-series based on Geometric Brownian
    Motion (GBM), filtered through a beta distribution for scaling.

    ***THIS NEEDS WORK***
    """
    self.prices = [np.random.binomial(1, .5)]
    t = np.arange(0, 1, step=.01)
    S0 = np.random.random()
    Wt = np.cumsum(np.random.randn(100))
    signal = S0 * np.exp((mu - sigma**2 / 2) * t + sigma * Wt)
    res = stats.betai(alpha, beta, abs(signal / max(signal)))
    for i in res:
        self.prices.append(i)
    self.prices = list(reversed(self.prices))
    self.prices = np.around(self.prices, decimals=2)
    self.pricesNO = [abs(1 - x) for x in self.prices]
    self.pricesNO = np.around(self.pricesNO, decimals=2)
def correlation_2_arrays(data1, data2, axis=0):
    '''Correlate two identically shaped arrays along `axis`.'''
    nt = data1.shape[axis]
    assert data1.shape == data2.shape
    view1 = data1
    view2 = data2
    if axis:
        view1 = np.rollaxis(data1, axis)
        view2 = np.rollaxis(data2, axis)
    data1_norm = (view1 - data1.mean(axis=axis)) / data1.std(axis=axis)
    data2_norm = (view2 - data2.mean(axis=axis)) / data2.std(axis=axis)
    r = np.sum(data1_norm * data2_norm / float(nt), axis=0)
    df = nt - 2
    t_squared = r * r * (df / ((1.0 - r) * (1.0 + r)))
    p = betai(0.5 * df, 0.5, df / (df + t_squared))
    return r, p
def genVarGamma_beta(self, alpha, beta, mu, sigma, theta, nu, plot=False):
    """Generates 100 random variables that are variance-gamma distributed
    and then filtered through a beta distribution for scaling.

    ***THIS SEEMS TO WORK***
    """
    t = np.arange(0, 1, step=.01)
    self.prices = [np.random.binomial(1, .5)]
    signal = vg.rnd(100, mu, sigma, theta, nu)
    res = stats.betai(alpha, beta, np.abs(signal))
    for i in res:
        self.prices.append(i)
    self.prices = list(reversed(self.prices))
    self.prices = np.around(self.prices, decimals=2)
    self.pricesNO = [abs(1 - x) for x in self.prices]
    self.pricesNO = np.around(self.pricesNO, decimals=2)
    if plot == True:
        plt.plot(self.prices)
def genABM_beta(self, alpha, beta, mu, sigma, plot=False):
    """This function produces a time-series based on Arithmetic Brownian
    Motion (ABM), filtered through a beta distribution for scaling.

    ***THIS NEEDS WORK***
    """
    t = np.arange(0, 1, step=.01)
    self.prices = [np.random.binomial(1, .5)]
    Wt = np.cumsum(np.random.randn(100))
    signal = self.prices[0] + ((mu - sigma**2 / 2) * t + sigma * Wt)
    res = stats.betai(alpha, beta, abs(signal / max(signal)))
    for i in res:
        self.prices.append(i)
    self.prices = list(reversed(self.prices))
    self.prices = np.around(self.prices, decimals=2)
    self.pricesNO = [abs(1 - x) for x in self.prices]
    self.pricesNO = np.around(self.pricesNO, decimals=2)
    if plot == True:
        plt.plot(self.prices)
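# The gen* snippets above use stats.betai as the Beta CDF to squash a signal
# into [0, 1]. In modern SciPy the same transform is stats.beta.cdf(x, a, b)
# or, equivalently, scipy.special.betainc(a, b, x):
import numpy as np
from scipy import stats
from scipy.special import betainc

x = np.linspace(0., 1., 5)
assert np.allclose(betainc(2.0, 3.0, x), stats.beta.cdf(x, 2.0, 3.0))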
def f_test_probability(N, p1, Chi2_1, p2, Chi2_2):
    """Return the F-test probability that the simpler model is correct.

    e.g. p1 = 5.        # number of PPM parameters
    e.g. p2 = p1 + 7.   # number of PPM + orbital parameters

    :param N: int
        Number of data points
    :param p1: int
        Number of parameters of the simpler model
    :param Chi2_1: float
        chi^2 corresponding to the simpler model
    :param p2: int
        Number of parameters of the model with more parameters, p2 > p1
    :param Chi2_2: float
        chi^2 corresponding to the model with more parameters
    :return: prob: float
        probability
    """
    nu1 = p2 - p1
    nu2 = N - p2  # degrees of freedom
    if (Chi2_1 < Chi2_2):
        raise RuntimeWarning('Solution better with fewer parameters')
    # F test
    F0 = nu2 / nu1 * (Chi2_1 - Chi2_2) / Chi2_2
    # probability
    prob = betai(0.5 * nu2, 0.5 * nu1, nu2 / (nu2 + F0 * nu1))
    return prob
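# Worked example for f_test_probability with hypothetical numbers: a
# 5-parameter PPM-only fit (chi^2 = 150 on N = 100 points) against a
# 12-parameter PPM+orbit fit (chi^2 = 120). Floats avoid Python 2 integer
# division; betai is assumed bound to scipy.special.betainc:
from scipy.special import betainc as betai

N, p1, p2 = 100., 5., 12.
chi2_1, chi2_2 = 150., 120.
prob = f_test_probability(N, p1, chi2_1, p2, chi2_2)
print(prob)   # a small value favours keeping the orbital parameters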
def doCorrelationIDR(self, ID, layer1, layer2):
    # first get stats for each layer
    [layer1sum, layer1n] = self.sumLayer(layer1[0], layer1[1])
    [layer2sum, layer2n] = self.sumLayer(layer2[0], layer2[1])
    layer1mean = layer1sum / layer1n
    layer2mean = layer2sum / layer2n
    # get layer extents based on first layer
    xMin = layer1[0].extent().xMinimum()
    xMax = layer1[0].extent().xMaximum()
    yMin = layer1[0].extent().yMinimum()
    yMax = layer1[0].extent().yMaximum()
    xDim = layer1[0].width()
    yDim = layer1[0].height()
    xSize = (xMax - xMin) / float(xDim)
    ySize = (yMax - yMin) / float(yDim)
    # initialise summing variables
    [mySum, mySumz1m, mySumz2m, myN] = [0, 0, 0, 0]
    myNDV = QString(u'null (no data)')
    myOE = QString(u'out of extent')
    # loop through pixels in first layer
    for i in range(xDim):
        x = xMin + (xSize / 2) + (i * xSize)
        for j in range(yDim):
            y = yMin + (ySize / 2) + (j * ySize)
            # fetch values for this point
            z1 = layer1[0].identify(QgsPoint(x, y))[1].values()[layer1[1]]
            z2 = layer2[0].identify(QgsPoint(x, y))[1].values()[layer2[1]]
            # only consider where both grids are valid
            if not (z1 == myNDV or z1 == myOE or z2 == myNDV or z2 == myOE):
                z1 = float(z1)
                z2 = float(z2)
                myN += 1
                if ID == "I":
                    mySum += pow(pow(z1 / layer1sum, 0.5) -
                                 pow(z2 / layer2sum, 0.5), 2)
                elif ID == "D":
                    mySum += abs(z1 / layer1sum - z2 / layer2sum)
                elif ID == "R":
                    z1m = z1 - layer1mean
                    z2m = z2 - layer2mean
                    mySum += z1m * z2m
                    mySumz1m += pow(z1m, 2)
                    mySumz2m += pow(z2m, 2)
    [myCor, myP] = [None, None]
    # final calculations
    if ID == "I":
        myCor = 1 - (0.5 * pow(mySum, 0.5))
        myP = None
    elif ID == "D":
        myCor = 1 - (0.5 * mySum)
        myP = None
    elif ID == "R":
        if mySumz1m * mySumz1m > 0:
            myCor = mySum / (pow(mySumz1m, 0.5) * pow(mySumz2m, 0.5))
            myDF = myN - 2
            myPprelim = myCor * pow(myDF / ((1 - myCor) * (1 + myCor)), 0.5)
            myP = betai(0.5 * myDF, 0.5, (myDF / (myDF + pow(myPprelim, 2))))
    return [myCor, myP]
def __call__(self, table, weight=None, verbose=0):
    """
    :param table: data instances.
    :type table: :class:`Orange.data.Table`
    :param weight: the weights for instances. Default: None, i.e.
        all data instances are equally important in fitting
        the regression parameters
    :type weight: None or list of Orange.feature.Continuous
        which stores weights for instances
    """
    if self.use_vars is not None:
        new_domain = Orange.data.Domain(self.use_vars,
                                        table.domain.class_var)
        new_domain.addmetas(table.domain.getmetas())
        table = Orange.data.Table(new_domain, table)

    # discrete values are continuized
    table = self.continuize_table(table)
    # missing values are imputed
    table = self.impute_table(table)

    if self.stepwise:
        use_vars = stepwise(table, weight, add_sig=self.add_sig,
                            remove_sig=self.remove_sig)
        new_domain = Orange.data.Domain(use_vars, table.domain.class_var)
        new_domain.addmetas(table.domain.getmetas())
        table = Orange.data.Table(new_domain, table)

    # conversion to numpy
    A, y, w = table.to_numpy()
    if A is None:
        n, m = len(table), 0
    else:
        n, m = numpy.shape(A)

    if self.intercept:
        if A is None:
            X = numpy.ones([n, 1])
        else:
            X = numpy.insert(A, 0, 1, axis=1)  # adds a column of ones
    else:
        X = A

    domain = table.domain

    if numpy.std(y) < 10e-6:  # almost constant variable
        return Orange.regression.mean.MeanLearner(table)

    # set weights to the instances
    W = numpy.identity(n)
    if weight:
        for i, ins in enumerate(table):
            W[i, i] = float(ins[weight])

    compute_stats = self.compute_stats
    # adds some robustness by computing the pseudo inverse;
    # normal inverse could fail due to singularity of the X.T * W * X
    if self.ridge_lambda is None:
        cov = pinv(dot(dot(X.T, W), X))
    else:
        cov = pinv(dot(dot(X.T, W), X) -
                   self.ridge_lambda * numpy.eye(m + 1))
        compute_stats = False  # TO DO: find inferential properties of the estimators
    D = dot(dot(cov, X.T), W)
    coefficients = dot(D, y)

    mu_y, sigma_y = numpy.mean(y), numpy.std(y)
    if A is not None:
        cov_x = numpy.cov(X, rowvar=0)
        # standardized coefficients
        std_coefficients = (sqrt(cov_x.diagonal()) / sigma_y) * coefficients
    else:
        std_coefficients = None

    if compute_stats is False:
        return LinearRegression(domain.class_var, domain,
                                coefficients=coefficients,
                                std_coefficients=std_coefficients,
                                intercept=self.intercept)

    fitted = dot(X, coefficients)
    residuals = [ins.get_class() - fitted[i]
                 for i, ins in enumerate(table)]

    # model summary
    # total sum of squares (total variance)
    sst = numpy.sum((y - mu_y) ** 2)
    # sum of squares due to regression (explained variance)
    ssr = numpy.sum((fitted - mu_y) ** 2)
    # error sum of squares (unexplained variance)
    sse = sst - ssr
    # coefficient of determination
    r2 = ssr / sst
    r2adj = 1 - (1 - r2) * (n - 1) / (n - m - 1)
    F = (ssr / m) / (sst - ssr / (n - m - 1))
    df = n - 2
    sigma_square = sse / (n - m - 1)
    # standard error of the regression estimator, t-scores and p-values
    std_error = sqrt(sigma_square * pinv(dot(X.T, X)).diagonal())
    t_scores = coefficients / std_error
    p_vals = [stats.betai(df * 0.5, 0.5, df / (df + t * t))
              for t in t_scores]

    # dictionary of regression coefficients with standard errors
    # and p-values
    dict_model = {}
    if self.intercept:
        dict_model["Intercept"] = (coefficients[0], std_error[0],
                                   t_scores[0], p_vals[0])
    for i, var in enumerate(domain.attributes):
        j = i + 1 if self.intercept else i
        dict_model[var.name] = (coefficients[j], std_error[j],
                                t_scores[j], p_vals[j])

    return LinearRegression(domain.class_var, domain, coefficients, F,
                            std_error=std_error, t_scores=t_scores,
                            p_vals=p_vals, dict_model=dict_model,
                            fitted=fitted, residuals=residuals,
                            m=m, n=n, mu_y=mu_y, r2=r2, r2adj=r2adj,
                            sst=sst, sse=sse, ssr=ssr,
                            std_coefficients=std_coefficients,
                            intercept=self.intercept)
psd, freqs, signi, sim_signi, peak_sort = lomb(noisetime, noisedata,
                                               delta_time=dnoisedata,
                                               signal_err=dnoisedata,
                                               freqin=frequencies,
                                               fap=fap, multiple=multiple)

# peak location
imax = psd.argmax()
freq_max = freqs[imax]
mpsd = max(psd)
print("Peak=%.2f @ %.2f Hz, significance estimate: %.1f-sigma (T-test)" %
      (mpsd, freq_max, signi))

if (len(peak_sort) > 0):
    psd0 = peak_sort[int((1 - fap) * (multiple - 1))]
    print("Expected peak %.2f for False Alarm of %.2e" % (psd0, fap))
    Prob0 = betai(0.5 * N - 2., 0.5, (N - 1.) / (N - 1. + 2. * psd0))
    Nindep = log(1 - fap) / log(1 - Prob0)
    horne = int(-6.362 + 1.193 * N + 0.00098 * N**2.)
    if (horne <= 0):
        horne = 5
    print("Estimated number of independent trials: %.2f (horne=%d)" %
          (Nindep, horne))
    nover = sum(peak_sort >= mpsd)
    print("Fraction of simulations with peak greater than observed value: "
          "%d/%d" % (nover, multiple))

"""
import Gnuplot
import time
plotobj = Gnuplot.Gnuplot()
plotobj.xlabel('Period (s)')
plotobj.ylabel('LS Periodogram')
plotobj('set logscale x')
def __call__(self, table, weight=None, verbose=0):
    """
    :param table: data instances.
    :type table: :class:`Orange.data.Table`
    :param weight: the weights for instances. Default: None, i.e.
        all data instances are equally important in fitting
        the regression parameters
    :type weight: None or list of Orange.feature.Continuous
        which stores weights for instances
    """
    if self.use_vars is not None:
        new_domain = Orange.data.Domain(self.use_vars,
                                        table.domain.class_var)
        new_domain.addmetas(table.domain.getmetas())
        table = Orange.data.Table(new_domain, table)

    # discrete values are continuized
    table = self.continuize_table(table)
    # missing values are imputed
    table = self.impute_table(table)

    if self.stepwise:
        use_vars = stepwise(table, weight, add_sig=self.add_sig,
                            remove_sig=self.remove_sig)
        new_domain = Orange.data.Domain(use_vars, table.domain.class_var)
        new_domain.addmetas(table.domain.getmetas())
        table = Orange.data.Table(new_domain, table)

    domain = table.domain

    # convert to numpy
    X, y, w = table.to_numpy()
    n, m = numpy.shape(X)

    if self.intercept:
        X = numpy.insert(X, 0, 1, axis=1)  # adds a column of ones

    if weight:
        weights = numpy.sqrt([float(ins[weight]) for ins in table])
        X = weights.reshape(n, 1) * X
        y = weights * y

    cov = dot(X.T, X)

    if self.ridge_lambda:
        stride = cov.shape[0] + 1
        cov.flat[self.intercept * stride::stride] += self.ridge_lambda

    # adds some robustness by computing the pseudo inverse;
    # normal inverse could fail due to the singularity of X.T * X
    invcov = pinv(cov)
    D = dot(invcov, X.T)
    coefficients = dot(D, y)

    mu_y, sigma_y = numpy.mean(y), numpy.std(y)
    if m > 0:
        # standardized coefficients
        std_coefficients = std(X, axis=0, ddof=1) / sigma_y * coefficients
    else:
        std_coefficients = None

    # TODO: find inferential properties of the estimators for ridge
    if self.compute_stats is False or self.ridge_lambda:
        return LinearRegression(domain.class_var, domain,
                                coefficients=coefficients,
                                std_coefficients=std_coefficients,
                                intercept=self.intercept)

    fitted = dot(X, coefficients)
    residuals = [ins.get_class() - fitted[i]
                 for i, ins in enumerate(table)]

    # model summary
    df_reg = n - m - self.intercept
    # total sum of squares (total variance)
    sst = numpy.sum((y - mu_y) ** 2)
    # regression sum of squares (explained variance)
    ssr = numpy.sum((fitted - mu_y) ** 2)
    # residual sum of squares
    sse = numpy.sum((y - fitted) ** 2)
    # coefficient of determination
    r2 = 1 - sse / sst
    r2adj = 1 - (1 - r2) * (n - 1) / df_reg
    F = (ssr / m) / ((sst - ssr) / df_reg) if m else 0
    sigma_square = sse / df_reg
    # standard error of the regression estimator, t-scores and p-values
    std_error = sqrt(sigma_square * invcov.diagonal())
    t_scores = coefficients / std_error
    df_res = n - 2
    p_vals = [stats.betai(df_res * 0.5, 0.5, df_res / (df_res + t * t))
              for t in t_scores]

    # dictionary of regression coefficients with standard errors
    # and p-values
    dict_model = {}
    if self.intercept:
        dict_model["Intercept"] = (coefficients[0], std_error[0],
                                   t_scores[0], p_vals[0])
    for i, var in enumerate(domain.features):
        j = i + 1 if self.intercept else i
        dict_model[var.name] = (coefficients[j], std_error[j],
                                t_scores[j], p_vals[j])

    return LinearRegression(domain.class_var, domain, coefficients, F,
                            std_error=std_error, t_scores=t_scores,
                            p_vals=p_vals, dict_model=dict_model,
                            fitted=fitted, residuals=residuals,
                            m=m, n=n, mu_y=mu_y, r2=r2, r2adj=r2adj,
                            sst=sst, sse=sse, ssr=ssr,
                            std_coefficients=std_coefficients,
                            intercept=self.intercept)