def generate_train_test_phenotypes(betas, train_snps, test_snps, h2=0.01):
    """
    Generate phenotypes given betas and SNPs
    """
    (m, n) = train_snps.shape
    (test_m, test_n) = test_snps.shape
    assert len(betas) == m == test_m, 'Number of betas must match the number of SNPs.'

    # Training phenotypes
    phen_noise = stats.norm.rvs(0, sp.sqrt(1.0 - h2), size=n)
    phen_noise = sp.sqrt((1.0 - h2) / sp.var(phen_noise)) * phen_noise
    genetic_part = sp.dot(train_snps.T, betas)
    genetic_part = sp.sqrt(h2 / sp.var(genetic_part)) * genetic_part
    train_phen = genetic_part + phen_noise
    # print 'Herit:', sp.var(genetic_part) / sp.var(train_phen)
    ret_dict = {}
    ret_dict['phen'] = train_phen
    betas_marg = (1. / n) * sp.dot(train_phen, train_snps.T)
    ret_dict['betas_marg'] = betas_marg

    # Testing phenotypes
    phen_noise = stats.norm.rvs(0, sp.sqrt(1.0 - h2), size=test_n)
    phen_noise = sp.sqrt((1.0 - h2) / sp.var(phen_noise)) * phen_noise
    genetic_part = sp.dot(test_snps.T, betas)
    genetic_part = sp.sqrt(h2 / sp.var(genetic_part)) * genetic_part
    test_phen = genetic_part + phen_noise
    ret_dict['test_phen'] = test_phen
    return ret_dict
def generate_test_data_w_sum_stats(h2=0.5, n=100000, n_sample=100, m=50000, model='gaussian',
                                   p=1.0, conseq_r2=0, m_ld_chunk_size=100):
    """
    Generate test data with summary statistics.
    """
    # Get LD sample matrix
    D_sample = genotypes.get_sample_D(200, conseq_r2=conseq_r2, m=m_ld_chunk_size)

    # Simulate beta_hats
    ret_dict = simulate_beta_hats(h2=h2, n=n, n_sample=n_sample, m=m, model=model, p=p,
                                  conseq_r2=conseq_r2, m_ld_chunk_size=m_ld_chunk_size,
                                  D_sample=D_sample)

    # Simulate test genotypes
    test_snps = genotypes.simulate_genotypes_w_ld(n_sample=n_sample, m=m, conseq_r2=conseq_r2,
                                                  m_ld_chunk_size=m_ld_chunk_size)
    ret_dict['test_snps'] = test_snps

    # Simulate test phenotypes
    phen_noise = stats.norm.rvs(0, sp.sqrt(1.0 - h2), size=n_sample)
    phen_noise = sp.sqrt((1.0 - h2) / sp.var(phen_noise)) * phen_noise
    genetic_part = sp.dot(test_snps.T, ret_dict['betas'])
    genetic_part = sp.sqrt(h2 / sp.var(genetic_part)) * genetic_part
    test_phen = genetic_part + phen_noise
    ret_dict['test_phen'] = test_phen
    return ret_dict
def infer_diag_post(self, X_ii, D_i):
    X_i = dc(X_ii)
    ns = len(D_i)
    X_i.resize([ns, self.D])
    [m, V] = self.infer_diag(X_i, D_i)
    if sp.amin(V) <= -0.:
        class MJMError(Exception):
            pass
        print "negative/eq variance"
        print [m, V, X_i, D_i]
        print "_______________"
        #self.printc()
        raise(MJMError)
    if sp.amin(sp.var(m, axis=0)) < -0.:
        class MJMError(Exception):
            pass
        print "negativevar of mean"
        print X_i.shape
        print [m, V, sp.var(m, axis=0), X_i, D_i]
        print "_______________"
        #self.printc()
        raise(MJMError)
    return [sp.mean(m, axis=0).reshape([1, ns]),
            (sp.mean(V, axis=0) + sp.var(m, axis=0)).reshape([1, ns])]
def _sqr_transform(self, method='standard'):
    a = sp.array(self.values)
    if method == 'standard':
        vals = ((a - min(a)) + 0.1 * sp.var(a)) * ((a - min(a)) + 0.1 * sp.var(a))
    else:
        vals = a * a
    self._perform_transform(vals, "sqr")
    return True
def prnt(filename, type, duration, run):
    sptp = EL.G.sig_per_turtle_p  # Sig per turtle based on percent of total significance
    sptn = EL.G.sig_per_turtle_n  # Sig per turtle based on number of significant patches visited
    open(filename, 'a').write(str(type) + ',' + str(EL.G.NUM_TURTLES[type]) + \
        ',' + str(duration) + ',' + str(run) + ',' + \
        str(EL.G.epprog) + ',' + str(EL.G.total_prog) + ',' + str(EL.G.percent_progress) + \
        ',' + str(EL.G.agents_peak) + ',' + str(EL.G.agents_hill) + ',' + str(EL.G.wasted_effort) + \
        ',,' + str(min(sptp)) + ',' + str(max(sptp)) + ',' + str(mean(sptp)) + ',' + str(median(sptp)) + \
        ',' + str(var(sptp)) + ',' + str(skew(sptp)) + ',,' + str(min(sptn)) + ',' + str(max(sptn)) + \
        ',' + str(mean(sptn)) + ',' + str(median(sptn)) + ',' + str(var(sptn)) + ',' + str(skew(sptn)) + '\n')
def ftest(X, Y):
    '''
    F-test to test variance equality.

    :param X: data 1
    :param Y: data 2
    :return: F statistic and p-value of the F-test
    '''
    F = scipy.var(X) / scipy.var(Y)
    # Degrees of freedom for a variance ratio are n - 1 for each sample.
    df1, df2 = len(X) - 1, len(Y) - 1
    # Lower-tail probability of the observed ratio; depending on the convention
    # used, the upper-tail or two-sided p-value is derived from this value.
    pval = stats.f.cdf(F, df1, df2)
    return (F, pval)
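# A minimal usage sketch for the ftest defined above (not from the original
# source).  It assumes the function is in scope together with its module-level
# imports (an older scipy that still exposes scipy.var, and scipy.stats as
# stats); the sample sizes and seed below are made up for illustration.
import numpy as np

np.random.seed(0)
x = np.random.normal(0.0, 1.0, size=50)   # sample with unit variance
y = np.random.normal(0.0, 2.0, size=60)   # sample with roughly 4x the variance
F, p_lower = ftest(x, y)
print("F = %.3f, lower-tail p = %.3f" % (F, p_lower))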
def power(self, currentSource, active, inactive, histories, discMesh):
    """
    power is the main method for the power method

    currentSource: Initial source for power method
    active: Number of active iterations
    inactive: Number of inactive iterations
    histories: Number histories per iteration
    discMesh: Mesh for discretization of FissionSource
    """
    self.k = 1

    self.eigEstI = []    # Estimate of eigenvalue from inactive iterations
    self.meanEigI = []   # Mean of the eigenvalues from inactive iterations
    self.varEigI = []    # Variance of the eigenvalues from inactive iterations

    self.eigEst = []     # Estimate of eigenvalue from active iterations
    self.meanEig = []    # Mean of the eigenvalues from active iterations
    self.varEig = []     # Variance of the eigenvalues from active iterations

    self.eigVector = []  # Eigenvector estimate for active iterations

    start = time.time()
    for i in xrange(inactive):
        nextSource = self.Markov_Transport(currentSource, histories)
        self.k = self.k * (len(currentSource) / float(histories))

        self.eigEstI.append(self.k)
        self.meanEigI.append(scipy.mean(self.eigEstI))  # Mean of inactive eigenvalue estimates
        self.varEigI.append(scipy.var(self.eigEstI))    # Variance of inactive eigenvalue estimates

        print "I: %5i, eigenvalue = %8.6f," % (i, self.k),
        print " time: %8.3f sec" % (time.time() - start)

        currentSource = nextSource

    print "------------------ACTIVE ITERATIONS------------------"
    for self.i in xrange(active):
        nextSource = self.Markov_Transport(currentSource, histories)
        self.k = self.k * (len(currentSource) / float(histories))

        self.eigEst.append(self.k)
        self.meanEig.append(scipy.mean(self.eigEst))  # Mean eigenvalue
        self.varEig.append(scipy.var(self.meanEig))   # Variance eigenvalue

        print "A: %5i, eigenvalue = %8.6f," % (self.i, self.k),
        print " mean = %6.4f, std.dev = %6.4f, time: %8.3f sec" % (
            self.meanEig[-1], math.sqrt(self.varEig[-1]), (time.time() - start))

        # Discretized fissionSource
        discSource = nextSource.discretized(discMesh)
        discSource = discSource / sum(discSource)
        self.eigVector.append(discSource)

        currentSource = nextSource
def variance_explained(spikes, means=None, noise=None):
    """ Returns the fraction of variance in each channel that is explained
    by the means.

    Values below 0 or above 1 for large data sizes indicate that some
    assumptions were incorrect (e.g. about channel noise) and the results
    should not be trusted.

    :param dict spikes: Dictionary, indexed by unit, of
        :class:`neo.core.SpikeTrain` objects (where the ``waveforms``
        member includes the spike waveforms) or lists of
        :class:`neo.core.Spike` objects.
    :param dict means: Dictionary, indexed by unit, of lists of spike
        waveforms as :class:`neo.core.Spike` objects or numpy arrays.
        Means for units that are not in this dictionary will be estimated
        using the spikes.
        Default: None - means will be estimated from given spikes.
    :type noise: Quantity 1D
    :param noise: The known noise levels (as variance) per channel of the
        original data. This should be estimated from the signal periods
        that do not contain spikes, otherwise the explained variance
        could be overestimated. If None, the estimate of explained variance
        is done without regard for noise.
        Default: None
    :return dict: A dictionary of arrays, both indexed by unit. If ``noise``
        is ``None``, the dictionary contains the fraction of explained
        variance per channel without taking noise into account. If ``noise``
        is given, it contains the fraction of variance per channel explained
        by the means and given noise level together.
    """
    ret = {}
    if means is None:
        means = {}
    for u, spks in spikes.iteritems():
        train = spks
        if not isinstance(train, neo.SpikeTrain):
            train = spikes_to_spike_train(spks)
        if u in means and means[u].waveform.shape[0] == train.waveforms.shape[1]:
            spike = means[u]
        else:
            spike = neo.Spike(0)
            spike.waveform = sp.mean(train.waveforms, axis=0)

        orig = sp.mean(sp.var(train.waveforms, axis=1), axis=0)
        waves = train.waveforms - spike.waveform
        new = sp.mean(sp.var(waves, axis=1), axis=0)

        if noise is not None:
            ret[u] = sp.asarray(1 - (new - noise) / orig)
        else:
            ret[u] = sp.asarray(1 - new / orig)
    return ret
def _measureColorEntryMonitor(self, colorentry, n=5):
    xyY_list = self.calibmonitor.measureGratingStimColor(
        colorentry.patch_stim_value, n)
    colorentry.monitor_xyY = (
        scipy.mean([xyY[0] for xyY in xyY_list]),
        scipy.mean([xyY[1] for xyY in xyY_list]),
        scipy.mean([xyY[2] for xyY in xyY_list]))
    colorentry.monitor_xyY_sd = (
        math.sqrt(scipy.var([xyY[0] for xyY in xyY_list])),
        math.sqrt(scipy.var([xyY[1] for xyY in xyY_list])),
        math.sqrt(scipy.var([xyY[2] for xyY in xyY_list])))
def regress_erp(y, test_idx, predictor, events, ns):
    event_types = events['uniqueLabel']
    labels = events['label']
    latencies = events['latencyInFrame']

    train_idx = ~test_idx
    ytrn = matrix(y[train_idx].tolist()).T

    # There is a specific test set to use
    if (len(np.where(test_idx)[0]) != 0):
        tst_start_idx = min(np.where(test_idx)[0])
        tst_end_idx = max(np.where(test_idx)[0])
    # Test on all the data
    else:
        tst_start_idx = min(np.where(~test_idx)[0])
        tst_end_idx = max(np.where(~test_idx)[0])

    train_idx_list = np.where(train_idx == 1)[0]
    train_idx_list = array(train_idx_list, dtype=np.int).tolist()

    # Solve the system of equations y = Ax
    P = predictor[train_idx_list, :].T * predictor[train_idx_list, :]
    q = -predictor[train_idx_list, :].T * ytrn
    rerp_vec = solvers.coneqp(P, q)['x']

    yestimate = array(predictor * rerp_vec)
    y_temp = matrix(y.tolist()).T
    noise = y_temp - yestimate

    events_to_test = np.where((array(latencies) < tst_end_idx) &
                              (array(latencies) > tst_start_idx))[0]

    gc.disable()
    # Compute performance stats
    stats = np.empty((len(event_types), 2))
    for i, this_type in enumerate(event_types):
        this_stat = np.empty((0, 2))
        for j, event_idx in enumerate(events_to_test):
            this_event = labels[event_idx]
            if this_event == this_type:
                start_idx = latencies[event_idx]
                end_idx = np.minimum(tst_end_idx, start_idx + ns)
                yblock = y[start_idx:end_idx]
                noiseblock = noise[start_idx:end_idx]
                this_stat = np.append(this_stat,
                                      array([[sp.var(yblock)], [sp.var(noiseblock)]]).T,
                                      axis=0)
        rov_raw = this_stat[:, 0] - this_stat[:, 1]
        rov_nor = rov_raw / this_stat[:, 0]
        rov = array([sp.mean(rov_raw), sp.mean(rov_nor)])
        stats[i, :] = rov
    gc.enable()

    return stats, np.reshape(array(rerp_vec), (-1, ns)).T
def MLE_iteration_constrain(i1, i2, s1, s2, effective_inclusion_length, effective_skipping_length):
    psi1 = vec2psi(i1, s1, effective_inclusion_length, effective_skipping_length)
    psi2 = vec2psi(i2, s2, effective_inclusion_length, effective_skipping_length)
    iter_cutoff = 1; iter_maxrun = 100; count = 0; previous_sum = 0
    beta_0 = sum(psi1) / len(psi1)
    beta_1 = sum(psi2) / len(psi2)
    var1 = 10 * scipy.var(numpy.array(psi1) - beta_0)
    var2 = 10 * scipy.var(numpy.array(psi2) - beta_1)
    if var1 <= 0.01:
        var1 = 0.01
    if var2 <= 0.01:
        var2 = 0.01
    print('var1'); print(var1); print('var2'); print(var2)
    while (iter_cutoff > 0.01) & (count <= iter_maxrun):
        count += 1
        # iteration of beta
        beta_0 = sum(psi1) / len(psi1)
        beta_1 = sum(psi2) / len(psi2)
        print('var1'); print(var1); print('var2'); print(var2)
        #if abs(sum(psi1)/len(psi1)-sum(psi2)/len(psi2))>cutoff:
        if (sum(psi1) / len(psi1)) > (sum(psi2) / len(psi2)):  # minimize psi2 if this is the case
            xopt = fmin_l_bfgs_b(myfunc_1, [sum(psi2) / len(psi2)], myfunc_der_1,
                                 args=[psi1, psi2, var1, var2],
                                 bounds=[[0.001, 0.999 - cutoff]], iprint=-1)
            theta2 = max(min(float(xopt[0]), 1 - cutoff), 0); theta1 = theta2 + cutoff
        else:  # minimize psi1 if this is the case
            xopt = fmin_l_bfgs_b(myfunc_2, [sum(psi1) / len(psi1)], myfunc_der_2,
                                 args=[psi1, psi2, var1, var2],
                                 bounds=[[0.001, 0.999 - cutoff]], iprint=-1)
            theta1 = max(min(float(xopt[0]), 1 - cutoff), 0); theta2 = theta1 + cutoff
        print('constrain_1xopt'); print('theta'); print(theta1); print(theta2); print(xopt)
        #else:
        #    theta1=sum(psi1)/len(psi1);theta2=sum(psi2)/len(psi2);
        beta_0 = theta1; beta_1 = theta2
        # iteration of psi
        new_psi1 = []; new_psi2 = []; current_sum = 0; likelihood_sum = 0
        print('constrain_2xopt')
        for i in range(len(psi1)):
            xopt = fmin_l_bfgs_b(myfunc_individual, [psi1[i]], myfunc_individual_der,
                                 args=[i1[i], s1[i], beta_0, var1,
                                       effective_inclusion_length, effective_skipping_length],
                                 bounds=[[0.01, 0.99]], iprint=-1)
            new_psi1.append(float(xopt[0])); current_sum += float(xopt[1]); print(xopt)
            #likelihood_sum+=myfunc_marginal(new_psi1[i],[i1[i],s1[i],beta_0,var1,effective_inclusion_length,effective_skipping_length]);
        for i in range(len(psi2)):
            xopt = fmin_l_bfgs_b(myfunc_individual, [psi2[i]], myfunc_individual_der,
                                 args=[i2[i], s2[i], beta_1, var2,
                                       effective_inclusion_length, effective_skipping_length],
                                 bounds=[[0.01, 0.99]], iprint=-1)
            new_psi2.append(float(xopt[0])); current_sum += float(xopt[1]); print(xopt)
            #likelihood_sum+=myfunc_marginal(new_psi2[i],[i2[i],s2[i],beta_1,var2,effective_inclusion_length,effective_skipping_length]);
        print('new_psi[0]'); print(new_psi1[0]); print(new_psi2[0])
        psi1 = new_psi1; psi2 = new_psi2
        print('count'); print(count); print('previous_sum'); print(previous_sum); print('current_sum'); print(current_sum)
        if count > 1:
            iter_cutoff = abs(previous_sum - current_sum)
        previous_sum = current_sum
    #print('constrain');print(theta1);print(theta2);print(psi1);print(psi2);print(current_sum);print(likelihood_sum);
    #print(xopt);
    return ([current_sum, [psi1, psi2, beta_0, beta_1, var1, var2]])
def _box_cox_transform(self, verbose=False, method='standard'):
    """
    Performs the Box-Cox transformation, over different ranges, picking the optimal one w. respect to normality.
    """
    from scipy import stats
    a = sp.array(self.values)
    if method == 'standard':
        vals = (a - min(a)) + 0.1 * sp.var(a)
    else:
        vals = a
    sw_pvals = []
    lambdas = sp.arange(-2.0, 2.1, 0.1)
    for l in lambdas:
        if l == 0:
            vs = sp.log(vals)
        else:
            vs = ((vals ** l) - 1) / l
        r = stats.shapiro(vs)
        if sp.isfinite(r[0]):
            pval = r[1]
        else:
            pval = 0.0
        sw_pvals.append(pval)
    i = sp.argmax(sw_pvals)
    l = lambdas[i]
    if l == 0:
        vs = sp.log(vals)
    else:
        vs = ((vals ** l) - 1) / l
    self._perform_transform(vs, "box_cox")
    log.debug('optimal lambda was %0.1f' % l)
    return True
def simulate_betas(num_traits=1000, p=0.1, m=100, h2=0.5, effect_prior='gaussian', verbose=False):
    betas_list = []
    for i in range(num_traits):
        if effect_prior == 'gaussian':
            if p == 1.0:
                betas = stats.norm.rvs(0, sp.sqrt(h2 / m), size=m)
            else:
                M = int(round(m * p))
                betas = sp.concatenate((stats.norm.rvs(0, sp.sqrt(h2 / M), size=M),
                                        sp.zeros(m - M, dtype=float)))
        elif effect_prior == 'laplace':
            if p == 1.0:
                betas = stats.laplace.rvs(scale=sp.sqrt(h2 / (2 * m)), size=m)
            else:
                M = int(round(m * p))
                betas = sp.concatenate((stats.laplace.rvs(scale=sp.sqrt(h2 / (2 * M)), size=M),
                                        sp.zeros(m - M, dtype=float)))
        betas_var = sp.var(betas)
        beta_scalar = sp.sqrt(h2 / (m * betas_var))
        betas = betas * beta_scalar
        sp.random.shuffle(betas)
        betas_list.append(betas)
    return sp.array(betas_list)
def _exp_transform(values, standard=True):
    a = sp.array(values)
    if standard:
        vals = sp.exp((a - min(a)) + 0.1 * sp.var(a))
    else:
        vals = sp.exp(a)
    return vals
def main():
    args = parse_args()
    dt = 1.32e-14
    with h5py.File(args.filename) as h5:
        no_scans, no_steps, no_angles, no_pulses = h5["raw_quadratures"].shape
        if args.scans == "all":
            scans = range(no_scans)
        else:
            scans = args.scans
        t = linspace(0, no_steps * dt, no_steps)
        dphi = h5["corrected_angles"][0, 0, 1] - h5["corrected_angles"][0, 0, 0]
        phi = linspace(0.5 * pi, 2.5 * pi, 2. * pi / dphi)
        av_mean = scipy.zeros((len(phi), len(t)), dtype=scipy.float32)
        for i_scan in scans:
            for i_step in xrange(no_steps):
                print i_scan, i_step
                ip = interp1d(
                    h5["corrected_angles"][i_scan, i_step],
                    var(h5["standardized_quadratures"][i_scan, i_step], axis=1))
                av_mean[:, i_step] += ip(phi)
        from matplotlib import pyplot
        pyplot.imshow(av_mean)
        pyplot.show()
def infer_diag_post(self, X_, D_i):
    X_i = copy.copy(X_)
    ns = len(D_i)
    X_i.resize([ns, self.d])
    [m, V] = self.infer_diag(X_i, D_i)
    if sp.amin(V) <= -0.:
        print("negative/eq variance")
        print("_______________")
        raise(flow_Error)
    if sp.amin(sp.var(m, axis=0)) < -0.:
        print("negativevar of mean")
        print("_______________")
        raise(flow_Error)
    return [sp.mean(m, axis=0).reshape([1, ns]),
            (sp.mean(V, axis=0) + sp.var(m, axis=0)).reshape([1, ns])]
def _box_cox_transform(values, standard=True):
    """
    Performs the Box-Cox transformation, over different ranges, picking the optimal one w. respect to normality.
    """
    a = sp.array(values)
    if standard:
        vals = (a - min(a)) + 0.1 * sp.var(a)
    else:
        vals = a
    sw_pvals = []
    lambdas = sp.arange(-2.0, 2.1, 0.1)
    for l in lambdas:
        if l == 0:
            vs = sp.log(vals)
        else:
            vs = ((vals ** l) - 1) / l
        r = stats.shapiro(vs)
        if sp.isfinite(r[0]):
            pval = r[1]
        else:
            pval = 0.0
        sw_pvals.append(pval)
    i = sp.argmax(sw_pvals)
    l = lambdas[i]
    if l == 0:
        vs = sp.log(vals)
    else:
        vs = ((vals ** l) - 1) / l
    return vs
def mergelines(x, y):
    minx = max([min(i) for i in x])
    maxx = min([max(i) for i in x])
    fs = []
    for i in xrange(len(x)):
        #print [x[i].shape,y[i].shape]
        fs.append(interp1d(x[i], y[i]))
    X = [i for i in sorted(sp.hstack([sp.array(j) for j in x]))
         if i <= maxx and i >= minx]
    np = len(X)
    X = sp.array(X)
    Y = sp.empty(np)
    ub = sp.empty(np)
    lb = sp.empty(np)
    for i in xrange(np):
        q = [j(X[i]) for j in fs]
        Y[i] = sp.mean(q)
        v = sp.var(q)
        ub[i] = Y[i] + 2. * sp.sqrt(v)
        lb[i] = Y[i] - 2. * sp.sqrt(v)
    return X, Y, lb, ub
def ar1fit(ts):
    '''
    Fits an AR(1) model to the time series data ts.

    AR(1) is a linear model of the form

        x_t = beta * x_{t-1} + c + e_{t-1}

    where beta is the coefficient of term x_{t-1}, c is a constant and
    e_{t-1} is an i.i.d. noise term.  Here we assume that e_{t-1} is
    normally distributed.

    Returns the tuple (beta, c, sigma).
    '''
    # Fitting AR(1) entails finding beta, c, and the noise term.
    # Beta is well approximated by the coefficient of OLS regression
    # on the lag of the data with itself. Since the noise term is
    # assumed to be i.i.d. and normal, we must only estimate sigma,
    # the standard deviation.

    # Estimate beta
    x = ts[0:-1]
    y = ts[1:]
    p = sp.polyfit(x, y, 1)
    beta = p[0]

    # Estimate c
    c = sp.mean(ts) * (1 - beta)

    # Estimate the variance from the residuals of the OLS regression.
    yhat = sp.polyval(p, x)
    variance = sp.var(y - yhat)
    sigma = sp.sqrt(variance)

    return beta, c, sigma
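# A minimal usage sketch for ar1fit above (illustrative, not from the original
# source).  It simulates an AR(1) series with known parameters and checks that
# the estimates come back close; assumes ar1fit and its `sp` (scipy) import are
# in scope, with a scipy old enough to provide sp.polyfit/sp.polyval.
import numpy as np

np.random.seed(1)
beta_true, c_true, sigma_true = 0.7, 0.5, 0.2
ts = np.zeros(5000)
for t in range(1, len(ts)):
    ts[t] = beta_true * ts[t - 1] + c_true + np.random.normal(0.0, sigma_true)
print(ar1fit(ts))  # expected to be close to (0.7, 0.5, 0.2)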
def GRDRun(self, chains):
    """This is an implementation of the Gelman-Rubin diagnostic"""
    mean_chain = []
    var_chain = []

    if len(chains) == 1:
        lchain = len(chains[0]) // 2
        chains = [chains[0][:lchain], chains[0][lchain:]]
    else:
        clen = [len(chain) for chain in chains]
        if len(set(clen)) == 1:
            lchain = clen[0]
        else:
            #print('take same # steps', clen)
            lchain = min(clen)

    try:
        for chain in chains:
            mean_chain.append(sp.mean(chain[-lchain:], axis=0))
            var_chain.append(sp.var(chain[-lchain:], axis=0))
    except:
        return 1

    M = sp.mean(mean_chain, axis=0)
    W = sp.mean(var_chain, axis=0)

    B = sum([(b - M)**2 for b in mean_chain])
    B = lchain / (len(chains) - 1.) * B
    R = (1. - 1. / lchain) * W + B / lchain

    result = sp.array(sp.absolute(1 - sp.sqrt(R / W)))
    return result
def univariate_gelman_rubin(chains):
    """
    http://www.stat.columbia.edu/~gelman/research/published/brooksgelman2.pdf
    dim 0: nchains
    dim 1: nsteps
    """
    nchains = len(chains)
    mean = scipy.asarray([scipy.mean(chain, axis=0) for chain in chains])
    variance = scipy.asarray([scipy.var(chain, ddof=1, axis=0) for chain in chains])
    nsteps = scipy.asarray([len(chain) for chain in chains])
    Wn1 = scipy.mean(variance)
    Wn = scipy.mean((nsteps - 1.) / nsteps * variance)
    B = scipy.var(mean, ddof=1)
    V = Wn + (nchains + 1.) / nchains * B
    return scipy.sqrt(V / Wn1)
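# A minimal usage sketch for univariate_gelman_rubin above (illustrative only).
# Two independent, well-mixed chains drawn from the same distribution should
# give a potential scale reduction factor close to 1.  Assumes the function and
# its `scipy` import (with the legacy scipy.mean/scipy.var helpers) are in scope.
import numpy as np

np.random.seed(2)
chain_a = np.random.normal(0.0, 1.0, size=10000)
chain_b = np.random.normal(0.0, 1.0, size=10000)
print(univariate_gelman_rubin([chain_a, chain_b]))  # expected to be near 1.0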
def fish_many():
    # For inspecting variable contents
    def variable_confirm(c1, c2):
        line_f = list_format(c2)
        line = "---------------------------"
        print(c1.head())
        print(line)
        print(line_f.format())
        print(line)
        print("Mean of c2 >> " + str(f'{c2.mean():.3f}'))

    # Sample data of 10,000 fish
    data = pd.read_csv("/root/app/sts4_csv.csv")["length"]
    # Sample of 10 fish
    rmdata10 = np.random.choice(data, size=10, replace=False)

    # Prepare the population distribution
    base_mean = data.mean()
    base_std = sp.std(data, ddof=0)  # population standard deviation
    base_var = sp.var(data, ddof=0)  # population variance

    # Draw the graph
    def sigma_graph(list, op1, op2, op3):
        title = "fish_population_graph"
        plt.title(title)
        graph = sns.distplot(list, kde=False, color='black')
        # Display
        canvas = f.image_graph(graph, title)
        canvas.view_option(op1, op2, op3)

    # run function
    # variable_confirm(data, rmdata10)
    sigma_graph(data, base_mean, base_std, base_var)
def calc_opt_rho(self):
    from limix_core.covar import FreeFormCov
    from limix_core.gp import GP2KronSumLR
    _covs = sp.concatenate([self.F, self.W, self.x], 1)
    xoE = self.x * self.Env
    gp = GP2KronSumLR(Y=self.y, F=_covs, A=sp.eye(1), Cn=FreeFormCov(1), G=xoE)
    gp.covar.Cr.setCovariance(1e-4 * sp.ones((1, 1)))
    gp.covar.Cn.setCovariance(0.02 * sp.ones((1, 1)))
    gp.optimize(verbose=False)

    # var_xEEx = sp.tr(xEEx P)/(n-1) = sp.tr(PW (PW)^T)/(n-1) = (PW**2).sum()/(n-1)
    # W = xE
    # variance heterogeneity
    var_xEEx = ((xoE - xoE.mean(0))**2).sum()
    var_xEEx /= float(self.y.shape[0] - 1)
    v_het = gp.covar.Cr.K()[0, 0] * var_xEEx

    # variance persistent
    v_comm = sp.var(gp.b()[-1] * self.x)
    rho = v_het / (v_comm + v_het)
    return rho
def DataArrayStatisticsReport(parent, titleString, tempdata):
    scrolledText = tk_stxt.ScrolledText(parent, width=textboxWidth, height=textboxHeight, wrap=tk.NONE)
    scrolledText.insert(tk.END, titleString + '\n\n')

    # must at least have max and min
    minData = min(tempdata)
    maxData = max(tempdata)

    if maxData == minData:
        scrolledText.insert(tk.END, 'All data has the same value,\n')
        scrolledText.insert(tk.END, "value = %-.16E\n" % (minData))
        scrolledText.insert(tk.END, 'statistics cannot be calculated.')
    else:
        scrolledText.insert(tk.END, "max = %-.16E\n" % (maxData))
        scrolledText.insert(tk.END, "min = %-.16E\n" % (minData))

        try:
            temp = scipy.mean(tempdata)
            scrolledText.insert(tk.END, "mean = %-.16E\n" % (temp))
        except:
            scrolledText.insert(tk.END, "mean gave error in calculation\n")

        try:
            temp = scipy.stats.sem(tempdata)
            scrolledText.insert(tk.END, "standard error of mean = %-.16E\n" % (temp))
        except:
            scrolledText.insert(tk.END, "standard error of mean gave error in calculation\n")

        try:
            temp = scipy.median(tempdata)
            scrolledText.insert(tk.END, "median = %-.16E\n" % (temp))
        except:
            scrolledText.insert(tk.END, "median gave error in calculation\n")

        try:
            temp = scipy.var(tempdata)
            scrolledText.insert(tk.END, "variance = %-.16E\n" % (temp))
        except:
            scrolledText.insert(tk.END, "variance gave error in calculation\n")

        try:
            temp = scipy.std(tempdata)
            scrolledText.insert(tk.END, "std. deviation = %-.16E\n" % (temp))
        except:
            scrolledText.insert(tk.END, "std. deviation gave error in calculation\n")

        try:
            temp = scipy.stats.skew(tempdata)
            scrolledText.insert(tk.END, "skew = %-.16E\n" % (temp))
        except:
            scrolledText.insert(tk.END, "skew gave error in calculation\n")

        try:
            temp = scipy.stats.kurtosis(tempdata)
            scrolledText.insert(tk.END, "kurtosis = %-.16E\n" % (temp))
        except:
            scrolledText.insert(tk.END, "kurtosis gave error in calculation\n")

    return scrolledText
def dumpSeries(self):
    for series in self.series:
        print "name:", series.getFullName()
        for index, value in enumerate(series):
            print value
            #print "index=",index, " , value=",value
        print "avg=", scipy.average(series), " , variance=", scipy.var(series), " , stddev=", scipy.std(series)
def indof_constfeatures(X, axis=0):
    '''
    Assumes features are columns (by default, but can do rows), and checks to see
    if all features are simply constants, such that it is equivalent to a bias and nothing else
    '''
    featvar = sp.var(X, axis=axis)
    badind = sp.nonzero(featvar == 0)[0]
    return badind
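# A minimal usage sketch for indof_constfeatures above (illustrative only):
# column 1 is constant, so its index should be reported.  Assumes the function
# and its `sp` (scipy) import are in scope.
import numpy as np

X = np.array([[1.0, 5.0, 2.0],
              [2.0, 5.0, 3.0],
              [3.0, 5.0, 4.0]])
print(indof_constfeatures(X))  # expected: [1]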
def _exp_transform(self, method='standard'):
    a = sp.array(self.values)
    if method == 'standard':
        vals = sp.exp((a - min(a)) + 0.1 * sp.var(a))
    else:
        vals = sp.exp(a)
    self._perform_transform(vals, "exp")
    return True
def test_basic(self):
    time_stream, ra, dec, az, el, time, mask_inds = \
        self.Maker.preprocess_data(self.Blocks)
    self.assertTrue(sp.allclose(sp.mean(time_stream, 1), 0, atol=0.2))
    self.assertTrue(sp.allclose(sp.var(time_stream, 1), self.norms[0, :], rtol=0.4))
    self.assertTrue(sp.allclose(self.Maker.channel_vars, self.norms[0, :], rtol=0.4))
def findAccessAnomalies(data):
    # break the accesses down into 10-second intervals
    intervalDict = {}
    for access in data:
        # round seconds down to the start of its 10-second interval
        seconds = int(access[3])
        seconds = seconds - (seconds % 10)
        key = (int(access[1]), int(access[2]), seconds)
        if key in intervalDict:
            intervalDict[key].append(access)
        else:
            intervalDict[key] = [access]

    totAccess = [len(intervalDict[key]) for key in intervalDict]
    totAccessMean = sc.mean(totAccess)
    totAccessVar = sc.var(totAccess)
    # print totAccessMean
    # print totAccessVar

    clientAccess = []
    clientDict = {}
    for key in intervalDict:
        count = Counter([access[10] for access in intervalDict[key]])
        for ckey in count:
            clientAccess.append(count[ckey])
            clientDict[(key[0], key[1], key[2], ckey)] = count[ckey]

    clientAccessMean = sc.mean(clientAccess)
    clientAccessVar = sc.var(clientAccess)
    # print clientAccessMean
    # print clientAccessVar

    clientAttackProb = {}
    for key in clientDict:
        totProb = totAccessVar / pow((totAccessMean - len(intervalDict[(key[0], key[1], key[2])])), 2)
        clientProb = clientAccessVar / pow((clientAccessMean - clientDict[key]), 2)
        prob = (totProb + clientProb) / 2
        clientAttackProb[key] = prob

    arr = []
    for i in range(10):
        minKey = min(clientAttackProb, key=clientAttackProb.get)
        arr.append((minKey, clientAttackProb[minKey]))
        clientAttackProb.pop(minKey, None)
    return arr
def eval(self, f=lambda x: 1):
    """
    Evaluate a function on the values and take the weighted average.
    """
    if self.weights is None:
        vec = [f(v) for v in self.values]
        result = mean(vec)
        variance = var(vec)
    else:
        v = self.values
        vec = [f(v[i]) * w for i, w in enumerate(self.weights)]
        result = sum(vec)
        variance = var(vec)
    return result, variance
def DataArrayStatistics(inArray):
    returnString = ''  # build this as we progress

    # must at least have max and min
    minData = min(inArray)
    maxData = max(inArray)

    if maxData == minData:
        returnString += 'All data has the same value,\n'
        returnString += "value = %-.16E\n" % (minData)
        returnString += 'statistics cannot be calculated.'
    else:
        returnString += "max = %-.16E\n" % (maxData)
        returnString += "min = %-.16E\n" % (minData)

        try:
            temp = scipy.mean(inArray)
            returnString += "mean = %-.16E\n" % (temp)
        except:
            returnString += "mean gave error in calculation\n"

        try:
            temp = scipy.stats.sem(inArray)
            returnString += "standard error of mean = %-.16E\n" % (temp)
        except:
            returnString += "standard error of mean gave error in calculation\n"

        try:
            temp = scipy.median(inArray)
            returnString += "median = %-.16E\n" % (temp)
        except:
            returnString += "median gave error in calculation\n"

        try:
            temp = scipy.var(inArray)
            returnString += "variance = %-.16E\n" % (temp)
        except:
            returnString += "variance gave error in calculation\n"

        try:
            temp = scipy.std(inArray)
            returnString += "std. deviation = %-.16E\n" % (temp)
        except:
            returnString += "std. deviation gave error in calculation\n"

        try:
            temp = scipy.stats.skew(inArray)
            returnString += "skew = %-.16E\n" % (temp)
        except:
            returnString += "skew gave error in calculation\n"

        try:
            temp = scipy.stats.kurtosis(inArray)
            returnString += "kurtosis = %-.16E\n" % (temp)
        except:
            returnString += "kurtosis gave error in calculation\n"

    return returnString
def detect_skew(img, min_angle=-20, max_angle=20, quality='low'):
    img = sp.atleast_2d(img)
    rows, cols = img.shape
    min_min_angle = min_angle
    max_max_angle = max_angle

    if quality == 'low':
        resolution = sp.arctan2(2.0, cols) * 180.0 / sp.pi
        min_target_size = 100
        resize_order = 1
    elif quality == 'high':
        resolution = sp.arctan2(1.0, cols) * 180.0 / sp.pi
        min_target_size = 300
        resize_order = 3
    else:
        resolution = sp.arctan2(1.0, cols) * 180.0 / sp.pi
        min_target_size = 200
        resize_order = 2

    # resize the image so it's faster to work with
    min_size = min(rows, cols)
    target_size = min_target_size if min_size > min_target_size else min_size
    resize_ratio = float(target_size) / min_size
    img = imresize(img, resize_ratio)
    rows, cols = img.shape

    # pad the image and invert the colors
    img *= -1
    img += 255
    padded_img = sp.zeros((rows*2, cols*2))
    padded_img[rows//2:rows//2+rows, cols//2:cols//2+cols] = img
    img = padded_img

    # keep dividing the interval in half to achieve O(log(n))
    while True:
        current_resolution = (max_angle - min_angle) / 30.0
        best_angle = None
        best_variance = 0.0

        # rotate the image, sum the pixel values in each row for each rotation
        # then find the variance of all the sums, pick the highest variance
        for i in xrange(31):
            angle = min_angle + i * current_resolution
            rotated_img = rotate(img, angle, reshape=False, order=resize_order)
            num_black_pixels = sp.sum(rotated_img, axis=1)
            variance = sp.var(num_black_pixels)
            if variance > best_variance:
                best_angle = angle
                best_variance = variance

        if current_resolution < resolution:
            break

        # update the angle range
        min_angle = max(best_angle - current_resolution, min_min_angle)
        max_angle = min(best_angle + current_resolution, max_max_angle)

    return best_angle
def aggregate_raw_data(y):
    '''
    Compute means and variances of the raw data y

    :param y: array(n_periods x n_individuals x n_vars)
    :return: tuple of per-period means and variances
    '''
    m = sp.mean(y, axis=0)
    c = sp.var(y, axis=0)
    return m, c
def _fit(self, type, vc=False):
    # 2. init
    if type == 'null':
        self.gp[type].covar.Cn.setCovariance(self.covY)
    elif type == 'full':
        Cn0_K = self.gp['null'].covar.Cn.K()
        #self.gp[type].covar.Cr.setCovariance(1e-4*sp.ones(self.covY.shape)+1e-4*sp.eye(self.covY.shape[0]))
        self.gp[type].covar.Cr.setCovariance(0.5 * Cn0_K)
        self.gp[type].covar.Cn.setCovariance(0.5 * Cn0_K)
    elif type == 'block':
        Crf_K = self.gp['full'].covar.Cr.K()
        Cnf_K = self.gp['full'].covar.Cn.K()
        self.gp[type].covar.Cr.scale = sp.mean(Crf_K)
        self.gp[type].covar.Cn.setCovariance(Cnf_K)
    elif type == 'rank1':
        Crf_K = self.gp['full'].covar.Cr.K()
        Cnf_K = self.gp['full'].covar.Cn.K()
        self.gp[type].covar.Cr.setCovariance(Crf_K)
        self.gp[type].covar.Cn.setCovariance(Cnf_K)
    else:
        print('poppo')

    self.gp[type].optimize(factr=self.factr, verbose=False)

    RV = {
        'Cr': self.gp[type].covar.Cr.K(),
        'Cn': self.gp[type].covar.Cn.K(),
        'B': self.gp[type].mean.B[0],
        'LML': sp.array([self.gp[type].LML()]),
        'LMLgrad': sp.array([sp.mean((self.gp[type].LML_grad()['covar'])**2)])
    }

    if vc:
        # tr(P CoR) = tr(C)tr(R) - tr(Ones C) tr(Ones R) / float(NP)
        #           = tr(C)tr(R) - C.sum() * R.sum() / float(NP)
        trRr = (self.Xr**2).sum()
        var_r = sp.trace(RV['Cr']) * trRr / float(self.Y.size - 1)
        var_c = sp.var(sp.dot(self.F, RV['B']))
        var_n = sp.trace(RV['Cn']) * self.Y.shape[0]
        var_n -= RV['Cn'].sum() / float(RV['Cn'].shape[0])
        var_n /= float(self.Y.size - 1)
        RV['var'] = sp.array([var_r, var_c, var_n])

        if 0 and self.Y.size < 5000:
            pdb.set_trace()
            Kr = sp.kron(RV['Cr'], sp.dot(self.Xr, self.Xr.T))
            Kn = sp.kron(RV['Cn'], sp.eye(self.Y.shape[0]))
            _var_r = sp.trace(Kr - Kr.mean(0)) / float(self.Y.size - 1)
            _var_n = sp.trace(Kn - Kn.mean(0)) / float(self.Y.size - 1)
            _var = sp.array([_var_r, var_c, _var_n])
            print(((_var - RV['var'])**2).mean())

        if type == 'full':
            # calculate within region vcs
            Cr_block = sp.mean(RV['Cr']) * sp.ones(RV['Cr'].shape)
            Cr_rank1 = lowrank_approx(RV['Cr'], rank=1)
            var_block = sp.trace(Cr_block) * trRr / float(self.Y.size - 1)
            var_rank1 = sp.trace(Cr_rank1) * trRr / float(self.Y.size - 1)
            RV['var_r'] = sp.array([var_block, var_rank1 - var_block, var_r - var_rank1])

    return RV
def _measureColorEntryTubes(self, colorentry, n=5):
    vol_col_spec_list = self.calibtubes.measureVoltages(
        [colorentry.voltages,], imi=0.5, each=n)
    colorentry.tubes_xyY = (
        scipy.mean([vol_col_spec[1][0] for vol_col_spec in vol_col_spec_list]),
        scipy.mean([vol_col_spec[1][1] for vol_col_spec in vol_col_spec_list]),
        scipy.mean([vol_col_spec[1][2] for vol_col_spec in vol_col_spec_list]))
    colorentry.tubes_xyY_sd = (
        math.sqrt(scipy.var([vol_col_spec[1][0] for vol_col_spec in vol_col_spec_list])),
        math.sqrt(scipy.var([vol_col_spec[1][1] for vol_col_spec in vol_col_spec_list])),
        math.sqrt(scipy.var([vol_col_spec[1][2] for vol_col_spec in vol_col_spec_list])))
def calc_variance(filename, key):
    """ Calculates the variance of the file input for the given key. """
    a = []
    for item in items(filename):
        a.append(item[key])
    return scipy.var(a)
def dist_parameters(lens):
    # compute negative binomial distribution parameters
    m = scipy.mean(lens)
    v = scipy.var(lens)
    p = (v - m) / v
    r = m * (1 - p) / p
    return m, v, p, r
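# A minimal sanity check for dist_parameters above (illustrative, not from the
# original source).  The function applies the method-of-moments relations
# p = (v - m) / v and r = m * (1 - p) / p, which require overdispersed data
# (v > m).  numpy's negative_binomial(n, p) treats p as the per-trial success
# probability, so the `p` recovered below corresponds to 1 minus that value.
import numpy as np

np.random.seed(3)
r_true, p_recovered = 5.0, 0.4
lens = np.random.negative_binomial(r_true, 1.0 - p_recovered, size=100000)
print(dist_parameters(lens))  # m, v, then p close to 0.4 and r close to 5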
def test_serial1(guys):
    arr = sp.array(list(to_ints(guys)), dtype="float")
    n = len(guys)
    mu = sp.mean(arr)
    v = sp.var(arr)
    front = arr[1:]
    back = arr[:-1]
    return 1 - abs((1. / n * sp.sum((front - mu) * (back - mu))) / (v + 1e-10))
def __extract_conditions(self, dmap):
    conditions = dict()
    for v in self.conditioned_vars:
        values = dmap[v]
        if sp.var(values) > 0:
            raise ValueError(
                "Expected conditioning variable {0} to be constant at inference time".format(v))
        conditions[v] = values[0]
    return conditions
def MLE_iteration(i1, i2, s1, s2, effective_inclusion_length, effective_skipping_length):
    psi1 = vec2psi(i1, s1, effective_inclusion_length, effective_skipping_length)
    psi2 = vec2psi(i2, s2, effective_inclusion_length, effective_skipping_length)
    iter_cutoff = 1; iter_maxrun = 100; count = 0; previous_sum = 0
    beta_0 = sum(psi1) / len(psi1)
    beta_1 = sum(psi2) / len(psi2)
    var1 = 10 * scipy.var(numpy.array(psi1) - beta_0)
    var2 = 10 * scipy.var(numpy.array(psi2) - beta_1)
    if var1 <= 0.01:
        var1 = 0.01
    if var2 <= 0.01:
        var2 = 0.01
    #print('var1');print(var1);print('var2');print(var2);
    while (iter_cutoff > 0.01) & (count <= iter_maxrun):
        count += 1
        # iteration of beta
        beta_0 = sum(psi1) / len(psi1)
        beta_1 = sum(psi2) / len(psi2)
        xopt = fmin_l_bfgs_b(myfunc_multivar, [beta_0, beta_1], myfunc_multivar_der,
                             args=[psi1, psi2, var1, var2],
                             bounds=[[0.01, 0.99], [0.01, 0.99]], iprint=-1)
        beta_0 = float(xopt[0][0])
        beta_1 = float(xopt[0][1])
        #print('unconstrain_1xopt');print(xopt);
        #print('theta');print(beta_0);print(beta_1);print('theta_end');
        # iteration of psi
        new_psi1 = []; new_psi2 = []; current_sum = 0; likelihood_sum = 0
        for i in range(len(psi1)):
            xopt = fmin_l_bfgs_b(myfunc_individual, [psi1[i]], myfunc_individual_der,
                                 args=[i1[i], s1[i], beta_0, var1,
                                       effective_inclusion_length, effective_skipping_length],
                                 bounds=[[0.01, 0.99]], iprint=-1)
            new_psi1.append(float(xopt[0])); current_sum += float(xopt[1])  #print(xopt);
            #likelihood_sum+=myfunc_marginal(new_psi1[i],[i1[i],s1[i],beta_0,var1,effective_inclusion_length,effective_skipping_length]);
        for i in range(len(psi2)):
            xopt = fmin_l_bfgs_b(myfunc_individual, [psi2[i]], myfunc_individual_der,
                                 args=[i2[i], s2[i], beta_1, var2,
                                       effective_inclusion_length, effective_skipping_length],
                                 bounds=[[0.01, 0.99]], iprint=-1)
            new_psi2.append(float(xopt[0])); current_sum += float(xopt[1])  #print(xopt);
            #likelihood_sum+=myfunc_marginal(new_psi2[i],[i2[i],s2[i],beta_1,var2,effective_inclusion_length,effective_skipping_length]);
        #print('new_psi[0]');print(new_psi1[0]);#print(new_psi2[0]);
        psi1 = new_psi1; psi2 = new_psi2  #print
        #print('count');print(count);('previous_sum');print(previous_sum);print('current_sum');print(current_sum);
        if count > 1:
            iter_cutoff = abs(previous_sum - current_sum)
        previous_sum = current_sum
    #print('unconstrain');print(beta_0);print(beta_0+beta_1);print(psi1);print(psi2);print(current_sum);print(likelihood_sum);
    #print(xopt);
    if count > iter_maxrun:
        return ([current_sum, [psi1, psi2, 0, 0, var1, var2]])
    return ([current_sum, [psi1, psi2, beta_0, beta_1, var1, var2]])
def MLE_iteration(i1, i2, s1, s2, effective_inclusion_length, effective_skipping_length):
    psi1 = vec2psi(i1, s1, effective_inclusion_length, effective_skipping_length)
    psi2 = vec2psi(i2, s2, effective_inclusion_length, effective_skipping_length)
    iter_cutoff = 1; iter_maxrun = 100; count = 0; previous_sum = 0
    beta_0 = sum(psi1) / len(psi1)
    beta_1 = sum(psi2) / len(psi2)
    var1 = 10 * scipy.var(numpy.array(psi1) - beta_0)
    var2 = 10 * scipy.var(numpy.array(psi2) - beta_1)
    if var1 <= 0.01:
        var1 = 0.01
    if var2 <= 0.01:
        var2 = 0.01
    print('var1'); print(var1); print('var2'); print(var2)
    while (iter_cutoff > 0.01) & (count <= iter_maxrun):
        count += 1
        # iteration of beta
        beta_0 = sum(psi1) / len(psi1)
        beta_1 = sum(psi2) / len(psi2)
        xopt = fmin_l_bfgs_b(myfunc_multivar, [beta_0, beta_1], myfunc_multivar_der,
                             args=[psi1, psi2, var1, var2],
                             bounds=[[0.01, 0.99], [0.01, 0.99]], iprint=-1)
        beta_0 = float(xopt[0][0])
        beta_1 = float(xopt[0][1])
        print('unconstrain_1xopt'); print(xopt)
        print('theta'); print(beta_0); print(beta_1); print('theta_end')
        # iteration of psi
        new_psi1 = []; new_psi2 = []; current_sum = 0; likelihood_sum = 0
        for i in range(len(psi1)):
            xopt = fmin_l_bfgs_b(myfunc_individual, [psi1[i]], myfunc_individual_der,
                                 args=[i1[i], s1[i], beta_0, var1,
                                       effective_inclusion_length, effective_skipping_length],
                                 bounds=[[0.01, 0.99]], iprint=-1)
            new_psi1.append(float(xopt[0])); current_sum += float(xopt[1]); print(xopt)
            #likelihood_sum+=myfunc_marginal(new_psi1[i],[i1[i],s1[i],beta_0,var1,effective_inclusion_length,effective_skipping_length]);
        for i in range(len(psi2)):
            xopt = fmin_l_bfgs_b(myfunc_individual, [psi2[i]], myfunc_individual_der,
                                 args=[i2[i], s2[i], beta_1, var2,
                                       effective_inclusion_length, effective_skipping_length],
                                 bounds=[[0.01, 0.99]], iprint=-1)
            new_psi2.append(float(xopt[0])); current_sum += float(xopt[1]); print(xopt)
            #likelihood_sum+=myfunc_marginal(new_psi2[i],[i2[i],s2[i],beta_1,var2,effective_inclusion_length,effective_skipping_length]);
        print('new_psi[0]'); print(new_psi1[0]); print(new_psi2[0])
        psi1 = new_psi1; psi2 = new_psi2; print
        print('count'); print(count); ('previous_sum'); print(previous_sum); print('current_sum'); print(current_sum)
        if count > 1:
            iter_cutoff = abs(previous_sum - current_sum)
        previous_sum = current_sum
    #print('unconstrain');print(beta_0);print(beta_0+beta_1);print(psi1);print(psi2);print(current_sum);print(likelihood_sum);
    #print(xopt);
    if count > iter_maxrun:
        return ([current_sum, [psi1, psi2, 0, 0, var1, var2]])
    return ([current_sum, [psi1, psi2, beta_0, beta_1, var1, var2]])
def diagPlots(s, qs):
    axs[0].plot(ts, qs, '-')
    Rs = (qs[2:] - 2*qs[1:-1] + qs[:-2])/tau**2 + pot.dV(qs[1:-1]) + gamma * (qs[2:]-qs[:-2])/(2*tau)
    print scipy.mean(Rs) * (beta*tau)/gamma, scipy.var(Rs) * (beta*tau) / gamma
    # axs[1].plot(ts[1:-1], Rs)
    # axs[1].plot(s, scipy.var(Rs) * (beta*tau) / gamma, '.')
    mean = scipy.mean(Rs)
    std = scipy.std(Rs)
    axs[1].plot(scipy.array((s, s)), scipy.array((mean-std, mean+std)) * (beta*tau) / gamma, '-')
    axs[1].plot(s, mean * (beta*tau) / gamma, 'o')
def descriptive_statistics():
    my_data = sp.randn(100)  # 100 random numbers
    print len(my_data)  # 100
    #print my_data
    # [ -9.90322017e-01  1.15233159e-01  -2.93076899e-02  -2.17625707e-01
    #   -1.27680249e-02  5.14887346e-01   1.89355659e-01   1.52055706e+00...]

    ### NumPy - some basic functions from numpy and scipy overlap
    print("Mean: {0:8.6f}".format(np.mean(my_data)))          # Mean: 0.094097
    print("Minimum: {0:8.6f}".format(np.min(my_data)))        # Minimum: -2.437701
    print("Maximum: {0:8.6f}".format(np.max(my_data)))        # Maximum: 2.333469
    print("Median: {0:8.6f}".format(np.median(my_data)))      # Median: 0.084608

    ### SciPy
    print("Variance with N in denominator: {0:8.6f}".format(sp.var(my_data)))
    # Variance with N in denominator: 1.011191
    print("Variance with N-1 in denominator: {0:8.6f}".format(sp.var(my_data, ddof=1)))
    # Variance with N-1 in denominator: 1.021405
    print("Std. Deviation: {0:8.6f}".format(sp.std(my_data)))     # Std. Deviation: 1.005580
    print("Skew: {0:8.6f}".format(stats.skew(my_data)))           # Skew: -0.085338
    print("Kurtosis: {0:8.6f}".format(stats.kurtosis(my_data)))   # Kurtosis: -0.511248
    print("Describe: "), stats.describe(my_data)
def trustvariance(K, d):
    """
    Evaluates the trust variance on one or more datasets.  If the same quantity
    is evaluated twice, the evaluate function can remember the result
    (when called through evolutionmap).

    Parameters:
        K = network
        d = date
    """
    return (d, float(scipy.var(K.weights_list())))
def diagPlots(s, qs):
    # axs[0].plot(ts, qs, '-')
    Rs = (qs[2:] - 2*qs[1:-1] + qs[:-2])/tau**2 + pot.dV(qs[1:-1]) + gamma * (qs[2:]-qs[:-2])/(2*tau)
    # print (qs[2:] - 2*qs[1:-1] + qs[:-2])/tau**2
    # print pot.dV(qs[1:-1])
    # print gamma * (qs[2:]-qs[:-2])/(2*tau)
    print scipy.mean(Rs) * (beta*tau)/gamma, scipy.var(Rs) * (beta*tau) / gamma
    # axs[1].plot(ts[1:-1], Rs)
    # axs[1].plot(s, scipy.var(Rs) * (beta*tau) / gamma, '.')
    mean = scipy.mean(Rs)
    std = scipy.std(Rs)
def fnormal(X):
    mu = np.array(sc.mean(X, 0))   # get the mean of the data
    var = np.array(sc.var(X, 0))   # get the variance of the data
    var = var**0.5                 # calculate the standard deviation of the data
    mu = np.ones([np.size(X, 0), np.size(X, 1)]) * mu    # build the mean matrix
    var = np.ones([np.size(X, 0), np.size(X, 1)]) * var  # build the standard deviation matrix
    X = np.subtract(X, mu)         # subtract the mean from the given data
    X = np.divide(X, var)          # divide the data by its corresponding standard deviation
    return X, mu, var              # return the normalised data along with mean and standard deviation
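# A minimal usage sketch for fnormal above (illustrative only): column-wise
# z-scoring of a small matrix.  Assumes the function and its `np`/`sc` imports
# are in scope (an older scipy that still provides sc.mean and sc.var).
import numpy as np

X = np.array([[1.0, 10.0],
              [2.0, 20.0],
              [3.0, 30.0]])
Xn, mu, sd = fnormal(X)
print(Xn.mean(axis=0))  # expected to be ~0 per column
print(Xn.std(axis=0))   # expected to be ~1 per column (population std)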
def extend_x(arr, additions=True, extension=True):
    if extension:
        x.extend(arr)
    if additions:
        x.append(scipy.std(arr))
        x.append(scipy.var(arr))
        x.append(sum(arr) / len(arr))
        x.append(sum(np.abs(arr)) / len(arr))
        x.append(min(arr))
        x.append(max(arr))
        x.append(scipy.mean(arr))
        x.append(scipy.median(arr))
def format_results(kernel, times):
    '''
    Convenience function to convert the results of the timeit function
    into a dictionary.
    '''
    res = dict()
    res["kernel"] = kernel
    res["avg"] = scipy.mean(times)
    res["var"] = scipy.var(times)
    res["max"] = max(times)
    res["min"] = min(times)
    return res
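# A minimal usage sketch for format_results above (illustrative only); the
# "sum_kernel" label is made up.  Assumes the function and its `scipy` import
# are in scope.
import timeit

times = timeit.repeat("sum(range(1000))", number=1000, repeat=5)
print(format_results("sum_kernel", times))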
def stats(self, startdate, enddate, mktbasket, avdate, output=False, mappingoverride=None):
    """
    Calculates statistics for a fund over a period.

    Parameters
    ----------
    startdate : datetime
        beginning of statistic period
    enddate : datetime
        end of statistic period
    mktbasket : dict
        dictionary of market streams
    output : bool
        if True, output results to db
    mappingoverride : None or mapping dictionary
        whether to override the db mapping

    Returns
    -------
    stats : dict
        dictionary of statistics
    """
    actualstream, projstream = self.project(mktbasket, mappingoverride)
    if actualstream[startdate:enddate] is None:
        return None
    if projstream[startdate:enddate] is None:
        return None
    actual = actualstream[startdate:enddate].returns
    projected = projstream[startdate:enddate].returns
    diff = actual - projected
    outdata = {
        'TE': scipy.std(diff) * 100.0 * 100.0,
        'BETA': scipy.cov(projected, actual, bias=1)[1, 0] / scipy.var(projected),
        'ALPHA': (scipy.product(diff + 1.0)) ** (1.0 / diff.size) - 1.0,
        'VOL': scipy.std(actual) * scipy.sqrt(252.0),
        'PROJ': scipy.product(1.0 + projected) - 1.0,
        'ACT': scipy.product(1.0 + actual) - 1.0,
        'R2': 0.0 if scipy.all(actual == 0.0) else scipy.corrcoef(projected, actual)[1, 0] ** 2.0,
        'AV': self.av(avdate),
        'DELTA': self.deltaestimate(avdate)
    }
    outdata['DIFF'] = outdata['ACT'] - outdata['PROJ']
    outdata['PL'] = outdata['DELTA'] * outdata['DIFF'] * 100.0
    if output:
        cnxn = pyodbc.connect(ORACLESTRING)
        cursor = cnxn.cursor()
        sql = 'INSERT INTO FUNDOUTPUT VALUES ({0!s},{1!s},{2!s},{3!s},{4!s},{5!s},{6},{7},{8!s},{9!s},{10!s},{11!s},{12!s},{13!s});'
        sql = sql.format(self.fundcode, outdata['PROJ'], outdata['ACT'], outdata['DIFF'],
                         outdata['DELTA'], outdata['PL'], oracledatebuilder(startdate),
                         oracledatebuilder(enddate), outdata['TE'], outdata['R2'],
                         outdata['BETA'], outdata['ALPHA'], outdata['VOL'], outdata['AV'])
        cursor.execute(sql)
        cnxn.commit()
        cnxn.close()
    return outdata
            self.mapping[indexes[i]] = finalbeta[i]
        return self.mapping

    def stats(self, startdate, enddate, mktbasket, output=False):
        """
        Calculates statistics for a fund over a period.

        Parameters
        ----------
        startdate : datetime
            beginning of statistic period
        enddate : datetime
            end of statistic period
        mktbasket : dict
            dictionary of market streams
        output : bool
            if True, output results to db

        Returns
        -------
        stats : dict
            dictionary of statistics
        """
        inputmatrix, fundreturns, indexes, daterange = self.align(startdate, enddate, mktbasket)
        if self.mapping and not (inputmatrix is None):
            weights = scipy.array([self.mapping[mykey] if mykey in self.mapping else 0.0
                                   for mykey in mktbasket.keys()])
            projected = scipy.dot(inputmatrix, weights.reshape(len(indexes), 1)).flatten()
            actual = fundreturns.flatten()
            diff = actual - projected
            outdata = {
                'TE': scipy.std(diff) * 100.0 * 100.0,
                'BETA': scipy.cov(projected, actual)[1, 0] / scipy.var(projected),
                'ALPHA': (scipy.product(diff + 1.0)) ** (1.0 / diff.size) - 1.0,
                'VOL': scipy.std(actual) * scipy.sqrt(252.0),
                'PROJ': scipy.product(1.0 + projected) - 1.0,
                'ACT': scipy.product(1.0 + actual) - 1.0,
                'R2': 0.0 if scipy.all(actual == 0.0) else scipy.corrcoef(projected, actual)[1, 0] ** 2.0,
                'AV': self.av(startdate),
                'DELTA': self.deltaestimate(startdate)
            }
            outdata['DIFF'] = outdata['ACT'] - outdata['PROJ']
            outdata['PL'] = outdata['DELTA'] * outdata['DIFF'] * 100.0
            if output:
                cnxn = pyodbc.connect(ORACLESTRING)
                cursor = cnxn.cursor()
                sql = 'INSERT INTO FUNDOUTPUT VALUES ({0!s},{1!s},{2!s},{3!s},{4!s},{5!s},{6},{7},{8!s},{9!s},{10!s},{11!s},{12!s},{13!s});'
                sql = sql.format(self.fundcode, outdata['PROJ'], outdata['ACT'], outdata['DIFF'],
                                 outdata['DELTA'], outdata['PL'], oracledatebuilder(startdate),
                                 oracledatebuilder(enddate), outdata['TE'], outdata['R2'],
                                 outdata['BETA'], outdata['ALPHA'], outdata['VOL'], outdata['AV'])
                cursor.execute(sql)
                cnxn.commit()
                cnxn.close()