def mc_test():
    import os
    import numpy as np
    from pyemu import MonteCarlo, Cov
    jco = os.path.join("pst", "pest.jcb")
    pst = jco.replace(".jcb", ".pst")

    out_dir = os.path.join("mc")
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    # write testing
    mc = MonteCarlo(jco=jco, verbose=True, sigma_range=6)
    cov = Cov.from_parameter_data(mc.pst, sigma_range=6)
    assert np.abs((mc.parcov.x - cov.x).sum()) == 0.0
    mc.draw(10, obs=True)
    mc.write_psts(os.path.join("temp", "real_"))
    mc.parensemble.to_parfiles(os.path.join("mc", "real_"))

    mc = MonteCarlo(jco=jco, verbose=True)
    mc.draw(10, obs=True)
    print("prior ensemble variance:",
          np.var(mc.parensemble.loc[:, "mult1"]))
    projected_en = mc.project_parensemble(inplace=False)
    print("projected ensemble variance:",
          np.var(projected_en.loc[:, "mult1"]))

    import pyemu
    sc = pyemu.Schur(jco=jco)
    mc = MonteCarlo(pst=pst, parcov=sc.posterior_parameter, verbose=True)
    mc.draw(10)
    print("posterior ensemble variance:",
          np.var(mc.parensemble.loc[:, "mult1"]))
def test_mat_output(self):
    samples = GMM1([.9999, .0001], [0.0, 1.0], [0.000001, 0.000001],
                   rng=self.rng, size=[40, 20])
    assert samples.shape == (40, 20)
    assert -.001 < np.mean(samples) < .001, np.mean(samples)
    assert np.var(samples) < .0001, np.var(samples)
def testPdfOfSampleMultiDims(self):
    student = student_t.StudentT(df=[7., 11.], loc=[[5.], [6.]], scale=3.)
    self.assertAllEqual([], student.event_shape)
    self.assertAllEqual([], self.evaluate(student.event_shape_tensor()))
    self.assertAllEqual([2, 2], student.batch_shape)
    self.assertAllEqual([2, 2], self.evaluate(student.batch_shape_tensor()))
    num = 50000
    samples = student.sample(num, seed=123456)
    pdfs = student.prob(samples)
    sample_vals, pdf_vals = self.evaluate([samples, pdfs])
    self.assertEqual(samples.get_shape(), (num, 2, 2))
    self.assertEqual(pdfs.get_shape(), (num, 2, 2))
    self.assertNear(5., np.mean(sample_vals[:, 0, :]), err=.03)
    self.assertNear(6., np.mean(sample_vals[:, 1, :]), err=.03)
    self._assertIntegral(sample_vals[:, 0, 0], pdf_vals[:, 0, 0], err=0.02)
    self._assertIntegral(sample_vals[:, 0, 1], pdf_vals[:, 0, 1], err=0.02)
    self._assertIntegral(sample_vals[:, 1, 0], pdf_vals[:, 1, 0], err=0.02)
    self._assertIntegral(sample_vals[:, 1, 1], pdf_vals[:, 1, 1], err=0.02)
    if not stats:
        return
    self.assertNear(
        stats.t.var(7., loc=0., scale=3.),  # loc d.n. effect var
        np.var(sample_vals[:, :, 0]),
        err=.4)
    self.assertNear(
        stats.t.var(11., loc=0., scale=3.),  # loc d.n. effect var
        np.var(sample_vals[:, :, 1]),
        err=.4)
def _call(self, dataset):
    """Computes featurewise scores."""
    attrdata = dataset.sa[self.__attr].value
    if np.issubdtype(attrdata.dtype, 'c'):
        raise ValueError("Correlation coefficient measure is not meaningful "
                         "for datasets with literal labels.")

    samples = dataset.samples
    pvalue_index = self.__pvalue
    result = np.empty((dataset.nfeatures,), dtype=float)

    for ifeature in xrange(dataset.nfeatures):
        samples_ = samples[:, ifeature]
        corr = pearsonr(samples_, attrdata)
        corrv = corr[pvalue_index]
        # Should be safe to assume 0 corr_coef (or 1 pvalue) if value
        # is actually NaN, although it might not be the case (covar of
        # 2 constants would be NaN although should be 1)
        if np.isnan(corrv):
            if np.var(samples_) == 0.0 and np.var(attrdata) == 0.0 \
               and len(samples_):
                # constant terms
                corrv = 1.0 - pvalue_index
            else:
                corrv = pvalue_index
        result[ifeature] = corrv

    return Dataset(result[np.newaxis])
def average_data(data):
    """
    Find mean and std. deviation of data returned by ``simulate``.
    """
    numnodes = data['nodes']

    its = data['its']
    its_mean = numpy.average(its)
    its_std = math.sqrt(numpy.var(its))

    dead = data['dead']
    dead_mean = 100.0 * numpy.average(dead) / numnodes
    dead_std = 100.0 * math.sqrt(numpy.var(dead)) / numnodes

    immune = data['immune']
    immune_mean = 100.0 * numpy.average(immune) / numnodes
    immune_std = 100.0 * math.sqrt(numpy.var(immune)) / numnodes

    max_contam = data['max_contam']
    max_contam_mean = 100.0 * numpy.average(max_contam) / numnodes
    max_contam_std = 100.0 * math.sqrt(numpy.var(max_contam)) / numnodes

    normal = data['normal']
    normal_mean = 100.0 * numpy.average(normal) / numnodes
    normal_std = 100.0 * math.sqrt(numpy.var(normal)) / numnodes

    return {'its': (its_mean, its_std),
            'nodes': numnodes,
            'dead': (dead_mean, dead_std),
            'immune': (immune_mean, immune_std),
            'max_contam': (max_contam_mean, max_contam_std),
            'normal': (normal_mean, normal_std)}
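# Hedged usage sketch for average_data above: the fake_run dict is an
# illustrative stand-in for whatever ``simulate`` returns, and module-level
# numpy and math imports are assumed because the function body uses them.
import math
import numpy

fake_run = {'nodes': 100,
            'its': [10, 12, 11, 13],
            'dead': [5, 7, 6, 4],
            'immune': [20, 22, 19, 21],
            'max_contam': [40, 38, 42, 41],
            'normal': [35, 33, 33, 34]}
print(average_data(fake_run))  # e.g. {'its': (11.5, 1.118...), 'nodes': 100, ...}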
def featArray(data):
    sh = n.shape(data)
    freqs = n.linspace(0, sh[1], sh[1])
    NNvar = n.zeros_like(data)
    dvar = n.var(n.abs(data))
    for i in range(sh[0]):
        for j in range(sh[1]):
            #samples = []
            #for p in range(100):
            k = n.random.randint(-1, 1, size=1000)
            l = n.random.randint(-1, 1, size=1000)
            #    try:
            #        samples = n.abs(data[k+i,l+j])
            #    except:
            #        pass
            NNvar[i, j] = n.var(n.abs(data[k + i, l + j]))
    X1 = n.zeros((sh[0] * sh[1], 3))
    X1[:, 0] = (n.real(data)).reshape(sh[0] * sh[1])
    X1[:, 1] = (n.imag(data)).reshape(sh[0] * sh[1])
    #X1[:,2] = (n.log10(n.abs(NNvar)) - n.median(n.log10(n.abs(NNvar)))).reshape(sh[0]*sh[1])
    NNvar = NNvar - n.median(NNvar)
    X1[:, 2] = (n.log10(n.abs(NNvar))).reshape(sh[0] * sh[1])
    #X1[:,3] = (n.array([freqs]*sh[0])).reshape(sh[0]*sh[1])
    #X1[:,4] = (n.array([times]*sh[1])).reshape(sh[0]*sh[1])
    X1[n.abs(X1) > 10**100] = 0
    for m in range(X1.shape[1]):
        X1[:, m] = X1[:, m] / n.abs(X1[:, m]).max()
    X1 = n.nan_to_num(X1)
    return X1
def r2_score(y_true, y_pred, round_to=2):
    R"""R-squared for Bayesian regression models. Only valid for linear models.
    http://www.stat.columbia.edu/%7Egelman/research/unpublished/bayes_R2.pdf

    Parameters
    ----------
    y_true : array-like of shape = (n_samples) or (n_samples, n_outputs)
        Ground truth (correct) target values.
    y_pred : array-like of shape = (n_samples) or (n_samples, n_outputs)
        Estimated target values.
    round_to : int
        Number of decimals used to round results (default 2).

    Returns
    -------
    `namedtuple` with the following elements:
    R2_median: median of the Bayesian R2
    R2_mean: mean of the Bayesian R2
    R2_std: standard deviation of the Bayesian R2
    """
    dimension = None
    if y_true.ndim > 1:
        dimension = 1

    var_y_est = np.var(y_pred, axis=dimension)
    var_e = np.var(y_true - y_pred, axis=dimension)

    r2 = var_y_est / (var_y_est + var_e)
    r2_median = np.around(np.median(r2), round_to)
    r2_mean = np.around(np.mean(r2), round_to)
    r2_std = np.around(np.std(r2), round_to)
    r2_r = namedtuple('r2_r', 'r2_median, r2_mean, r2_std')
    return r2_r(r2_median, r2_mean, r2_std)
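# A minimal usage sketch for r2_score above (assumes numpy as np and
# collections.namedtuple are available at module level, as the function body
# requires). y_obs is tiled so that y_true and y_pred are both
# (n_draws, n_obs); each row of y_pred stands in for one posterior
# predictive draw.
import numpy as np

rng = np.random.RandomState(0)
n_draws, n_obs = 200, 50
y_obs = rng.normal(size=n_obs)
y_true_mat = np.tile(y_obs, (n_draws, 1))
y_pred_mat = y_true_mat + rng.normal(scale=0.5, size=(n_draws, n_obs))
print(r2_score(y_true_mat, y_pred_mat))  # r2_r(r2_median=..., r2_mean=..., r2_std=...)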
def test_hash_functions():
    # Checks randomness of hash functions.
    # Variance and mean of each hash function (projection vector)
    # should be different from flattened array of hash functions.
    # If hash functions are not randomly built (seeded with
    # same value), variances and means of all functions are equal.
    n_samples = 12
    n_features = 2
    n_estimators = 5
    rng = np.random.RandomState(42)
    X = rng.rand(n_samples, n_features)

    lshf = ignore_warnings(LSHForest, category=DeprecationWarning)(
        n_estimators=n_estimators,
        random_state=rng.randint(0, np.iinfo(np.int32).max))
    ignore_warnings(lshf.fit)(X)

    hash_functions = []
    for i in range(n_estimators):
        hash_functions.append(lshf.hash_functions_[i].components_)

    for i in range(n_estimators):
        assert_not_equal(np.var(hash_functions),
                         np.var(lshf.hash_functions_[i].components_))

    for i in range(n_estimators):
        assert_not_equal(np.mean(hash_functions),
                         np.mean(lshf.hash_functions_[i].components_))
def test_bernoulli_extract(self):
    fit = self.fit
    extr = fit.extract(permuted=True)
    assert -7.4 < np.mean(extr['lp__']) < -7.0
    assert 0.1 < np.mean(extr['theta']) < 0.4
    assert 0.01 < np.var(extr['theta']) < 0.02

    # use __getitem__
    assert -7.4 < np.mean(fit['lp__']) < -7.0
    assert 0.1 < np.mean(fit['theta']) < 0.4
    assert 0.01 < np.var(fit['theta']) < 0.02

    # permuted=False
    extr = fit.extract(permuted=False)
    self.assertEqual(extr.shape, (1000, 4, 2))
    self.assertTrue(0.1 < np.mean(extr[:, 0, 0]) < 0.4)

    # permuted=True
    extr = fit.extract('lp__', permuted=True)
    assert -7.4 < np.mean(extr['lp__']) < -7.0
    extr = fit.extract('theta', permuted=True)
    assert 0.1 < np.mean(extr['theta']) < 0.4
    assert 0.01 < np.var(extr['theta']) < 0.02
    extr = fit.extract('theta', permuted=False)
    assert extr.shape == (1000, 4, 2)
    assert 0.1 < np.mean(extr[:, 0, 0]) < 0.4
def test_reductions():
    assert compute(t.a.sum(), b) == 6
    assert compute(t.a.min(), b) == 1
    assert compute(t.a.max(), b) == 3
    assert compute(t.a.mean(), b) == 2.0
    assert abs(compute(t.a.std(), b) - np.std([1, 2, 3])) < 1e-5
    assert abs(compute(t.a.var(), b) - np.var([1, 2, 3])) < 1e-5
    assert abs(compute(t.a.std(unbiased=True), b) -
               np.std([1, 2, 3], ddof=1)) < 1e-5
    assert abs(compute(t.a.var(unbiased=True), b) -
               np.var([1, 2, 3], ddof=1)) < 1e-5
    assert len(list(compute(t.distinct(), b))) == 3
    assert len(list(compute(t.a.distinct(), b))) == 3
    assert compute(t.a.nunique(), b) == 3
    assert isinstance(compute(t.a.nunique(), b), np.integer)

    assert compute(t.a.count(), b) == 3
    assert isinstance(compute(t.date.count(), b), np.integer)

    assert compute(t.date.nunique(), b) == 2
    assert isinstance(compute(t.date.nunique(), b), np.integer)

    assert compute(t.date.count(), b) == 2
    assert isinstance(compute(t.a.count(), b), np.integer)

    assert compute(t.a[0], b) == 1
    assert compute(t.a[-1], b) == 3
    assert compute(t[0], b) == compute(t[0], b)
    assert compute(t[-1], b) == compute(t[-1], b)
def _get_likelihood(self, model):
    """Compute the marginal likelihood of the linear model with a g-prior on betas.

    Parameters
    ----------
    model : np.ndarray in R^ndim
        vector of variable inclusion indicators

    Returns
    -------
    float
        log marginal likelihood
    """
    X = self.X[:, model == 1]
    y = self.y
    nobs, ndim = X.shape
    design = np.hstack((np.ones((nobs, 1)), X))
    mle = np.linalg.solve(np.dot(design.T, design), np.dot(design.T, y))
    residuals = y - np.dot(design, mle)
    rsquared = 1 - np.var(residuals) / np.var(y)
    return (log_gamma((nobs - 1) / 2)
            - (nobs - 1) / 2 * np.log(np.pi)
            - 0.5 * np.log(nobs)
            - (nobs - 1) / 2 * np.log(np.dot(residuals, residuals))
            + (nobs - ndim - 1) / 2 * np.log(1 + self.par["penalty"])
            - (nobs - 1) / 2 * np.log(1 + self.par["penalty"] * (1 - rsquared)))
def welch_ttest(X, y):

    classes = np.unique(y)
    n_class = len(classes)
    n_feats = X.shape[1]

    b = np.zeros(n_feats)
    for i in np.arange(n_class):
        for j in np.arange(i + 1, n_class):
            if j > i:
                xi = X[y == i, :]
                xj = X[y == j, :]
                yi = y[y == i]
                yj = y[y == j]

                mi = np.mean(xi, axis=0)
                mj = np.mean(xj, axis=0)

                vi = np.var(xi, axis=0)
                vj = np.var(xj, axis=0)

                n_subjsi = len(yi)
                n_subjsj = len(yj)

                # Welch's t-statistic: the group variances themselves (not
                # their squares) are scaled by the group sizes.
                t = (mi - mj) / np.sqrt((vi / n_subjsi) + (vj / n_subjsj))
                t[np.isnan(t)] = 0
                t[np.isinf(t)] = 0

                b = np.maximum(b, t)
    return b
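# Sanity-check sketch for welch_ttest above against scipy's Welch test
# (equal_var=False). Assumes numpy as np at module level and integer class
# labels 0..n_class-1, which is how the function indexes classes. Values are
# only expected to be close, not identical: welch_ttest uses ddof=0 variances
# and clips negative statistics at zero via np.maximum.
import numpy as np
from scipy import stats

rng = np.random.RandomState(0)
X_demo = np.vstack([rng.normal(1.0, 1.0, size=(30, 4)),   # class 0, higher mean
                    rng.normal(0.0, 2.0, size=(40, 4))])  # class 1
y_demo = np.array([0] * 30 + [1] * 40)
t_ref, _ = stats.ttest_ind(X_demo[y_demo == 0], X_demo[y_demo == 1], equal_var=False)
print(welch_ttest(X_demo, y_demo))
print(t_ref)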
def curv_fit(x=None, y=None, model=None):
    x = np.array(x)
    y = np.array(y)
    params = lmfit.Parameters()

    if model == 'gaussian':
        mod = lmfit.models.GaussianModel()
        params = mod.guess(y, x=x)
        out = mod.fit(y, params, x=x)
        r_sq = 1 - out.residual.var() / np.var(y)
    elif model == '4PL':
        mod = lmfit.Model(logistic_4p)
        params.add('la', value=1.0)
        params.add('gr', value=120.0, vary=False)
        params.add('ce', value=150.0)
        params.add('ua', value=3.0)
        out = mod.fit(y, params, x=x)
        r_sq = 1 - out.residual.var() / np.var(y)
    elif model == '5PL':
        mod = lmfit.Model(logistic_5p)
        params.add('la', value=1.0)
        params.add('gr', value=1.0)
        params.add('ce', value=1.0)
        params.add('ua', value=1.0)
        params.add('sy', value=1.0)
        out = mod.fit(y, params, x=x)
        r_sq = 1 - out.residual.var() / np.var(y)

    out.R_sq = r_sq
    return out
def explained_variance_score(y_true, y_pred):
    """Explained variance regression score function.

    Best possible score is 1.0, lower values are worse.

    Note: the explained variance is not a symmetric function.

    Returns the explained variance.

    Parameters
    ----------
    y_true : array-like

    y_pred : array-like
    """
    y_true, y_pred = check_arrays(y_true, y_pred)
    numerator = np.var(y_true - y_pred)
    denominator = np.var(y_true)
    if denominator == 0.0:
        if numerator == 0.0:
            return 1.0
        else:
            # arbitrarily set to zero to avoid -inf scores; having a constant
            # y_true is not interesting for scoring a regression anyway
            return 0.0
    return 1 - numerator / denominator
def bhattacharyya_dist(X, y):

    classes = np.unique(y)
    n_class = len(classes)
    n_feats = X.shape[1]

    b = np.zeros(n_feats)
    for i in np.arange(n_class):
        for j in np.arange(i + 1, n_class):
            if j > i:
                xi = X[y == i, :]
                xj = X[y == j, :]

                mi = np.mean(xi, axis=0)
                mj = np.mean(xj, axis=0)

                vi = np.var(xi, axis=0)
                vj = np.var(xj, axis=0)

                si = np.std(xi, axis=0)
                sj = np.std(xj, axis=0)

                d = 0.25 * (np.square(mi - mj) / (vi + vj)) + \
                    0.5 * (np.log((vi + vj) / (2 * si * sj)))
                d[np.isnan(d)] = 0
                d[np.isinf(d)] = 0

                b = np.maximum(b, d)
    return b
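# Illustrative call of bhattacharyya_dist above on two synthetic Gaussian
# classes (assumes numpy as np at module level, and integer class labels as
# in welch_ttest). Larger values indicate better-separated features.
import numpy as np

rng = np.random.RandomState(1)
X_demo = np.vstack([rng.normal(0.0, 1.0, size=(50, 3)),
                    rng.normal(1.0, 1.5, size=(50, 3))])
y_demo = np.array([0] * 50 + [1] * 50)
print(bhattacharyya_dist(X_demo, y_demo))  # one distance per feature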
def indivConfInter(self, data):
    if type(data) is float:
        med = numpy.median(data)
        mean = numpy.mean(data)
        stdDev = math.sqrt(numpy.var(data))
        #confidence interval
        ci95low = mean - 10 * (1.96 * stdDev)
        ci95up = mean + 10 * (1.96 * stdDev)
        #confidence level
        cl95low = med - (1.96 * stdDev)
        cl95up = med + (1.96 * stdDev)
        return [med, mean, ci95low, ci95up, cl95low, cl95up]
    elif len(data) > 0:
        med = numpy.median(data)
        mean = numpy.mean(data)
        stdDev = math.sqrt(numpy.var(data))
        ci95low = mean - 10 * (1.96 * (stdDev / math.sqrt(len(data))))
        ci95up = mean + 10 * (1.96 * (stdDev / math.sqrt(len(data))))
        cl95low = med - (1.96 * (stdDev / math.sqrt(len(data))))
        cl95up = med + (1.96 * (stdDev / math.sqrt(len(data))))
        return [med, mean, ci95low, ci95up, cl95low, cl95up]
    else:
        return [None, None, None, None, None, None]
def calc_twosample_ts(propGroup1, propGroup2):
    n1 = len(propGroup1[0])
    n2 = len(propGroup2[0])
    numFeatures = len(propGroup1)

    T_statistics = []
    effectSizes = []
    notes = []
    for r in xrange(0, numFeatures):
        meanG1 = float(sum(propGroup1[r])) / n1
        varG1 = var(propGroup1[r], ddof=1)
        stdErrG1 = varG1 / n1

        meanG2 = float(sum(propGroup2[r])) / n2
        varG2 = var(propGroup2[r], ddof=1)
        stdErrG2 = varG2 / n2

        dp = meanG1 - meanG2
        effectSizes.append(dp * 100)

        denom = math.sqrt(stdErrG1 + stdErrG2)
        if denom == 0:
            notes.append("degenerate case: zero variance for both groups; "
                         "denominator set to 1e-6.")
            T_statistics.append(dp / 1e-6)
        else:
            notes.append("")
            T_statistics.append(dp / denom)

    return T_statistics, effectSizes, notes
def drawPlot(m):
    print("Poly degree is %d" % m)
    A = getAMatrix(x, m, sigma)
    B = getBColumn(x, y, m, sigma)
    #print("A matrix is")
    #print(A)
    #print("B column is")
    #print(B)
    c, v = solveKramer(A, B)
    polyCurve = np.poly1d(c)
    ty = polyCurve(x)  # theoretical y
    print("calculated coefficients are:")
    print(c)
    print("coef variance:")
    print(v)
    # polyfit throws this error sometimes: when rank(A) < m
    # (i.e. A cannot be inverted and there is no unique solution)?
    try:
        polyC, polyV = np.polyfit(x, y, m, full=False, cov=True)
        print("coef from polyfit are:")
        print(polyC)
        print("covariance matrix from polyfit is:")
        print(polyV)
    except ValueError as err:
        print("Error in numpy.polyfit")
        print(err)
    print("-------------------------------------------------------------------")
    print("goodness=R**2=%4.3f" % (1 - np.var(np.subtract(ty, y)) / np.var(y)))
    # corrected division-by-zero error when m == n
    print("avg unc=sigma**2=%4.3f" %
          ((1.0 / ((n - m) if n != m else 1)) * np.sum(np.power(np.subtract(ty, y), 2))))
    l.set_ydata(ty)
    ax.relim()
    ax.autoscale_view(True, True, True)
    plt.draw()
def fig_spesen(spe, sen, fname='fig_model.png', leg=False):
    """Plot the specificity for the early, middle and end section"""
    fig, ax = plt.subplots(figsize=(3, 3))
    fa_rate = 1 - np.mean(spe, axis=0)
    fa_err = np.var(spe, axis=0)
    hits_rate = np.mean(sen, axis=0)
    hits_err = np.var(sen, axis=0)
    ax.plot(np.arange(1, 4), hits_rate, color='#47bb3a', linewidth=1)
    ax.plot(np.arange(1, 4), fa_rate, color='#ec1d2a', linewidth=1)
    width = 0.5
    ax.bar(np.arange(1, 4) - width / 2., hits_rate, width, yerr=hits_err,
           color='#a7db60', label='HIT rate',
           error_kw=dict(ecolor='black', lw=1, capsize=2.5, capthick=1))
    ax.bar(np.arange(1, 4) - width / 2., fa_rate, width, yerr=fa_err,
           color='#f2507b', label='FA rate',
           error_kw=dict(ecolor='black', lw=1, capsize=2.5, capthick=1))
    plt.xlim(0, 4)
    plt.ylim(0, 0.9)
    plt.ylabel("Response Rate")
    plt.xlabel(r'Session Start $\rightarrow$ Session End')
    ax.set_xticks(range(1, 4))
    ax.set_xticklabels(["Initial", "Middle", "Final"], fontsize=10)
    colors = ("red", "orange", "green")
    [t.set_color(colors[i]) for i, t in enumerate(plt.gca().get_xticklabels())]
    adjust_spines(ax, ["bottom", "left"])
    if leg:
        ax.legend(fontsize=10)
    plt.tight_layout()
    plt.savefig(fname)
def test_pairwise_distances_data_derived_params(n_jobs, metric, dist_function,
                                                y_is_x):
    # check that pairwise_distances give the same result in sequential and
    # parallel, when metric has data-derived parameters.
    with config_context(working_memory=1):  # to have more than 1 chunk
        rng = np.random.RandomState(0)
        X = rng.random_sample((1000, 10))

        if y_is_x:
            Y = X
            expected_dist_default_params = squareform(pdist(X, metric=metric))
            if metric == "seuclidean":
                params = {'V': np.var(X, axis=0, ddof=1)}
            else:
                params = {'VI': np.linalg.inv(np.cov(X.T)).T}
        else:
            Y = rng.random_sample((1000, 10))
            expected_dist_default_params = cdist(X, Y, metric=metric)
            if metric == "seuclidean":
                params = {'V': np.var(np.vstack([X, Y]), axis=0, ddof=1)}
            else:
                params = {'VI': np.linalg.inv(np.cov(np.vstack([X, Y]).T)).T}

        expected_dist_explicit_params = cdist(X, Y, metric=metric, **params)
        dist = np.vstack(tuple(dist_function(X, Y,
                                             metric=metric, n_jobs=n_jobs)))

        assert_allclose(dist, expected_dist_explicit_params)
        assert_allclose(dist, expected_dist_default_params)
def statprint(host_per_pg, pg_per_host):
    val = pg_per_host.values()  # sets val to a list of the values in pg_per_host
    mean = numpy.mean(val)
    maxvalue = numpy.amax(val)
    minvalue = numpy.amin(val)
    std = numpy.std(val)
    median = numpy.median(val)
    variance = numpy.var(val)

    print("for placement groups on hosts: ")
    print("the mean is: ", mean)
    print("the max value is: ", maxvalue)
    print("the min value is: ", minvalue)
    print("the standard deviation is: ", std)
    print("the median is: ", median)
    print("the variance is: ", variance)
    # prints statements for stats

    host_mean = numpy.mean(host_per_pg)
    host_max = numpy.amax(host_per_pg)
    host_min = numpy.amin(host_per_pg)
    host_std = numpy.std(host_per_pg)
    host_median = numpy.median(host_per_pg)
    host_variance = numpy.var(host_per_pg)
    # these are the variables for hosts/pgs

    print("hosts per placement group: ")
    print("the mean is: ", host_mean)
    print("the max value is: ", host_max)
    print("the min value is: ", host_min)
    print("the standard deviation is: ", host_std)
    print("the median is: ", host_median)
    print("the variance is: ", host_variance)
def XDapogee(options, args):
    #First load the chains
    savefile = open(args[0], 'rb')
    thesesamples = pickle.load(savefile)
    savefile.close()
    vcs = numpy.array([s[0] for s in thesesamples]) * _APOGEEREFV0 / _REFV0
    dvcdrs = numpy.array([s[6] for s in thesesamples]) * 30.  #To be consistent with this project's dlnvcdlnr
    print numpy.mean(vcs)
    print numpy.mean(dvcdrs)
    #Now fit XD to the 2D PDFs
    ydata = numpy.zeros((len(vcs), 2))
    ycovar = numpy.zeros((len(vcs), 2))
    ydata[:, 0] = numpy.log(vcs)
    ydata[:, 1] = dvcdrs
    vcxamp = numpy.ones(options.g) / options.g
    vcxmean = numpy.zeros((options.g, 2))
    vcxcovar = numpy.zeros((options.g, 2, 2))
    for ii in range(options.g):
        vcxmean[ii, :] = numpy.mean(ydata, axis=0) + numpy.std(ydata, axis=0) * numpy.random.normal(size=(2)) / 4.
        vcxcovar[ii, 0, 0] = numpy.var(ydata[:, 0])
        vcxcovar[ii, 1, 1] = numpy.var(ydata[:, 1])
    extreme_deconvolution.extreme_deconvolution(ydata, ycovar,
                                                vcxamp, vcxmean, vcxcovar)
    save_pickles(options.plotfile,
                 vcxamp, vcxmean, vcxcovar)
    print vcxamp
    print vcxmean[:, 0]
    print vcxmean[:, 1]
    return None
def AsianCallSimPrice(S0, K, T, r, sigma, M, I, CV=False):
    dt = T / M
    S = np.zeros((M + 1, I))
    z = np.random.standard_normal((M + 1, I))  # pseudorandom numbers
    Savg = np.zeros(I)
    S[0] = S0
    S = S0 * np.exp(np.cumsum((r - 0.5 * sigma ** 2) * dt +
                              sigma * np.sqrt(dt) * z, axis=0))
    Savg = np.average(S, axis=0)
    if CV == False:
        price = np.exp(-r * T) * np.sum(np.maximum(Savg - K, 0)) / I
        error = math.sqrt(np.var(np.maximum(Savg - K, 0))) / math.sqrt(I)
        result = (price, error)
    else:
        Tvector = np.arange(dt, T + dt, dt)
        T_avg = Tvector.mean()
        i_vector = np.arange(1, 2 * M + 1, 2)
        sigma_avg = math.sqrt(sigma ** 2 / (M ** 2 * T_avg) *
                              np.dot(i_vector, Tvector[::-1]))
        delta = 0.5 * (sigma ** 2 - sigma_avg ** 2)
        d = (math.log(S0 / K) + (r - delta + 0.5 * sigma_avg ** 2) * T_avg) / \
            (sigma_avg * math.sqrt(T_avg))
        GeomAsianCall = np.exp(-delta * T_avg) * S0 * scipy.stats.norm.cdf(d) - \
            np.exp(-r * T_avg) * K * scipy.stats.norm.cdf(d - sigma_avg * math.sqrt(T_avg))
        S_CV = scipy.stats.mstats.gmean(S, axis=0)
        X = np.exp(-r * T) * np.maximum(S_CV - K, 0)
        Y = np.exp(-r * T) * np.maximum(Savg - K, 0)
        b = np.cov(X, Y)[0][1] / X.var()
        price = Y.mean() - b * (X.mean() - GeomAsianCall)
        error = math.sqrt(np.var(Y - b * X)) / math.sqrt(I)
        rho = np.corrcoef(X, Y)[0][1]
        result = (price, error, rho)
    return result
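# Hypothetical usage sketch for AsianCallSimPrice above. Module-level imports
# of numpy as np, math, and scipy.stats are assumed, since the function body
# relies on them; the parameter values are purely illustrative.
price, error = AsianCallSimPrice(S0=100., K=100., T=1.0, r=0.05,
                                 sigma=0.2, M=50, I=100000)
price_cv, error_cv, rho = AsianCallSimPrice(S0=100., K=100., T=1.0, r=0.05,
                                            sigma=0.2, M=50, I=100000, CV=True)
print(price, error)             # plain Monte Carlo estimate
print(price_cv, error_cv, rho)  # control-variate estimate; error should be smaller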
def log_evidence(X, y, g):
    """Compute the model's log evidence (a.k.a. marginal likelihood).

    Parameters
    ----------
    X : np.ndarray in R^(nobs x ndim)
        feature matrix
    y : np.ndarray in R^nobs
        target vector
    g : float (0, inf)
        dimensionality penalty

    Returns
    -------
    float
        log evidence
    """
    n, d = X.shape
    X_int = np.hstack((np.ones((n, 1)), X))
    mle = np.linalg.solve(np.dot(X_int.T, X_int), np.dot(X_int.T, y))
    resid = y - np.dot(X_int, mle)
    rsq = (d > 0 and 1 - np.var(resid) / np.var(y)) or 0
    return (log_gamma((n - 1) / 2)
            - (n - 1) / 2 * np.log(np.pi)
            - 0.5 * np.log(n)
            - (n - 1) / 2 * np.log(np.dot(resid, resid))
            + (n - d - 1) / 2 * np.log(1 + 1 / g)
            - (n - 1) / 2 * np.log(1 + 1 / g * (1 - rsq)))
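# Minimal sketch of calling log_evidence above, assuming log_gamma refers to
# scipy.special.gammaln (that import is not shown in the snippet) and numpy
# is imported as np at module level. g = n is a common unit-information
# prior choice.
import numpy as np
from scipy.special import gammaln as log_gamma

rng = np.random.RandomState(1)
X_demo = rng.normal(size=(100, 3))
y_demo = np.dot(X_demo, np.array([1.0, 0.0, -2.0])) + rng.normal(scale=0.5, size=100)
print(log_evidence(X_demo, y_demo, g=len(y_demo)))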
def main():
    images, labels = load_labeled_training(flatten=True)
    images = standardize(images)
    unl = load_unlabeled_training(flatten=True)
    unl = standardize(unl)
    test = load_public_test(flatten=True)
    test = standardize(test)
    shuffle_in_unison(images, labels)

    #d = DictionaryLearning().fit(images)
    d = MiniBatchDictionaryLearning(n_components=500, n_iter=500,
                                    verbose=True).fit(images)
    s = SparseCoder(d.components_)
    proj_test = s.transform(images)
    pt = s.transform(test)

    #kpca = KernelPCA(kernel="rbf")
    #kpca.fit(unl)
    #test_proj = kpca.transform(images)
    #pt = kpca.transform(test)

    #spca = SparsePCA().fit(unl)
    #test_proj = spca.transform(images)
    #pt = spca.transform(test)

    svc = SVC()
    scores = cross_validation.cross_val_score(svc, proj_test, labels, cv=10)
    print scores
    print np.mean(scores)
    print np.var(scores)

    svc.fit(proj_test, labels)
    pred = svc.predict(pt)
    write_results(pred, '../svm_res.csv')
def findvdisp3(self, r, v, mags, r200, maxv):
    "use red sequence to find members"
    binedge = np.arange(0, r200 + 1, 0.3)
    rin = r
    vin = v
    colin = mags.T[1] - mags.T[2]
    avg_c = np.average(colin)
    vfinal = np.array([])
    for i in range(binedge.size - 1):
        i += 1
        x = rin[np.where((rin > binedge[i - 1]) & (rin < binedge[i]))]
        y = vin[np.where((rin > binedge[i - 1]) & (rin < binedge[i]))]
        c = colin[np.where((rin > binedge[i - 1]) & (rin < binedge[i]))]
        for k in range(6):
            y2 = y
            x2 = x
            c2 = c
            stv = 3.5 * np.std(y2)
            y = y2[np.where((y2 > -stv) & (y2 < stv) | ((c2 < avg_c + 0.04) & (c2 > avg_c - 0.04)))]
            x = x2[np.where((y2 > -stv) & (y2 < stv) | ((c2 < avg_c + 0.04) & (c2 > avg_c - 0.04)))]
            c = c2[np.where((y2 > -stv) & (y2 < stv) | ((c2 < avg_c + 0.04) & (c2 > avg_c - 0.04)))]
        vstd2 = np.std(y)
        vvar2 = np.var(y)
        print 'standard dev of zone %i = %f' % (i, vstd2)
        vfinal = np.append(y[np.where((y < vvar2) & (y > -vvar2))], vfinal)
    return np.var(vfinal)
def classify_2d(data_a, data_b, x):
    x1 = x[0]
    x2 = x[1]

    probability_a = data_a.shape[1] / (data_a.shape[1] + data_b.shape[1])
    probability_b = data_b.shape[1] / (data_a.shape[1] + data_b.shape[1])

    mean_x1_a = np.mean(data_a[0, :])
    mean_x2_a = np.mean(data_a[1, :])
    mean_x1_b = np.mean(data_b[0, :])
    mean_x2_b = np.mean(data_b[1, :])

    variance_x1_a = np.var(data_a[0, :])
    variance_x2_a = np.var(data_a[1, :])
    variance_x1_b = np.var(data_b[0, :])
    variance_x2_b = np.var(data_b[1, :])

    # mlab.normpdf expects the standard deviation, so pass the square root of
    # each variance.
    pd_x1_given_a = mlab.normpdf(x1, mean_x1_a, np.sqrt(variance_x1_a))
    pd_x2_given_a = mlab.normpdf(x2, mean_x2_a, np.sqrt(variance_x2_a))
    pd_x1_given_b = mlab.normpdf(x1, mean_x1_b, np.sqrt(variance_x1_b))
    pd_x2_given_b = mlab.normpdf(x2, mean_x2_b, np.sqrt(variance_x2_b))

    posterior_numerator_a = probability_a * pd_x1_given_a * pd_x2_given_a
    posterior_numerator_b = probability_b * pd_x1_given_b * pd_x2_given_b

    posterior_numerators = {
        'A': posterior_numerator_a,
        'B': posterior_numerator_b
    }

    return max(posterior_numerators.iterkeys(),
               key=(lambda k: posterior_numerators[k]))
def calc_com(mask):
    pts = index_to_zyx(mask)

    z = pts[0, :].astype(float).mean()

    # Correct Center of Mass for reentrant domain
    y1 = pts[1, :].astype(float)
    x1 = pts[2, :].astype(float)
    y2 = (y1 < ny / 2.) * y1 + (y1 >= ny / 2.) * (y1 - ny)
    x2 = (x1 < nx / 2.) * x1 + (x1 >= nx / 2.) * (x1 - nx)
    y1m = y1.mean()
    y2m = y2.mean()
    x1m = x1.mean()
    x2m = x2.mean()

    if numpy.var(y2 - y2m) > numpy.var(y1 - y1m):
        y = y1m
    else:
        y = (y2m + .5) % ny - .5

    if numpy.var(x2 - x2m) > numpy.var(x1 - x1m):
        x = x1m
    else:
        x = (x2m + .5) % nx - .5

    return numpy.array((z, y, x))
def _tTest(x, y, exclude=95):
    """Compute a one-sided Welch t-statistic."""
    with np.errstate(all="ignore"):
        def cappedSlog(v):
            q = np.percentile(v, exclude)
            v2 = v.copy()
            v2 = v2[~np.isnan(v2)]
            v2[v2 > q] = q
            v2[v2 <= 0] = 1. / (75 + 1)
            return np.log(v2)

        x1 = cappedSlog(x)
        x2 = cappedSlog(y)
        sx1 = np.var(x1) / len(x1)
        sx2 = np.var(x2) / len(x2)
        totalSE = np.sqrt(sx1 + sx2)
        if totalSE == 0:
            stat = 0
        else:
            stat = (np.mean(x1) - np.mean(x2)) / totalSE

        #df = (sx1 + sx2)**2 / (sx1**2/(len(x1)-1) + sx2**2/(len(x2) - 1))
        #pval = 1 - scidist.t.cdf(stat, df)

        # Scipy's t distribution CDF implementation has inadequate
        # precision. We have switched to the normal distribution for
        # better behaved p values.
        pval = 0.5 * erfc(stat / sqrt(2))

        return {'testStatistic': stat, 'pvalue': pval}
def calc_error(data):
    """
    Error estimation for a time series of simulation observables that takes
    into account that the series is to some degree correlated (which
    enhances the estimated statistical error).
    """
    # calculate the normalized autocorrelation function of data
    acf = autocorrelation(data)

    # calculate the integrated correlation time tau_int
    # (Janke, Wolfhard. "Statistical analysis of simulations: Data correlations
    # and error estimation." Quantum Simulations of Complex Many-Body Systems:
    # From Theory to Algorithms 10 (2002): 423-445.)
    tau_int = 0.5
    for i in range(len(acf)):
        tau_int += acf[i]
        if (i >= 6 * tau_int):
            break

    # mean value of the time series
    data_mean = np.mean(data)

    # calculate the so-called effective length of the time series N_eff
    if (tau_int > 0.5):
        N_eff = len(data) / (2.0 * tau_int)
        # finally the error is sqrt(var(data)/N_eff)
        stat_err = np.sqrt(np.var(data) / N_eff)
    else:
        stat_err = np.sqrt(np.var(data) / len(data))

    return data_mean, stat_err
sys.getsizeof(graph_data[1].Ri) + sys.getsizeof(graph_data[1].Ro) +
                 sys.getsizeof(graph_data[1].y) + sys.getsizeof(graph_data[1].a)) / 10.**6)

    n_nodes, n_edges = get_shape(graph_data)
    node_counts.append(n_nodes)
    edge_counts.append(n_edges)

    truth_eff = get_truth_efficiency(i, graph_data, truth_table)
    seg_eff = get_segment_efficiency(graph_data)
    truth_effs.append(truth_eff)
    seg_effs.append(seg_eff)

avg_seg_eff = [np.mean(seg_effs), np.sqrt(np.var(seg_effs))]
avg_truth_eff = [np.mean(truth_effs), np.sqrt(np.var(truth_effs))]
avg_nodes = [np.mean(node_counts), np.sqrt(np.var(node_counts))]
avg_edges = [np.mean(edge_counts), np.sqrt(np.var(edge_counts))]
avg_size = [np.mean(sizes), np.sqrt(np.var(sizes))]

# print out a brief report of the measurements
data_tag = " ***** pt=" + pt_cuts[i] + " data ***** "
print("{0}\n \t seg_eff: {1} +/- {2} \n \t truth_eff: {3} +/- {4}"
      .format(data_tag,
              np.round(avg_seg_eff[0], decimals=3),
              np.round(avg_seg_eff[1], decimals=3),
              np.round(avg_truth_eff[0], decimals=3),
              np.round(avg_truth_eff[1], decimals=3)))
print("\t nodes: {0} +/- {1} \n \t edges: {2} +/- {3}"
      .format(np.round(avg_nodes[0], decimals=3),
frame = pyfits.getdata(dir + filename_sub + str(files[i]) + '.fits')
cds = fluxdir * (frame - f9)

# Define reference pixels
ref = [0, 1, 2, 3, len(cds) - 1, len(cds) - 2, len(cds) - 3, len(cds) - 4]

# subtract median of reference pixels
for j in range((cds.shape)[1]):
    cds[:, j] -= np.median(cds[ref, j])

for j in range(n_crops):
    crop = cds[center + yshifts[j]: center + yshifts[j] + 70,
               center + 1 + (j - n_crops / 2) * 64: center - 1 + (j - n_crops / 2) * 64 + 64]
    # crosstalk correction increases the variance but does not affect the signal
    mask = masks[j * 2: j * 2 + 2]
    noisearr[i, j] = np.var(crop[mask], ddof=1) * correction
    fluxarr[i, j] = np.mean(crop[mask])

    # verify masking is being reasonable
    if i == len(files) - 1:
        myrange = [np.median(crop) - 10. * np.std(crop[mask]),
                   np.median(crop) + 10. * np.std(crop[mask])]
        junk = plt.hist(crop.flatten(), bins=50, range=myrange)
        plt.title("Pixel Brightness Histogram, CDS " + str(max(files)) + '-09')
        plt.xlabel('ADU')
        plt.ylabel('N_Occurences')
        plt.vlines((np.min(crop[mask]), np.max(crop[mask])),
                   0.1, 1.2 * np.max(junk[0]), colors='r')
        plt.show()
if j > 11 and j < (length - 1):
    data = line.split(",")
    #print("J= ", j)
    interface = data[2]
    #print("interface= ", interface)
    inter = interface.split("/")
    #print(inter[0], inter[1], inter[2])
    if data[3] == "6\n" and inter[2] == str(i):
        #print(data[0], data[1], data[2], data[3])
        portno = int(data[1])
        #print("port no type = ", type(portno))
        #print("port no = ", portno)
        port.insert(k, portno)
        k = k + 1

portlen = len(port)
port.sort()
if portlen > 0:
    max = port[portlen - 1]
    min = port[0]
    avg = math.ceil(numpy.mean(port))
    var = math.ceil(numpy.var(port))
    std = math.ceil(numpy.std(port))
    print("### interface= G1/0/", i, "tcp port numbers= ", portlen,
          "gap= ", max - min, "max= ", max, "min =", min,
          "avg= ", avg, "var= ", var, "std= ", std,
          "tcp ports are= ", port)
min_res = min(res)

inversas = []
for value in res:
    k = int(math.floor((value - delta) / w))  # this is my bin index
    # interpolate the values of F^-1(x)
    inv = ((value - (delta + k * w)) / w) * (x[k + 1] - x[k]) + x[k]
    #inv = np.interp(value,[delta+k*w, delta+(k+1)*w],[x[k],x[k+1]])
    inversas.append(inv)

#print inversas
max_inv = max(inversas)
min_inv = min(inversas)

#h = [0] * m
#for value in inversas:
#    k = int(math.floor((value+gamma)/q))
#    h[k] = h[k]+1
#print sum(h)

#p, bins, patches = plt.hist(inversas, m, density=True, facecolor='g', alpha=0.75)

inversas_2 = []
for i in inversas:
    inversas_2.append(round(i, 2))

print "Computed mean: " + str(np.mean(inversas_2)) + " vs " + "0"
print "Computed deviation: " + str(np.var(inversas_2)) + " vs " + "1"
print "Computed mode: " + str(stats.mode(inversas_2)[0]) + " vs " + "0"

plt.hist(inversas_2, 50)
plt.show()
print(np.mean(a2, axis=0))
print(np.mean(a2, axis=1))
print()

# std(): compute the standard deviation
print('std() : compute the standard deviation')
print(a2)
print(np.std(a2))
print(np.std(a2, axis=0))
print(np.std(a2, axis=1))
print()

# var(): compute the variance
print('var() : compute the variance')
print(a2)
print(np.var(a2))
print(np.var(a2, axis=0))
print(np.var(a2, axis=1))
print()

# min(): minimum value
print('min() : minimum value')
print(a2)
print(np.min(a2))
print(np.min(a2, axis=0))
print(np.min(a2, axis=1))
print()

# max(): maximum value
print('max() : maximum value')
print(a2)
print(np.max(a2))
Y = np.array([f(V2X(V[i, :])) for i in range(ned)])

# reformat if necessary
if np.size(np.shape(Y)) == 1:
    Y = np.expand_dims(Y, axis=1)

# build surrogates
# fpce,_ = MM.build_fpce(V, Y, ppce)
fpce = MM.build_spce(V, Y, Q, ppce, q)
flra, _, _ = MM.build_cp(V, Y, R, plra)

# compute generalization error (MC estimate)
Ypce = np.array([fpce(Ve[i, :]) for i in range(nmc)]).flatten()
Ylra = np.array([flra(Ve[i, :]) for i in range(nmc)]).flatten()
err_pce = np.mean((Ye - Ypce)**2) / np.var(Ye)
err_lra = np.mean((Ye - Ylra)**2) / np.var(Ye)
print "\nGeneralization Error Comparison\n\n"\
      "PCE: g_e = {0}\n"\
      "LRA: g_e = {1}".format(err_pce, err_lra)

# ---------------------------------------------------------------------------
# density plots
# ---------------------------------------------------------------------------
de = ss.kde.gaussian_kde(np.abs(Ye))
dpce = ss.kde.gaussian_kde(np.abs(Ypce))
dlra = ss.kde.gaussian_kde(np.abs(Ylra))
y = np.linspace(min(np.abs(Ye)), max(np.abs(Ye)), 100)
def var(x):
    return np.var(x, ddof=1)
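# Quick check of the helper above: np.var defaults to the population variance
# (ddof=0), while var(x) returns the unbiased sample variance (ddof=1).
import numpy as np

x_demo = np.array([1.0, 2.0, 3.0, 4.0])
print(np.var(x_demo))  # 1.25      (divides by n)
print(var(x_demo))     # 1.666...  (divides by n - 1)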
def processData(self):

    self.NcascStart = self.par_obj.NcascStart
    self.NcascEnd = self.par_obj.NcascEnd
    self.Nsub = self.par_obj.Nsub
    self.winInt = self.par_obj.winInt

    #self.subChanArr, self.trueTimeArr, self.dTimeArr,self.resolution = pt3import(self.filepath)
    if self.ext == 'pt2':
        self.subChanArr, self.trueTimeArr, self.dTimeArr, self.resolution = pt2import(self.filepath)
    if self.ext == 'pt3':
        self.subChanArr, self.trueTimeArr, self.dTimeArr, self.resolution = pt3import(self.filepath)
    if self.ext == 'csv':
        self.subChanArr, self.trueTimeArr, self.dTimeArr, self.resolution = csvimport(self.filepath)
    if self.ext == 'spc':
        self.subChanArr, self.trueTimeArr, self.dTimeArr, self.resolution = spc_file_import(self.filepath)
    if self.ext == 'asc':
        self.subChanArr, self.trueTimeArr, self.dTimeArr, self.resolution = asc_file_import(self.filepath)

    self.subArrayGeneration(self.xmin, self.xmax)

    self.dTimeMin = self.parentId.dTimeMin
    self.dTimeMax = self.parentId.dTimeMax
    self.subDTimeMin = self.dTimeMin
    self.subDTimeMax = self.dTimeMax

    #Time series of photon counts. For visualisation.
    self.timeSeries1, self.timeSeriesScale1 = delayTime2bin(
        np.array(self.trueTimeArr) / 1000000,
        np.array(self.subChanArr),
        self.ch_present[0], self.photonCountBin)

    unit = self.timeSeriesScale1[-1] / self.timeSeriesScale1.__len__()
    self.kcount_CH1 = np.average(self.timeSeries1)
    raw_count = np.average(self.timeSeries1)  #This is the unnormalised intensity count for int_time duration (the first moment)
    var_count = np.var(self.timeSeries1)

    self.brightnessNandBCH0 = (((var_count - raw_count) / (raw_count)) / (float(unit)))
    if (var_count - raw_count) == 0:
        self.numberNandBCH0 = 0
    else:
        self.numberNandBCH0 = (raw_count**2 / (var_count - raw_count))

    if self.numOfCH == 2:
        self.timeSeries2, self.timeSeriesScale2 = delayTime2bin(
            np.array(self.trueTimeArr) / 1000000,
            np.array(self.subChanArr),
            self.ch_present[1], self.photonCountBin)

        unit = self.timeSeriesScale2[-1] / self.timeSeriesScale2.__len__()
        self.kcount_CH2 = np.average(self.timeSeries2)
        raw_count = np.average(self.timeSeries2)  #This is the unnormalised intensity count for int_time duration (the first moment)
        var_count = np.var(self.timeSeries2)

        self.brightnessNandBCH1 = (((var_count - raw_count) / (raw_count)) / (float(unit)))
        if (var_count - raw_count) == 0:
            self.numberNandBCH1 = 0
        else:
            self.numberNandBCH1 = (raw_count**2 / (var_count - raw_count))

        self.CV = calc_coincidence_value(self)

    #Adds names to the fit function for later fitting.
    if self.objId1 == None:
        corrObj = corrObject(self.filepath, self.fit_obj)
        self.objId1 = corrObj.objId
        self.objId1.parent_name = 'pt FCS tgated -tg0: ' + str(np.round(self.xmin, 0)) + ' -tg1: ' + str(np.round(self.xmax, 0))
        self.objId1.parent_uqid = 'pt FCS tgated -tg0: ' + str(np.round(self.xmin, 0)) + ' -tg1: ' + str(np.round(self.xmax, 0))
        self.fit_obj.objIdArr.append(corrObj.objId)
        self.objId1.param = copy.deepcopy(self.fit_obj.def_param)
        self.objId1.name = self.name + '_CH0_Auto_Corr'
        self.objId1.ch_type = 0  #channel 0 Auto
        self.objId1.siblings = None
        self.objId1.prepare_for_fit()
        self.objId1.kcount = self.kcount_CH1
        self.objId1.autoNorm = np.array(self.autoNorm[:, 0, 0]).reshape(-1)
        self.objId1.autotime = np.array(self.autotime).reshape(-1)
        self.objId1.param = copy.deepcopy(self.fit_obj.def_param)

    if self.numOfCH == 2:
        self.objId1.CV = self.CV

        if self.objId3 == None:
            corrObj = corrObject(self.filepath, self.fit_obj)
            self.objId3 = corrObj.objId
            self.objId3.parent_name = 'pt FCS tgated -tg0: ' + str(np.round(self.xmin, 0)) + ' -tg1: ' + str(np.round(self.xmax, 0))
            self.objId3.parent_uqid = 'pt FCS tgated -tg0: ' + str(np.round(self.xmin, 0)) + ' -tg1: ' + str(np.round(self.xmax, 0))
            self.fit_obj.objIdArr.append(corrObj.objId)
            self.objId3.param = copy.deepcopy(self.fit_obj.def_param)
            self.objId3.name = self.name + '_CH1_Auto_Corr'
            self.objId3.ch_type = 1  #channel 1 Auto
            self.objId3.siblings = None
            self.objId3.prepare_for_fit()
            self.objId3.kcount = self.kcount_CH2
            self.objId3.autoNorm = np.array(self.autoNorm[:, 1, 1]).reshape(-1)
            self.objId3.autotime = np.array(self.autotime).reshape(-1)
            self.objId3.param = copy.deepcopy(self.fit_obj.def_param)
            self.objId3.CV = self.CV

        if self.objId2 == None:
            corrObj = corrObject(self.filepath, self.fit_obj)
            self.objId2 = corrObj.objId
            self.objId2.parent_name = 'pt FCS tgated -tg0: ' + str(np.round(self.xmin, 0)) + ' -tg1: ' + str(np.round(self.xmax, 0))
            self.objId2.parent_uqid = 'pt FCS tgated -tg0: ' + str(np.round(self.xmin, 0)) + ' -tg1: ' + str(np.round(self.xmax, 0))
            self.objId2.param = copy.deepcopy(self.fit_obj.def_param)
            self.fit_obj.objIdArr.append(corrObj.objId)
            self.objId2.name = self.name + '_CH01_Cross_Corr'
            self.objId2.ch_type = 2  #channel 01 Cross
            self.objId2.siblings = None
            self.objId2.prepare_for_fit()
            self.objId2.autoNorm = np.array(self.autoNorm[:, 0, 1]).reshape(-1)
            self.objId2.autotime = np.array(self.autotime).reshape(-1)
            self.objId2.param = copy.deepcopy(self.fit_obj.def_param)
            self.objId2.CV = self.CV

        if self.objId4 == None:
            corrObj = corrObject(self.filepath, self.fit_obj)
            self.objId4 = corrObj.objId
            self.objId4.parent_name = 'pt FCS tgated -tg0: ' + str(np.round(self.xmin, 0)) + ' -tg1: ' + str(np.round(self.xmax, 0))
            self.objId4.parent_uqid = 'pt FCS tgated -tg0: ' + str(np.round(self.xmin, 0)) + ' -tg1: ' + str(np.round(self.xmax, 0))
            self.objId4.param = copy.deepcopy(self.fit_obj.def_param)
            self.fit_obj.objIdArr.append(corrObj.objId)
            self.objId4.name = self.name + '_CH10_Cross_Corr'
            self.objId4.ch_type = 3  #channel 10 Cross
            self.objId4.siblings = None
            self.objId4.prepare_for_fit()
            self.objId4.autoNorm = np.array(self.autoNorm[:, 1, 0]).reshape(-1)
            self.objId4.autotime = np.array(self.autotime).reshape(-1)
            self.objId4.CV = self.CV

    self.fit_obj.fill_series_list()

    #del self.subChanArr
    #self.trueTimeArr
    del self.dTimeArr
def get_video_feat(video, feat_dates):
    # source data
    history = video[video['day'].map(lambda x: x in feat_dates)]
    history['cnt'] = 1

    # features to return
    feature = pd.DataFrame(columns=['user_id'])

    ## count features
    pivot = pd.pivot_table(history, index=['user_id', 'day'],
                           values='cnt', aggfunc=len)
    pivot = pivot.unstack(level=-1)
    pivot.fillna(0, downcast='infer', inplace=True)
    feat = pd.DataFrame()
    feat['user_id'] = pivot.index
    feat.index = pivot.index
    # per-day features
    for i in range(1, len(feat_dates) + 1):
        feat['user_video_cnt_before_' + str(i) + '_day'] = pivot[pivot.columns.tolist()[-i]]
    # sum
    feat['user_video_cnt_sum'] = pivot.sum(1)
    # mean
    feat['user_video_cnt_mean'] = pivot.mean(1)
    # variance
    feat['user_video_cnt_var'] = pivot.var(1)
    # max
    feat['user_video_cnt_max'] = pivot.max(1)
    # min
    feat['user_video_cnt_min'] = pivot.min(1)
    # merge into feature
    feature = pd.merge(feature, feat, on=['user_id'], how='outer')

    # ## differences and their statistics
    # diff = pivot.diff(axis=1)
    # diff = diff[diff.columns.tolist()[1:]]
    # feat = pd.DataFrame()
    # feat['user_id'] = diff.index
    # feat.index = diff.index
    # # each difference
    # for i in range(1, len(feat_dates)):
    #     feat['user_video_diff_before_' + str(i) + '_day'] = diff[diff.columns.tolist()[-i]]
    # # sum
    # feat['user_video_diff_sum'] = diff.sum(1)
    # # mean
    # feat['user_video_diff_mean'] = diff.mean(1)
    # # variance
    # feat['user_video_diff_var'] = diff.var(1)
    # # max
    # feat['user_video_diff_max'] = diff.max(1)
    # # min
    # feat['user_video_diff_min'] = diff.min(1)
    # # merge into feature
    # feature = pd.merge(feature, feat, on=['user_id'], how='outer')

    ## consecutive shooting days
    feat = pd.DataFrame()
    feat['user_id'] = pivot.index
    feat.index = pivot.index
    pivot = pivot.applymap(lambda x: 1 if x != 0 else 0)
    feat['video_list'] = pivot.apply(
        lambda x: reduce(lambda y, z: str(y) + str(z), x), axis=1)
    # consecutive shooting days: mean
    feat['user_video_continue_mean'] = feat['video_list'].map(
        lambda x: np.mean([len(y) for y in re.split('0+', x.strip('0'))]))
    # consecutive shooting days: variance
    feat['user_video_continue_var'] = feat['video_list'].map(
        lambda x: np.var([len(y) for y in re.split('0+', x.strip('0'))]))
    # consecutive shooting days: max
    feat['user_video_continue_max'] = feat['video_list'].map(
        lambda x: np.max([len(y) for y in re.split('0+', x.strip('0'))]))
    # consecutive shooting days: min
    feat['user_video_continue_min'] = feat['video_list'].map(
        lambda x: np.min([len(y) for y in re.split('0+', x.strip('0'))]))
    # drop the helper column
    feat.drop(['video_list'], axis=1, inplace=True)
    # merge into feature
    feature = pd.merge(feature, feat, on=['user_id'], how='outer')

    ## time gaps
    # gap between the most recent / earliest shoot and the nearest label day
    near = 'nearest_day_video'
    fur = 'furest_day_video'
    pivot_n = pd.pivot_table(history, index=['user_id'], values='day', aggfunc=max)
    pivot_n.rename(columns={'day': near}, inplace=True)
    pivot_n.reset_index(inplace=True)
    pivot_f = pd.pivot_table(history, index=['user_id'], values='day', aggfunc=min)
    pivot_f.rename(columns={'day': fur}, inplace=True)
    pivot_f.reset_index(inplace=True)
    feature = pd.merge(feature, pivot_n, on=['user_id'], how='left')
    feature = pd.merge(feature, pivot_f, on=['user_id'], how='left')
    feature[near + '_to_label'] = feature[near].map(lambda x: feat_dates[-1] + 1 - x)
    feature[fur + '_to_label'] = feature[fur].map(lambda x: feat_dates[-1] + 1 - x)
    feature.drop([near, fur], axis=1, inplace=True)

    ## fill missing values
    feature.fillna(0, downcast='infer', inplace=True)

    ## return
    return feature
def processData(self):

    self.NcascStart = self.par_obj.NcascStart
    self.NcascEnd = self.par_obj.NcascEnd
    self.Nsub = self.par_obj.Nsub
    self.winInt = self.par_obj.winInt
    self.photonCountBin = 25  #self.par_obj.photonCountBin

    #File import
    if self.ext == 'spc':
        self.subChanArr, self.trueTimeArr, self.dTimeArr, self.resolution = spc_file_import(self.filepath)
    if self.ext == 'asc':
        self.subChanArr, self.trueTimeArr, self.dTimeArr, self.resolution = asc_file_import(self.filepath)
    if self.ext == 'pt2':
        self.subChanArr, self.trueTimeArr, self.dTimeArr, self.resolution = pt2import(self.filepath)
    if self.ext == 'pt3':
        self.subChanArr, self.trueTimeArr, self.dTimeArr, self.resolution = pt3import(self.filepath)
    if self.ext == 'ptu':
        out = ptuimport(self.filepath)
        if out != False:
            self.subChanArr, self.trueTimeArr, self.dTimeArr, self.resolution = out
        else:
            self.par_obj.data.pop(-1)
            self.par_obj.objectRef.pop(-1)
            self.exit = True
            return
    if self.ext == 'csv':
        self.subChanArr, self.trueTimeArr, self.dTimeArr, self.resolution = csvimport(self.filepath)

    #If the file is empty.
    if self.subChanArr == None:
        #Undoes any preparation of resource.
        self.par_obj.data.pop(-1)
        self.par_obj.objectRef.pop(-1)
        self.exit = True
        return

    #Colour assigned to file.
    self.color = self.par_obj.colors[self.unqID % len(self.par_obj.colors)]

    #How many channels there are in the files.
    self.ch_present = np.sort(np.unique(np.array(self.subChanArr)))
    if self.ext == 'pt3' or self.ext == 'ptu' or self.ext == 'pt2':
        self.numOfCH = self.ch_present.__len__() - 1  #Minus 1 because not interested in channel 15.
    else:
        self.numOfCH = self.ch_present.__len__()

    #Finds the numbers which address the channels.
    #Calculates decay function for both channels.
    self.photonDecayCh1, self.decayScale1 = delayTime2bin(
        np.array(self.dTimeArr), np.array(self.subChanArr),
        self.ch_present[0], self.winInt)

    if self.numOfCH == 2:
        self.photonDecayCh2, self.decayScale2 = delayTime2bin(
            np.array(self.dTimeArr), np.array(self.subChanArr),
            self.ch_present[1], self.winInt)

    #Time series of photon counts. For visualisation.
    self.timeSeries1, self.timeSeriesScale1 = delayTime2bin(
        np.array(self.trueTimeArr) / 1000000, np.array(self.subChanArr),
        self.ch_present[0], self.photonCountBin)

    unit = self.timeSeriesScale1[-1] / self.timeSeriesScale1.__len__()

    #Converts to counts per
    self.kcount_CH1 = np.average(self.timeSeries1)
    raw_count = np.average(self.timeSeries1)  #This is the unnormalised intensity count for int_time duration (the first moment)
    var_count = np.var(self.timeSeries1)

    self.brightnessNandBCH0 = (((var_count - raw_count) / (raw_count)) / (float(unit)))
    if (var_count - raw_count) == 0:
        self.numberNandBCH0 = 0
    else:
        self.numberNandBCH0 = (raw_count**2 / (var_count - raw_count))

    if self.numOfCH == 2:
        self.timeSeries2, self.timeSeriesScale2 = delayTime2bin(
            np.array(self.trueTimeArr) / 1000000, np.array(self.subChanArr),
            self.ch_present[1], self.photonCountBin)

        unit = self.timeSeriesScale2[-1] / self.timeSeriesScale2.__len__()
        self.kcount_CH2 = np.average(self.timeSeries2)
        raw_count = np.average(self.timeSeries2)  #This is the unnormalised intensity count for int_time duration (the first moment)
        var_count = np.var(self.timeSeries2)

        self.brightnessNandBCH1 = (((var_count - raw_count) / (raw_count)) / (float(unit)))
        if (var_count - raw_count) == 0:
            self.numberNandBCH1 = 0
        else:
            self.numberNandBCH1 = (raw_count**2 / (var_count - raw_count))

        self.CV = calc_coincidence_value(self)

    #Calculates the Auto and Cross-correlation functions.
    self.crossAndAuto(np.array(self.trueTimeArr), np.array(self.subChanArr))

    if self.fit_obj != None:
        #If fit object provided then creates fit objects.
        if self.objId1 == None:
            corrObj = corrObject(self.filepath, self.fit_obj)
            self.objId1 = corrObj.objId
            self.objId1.parent_name = 'point FCS'
            self.objId1.parent_uqid = 'point FCS'
            self.fit_obj.objIdArr.append(corrObj.objId)
            self.objId1.param = copy.deepcopy(self.fit_obj.def_param)
            self.objId1.name = self.name + '_CH0_Auto_Corr'
            self.objId1.ch_type = 0  #channel 0 Auto
            self.objId1.siblings = None
            self.objId1.prepare_for_fit()
            self.objId1.kcount = self.kcount_CH1
            self.objId1.autoNorm = np.array(self.autoNorm[:, 0, 0]).reshape(-1)
            self.objId1.autotime = np.array(self.autotime).reshape(-1)
            self.objId1.param = copy.deepcopy(self.fit_obj.def_param)
            self.objId1.max = np.max(self.objId1.autoNorm)
            self.objId1.min = np.min(self.objId1.autoNorm)

        if self.numOfCH == 2:
            self.objId1.CV = self.CV

            if self.objId3 == None:
                corrObj = corrObject(self.filepath, self.fit_obj)
                self.objId3 = corrObj.objId
                self.objId3.parent_name = 'point FCS'
                self.objId3.parent_uqid = 'point FCS'
                self.objId3.param = copy.deepcopy(self.fit_obj.def_param)
                self.fit_obj.objIdArr.append(corrObj.objId)
                self.objId3.name = self.name + '_CH1_Auto_Corr'
                self.objId3.ch_type = 1  #channel 1 Auto
                self.objId3.siblings = None
                self.objId3.prepare_for_fit()
                self.objId3.kcount = self.kcount_CH2
                self.objId3.autoNorm = np.array(self.autoNorm[:, 1, 1]).reshape(-1)
                self.objId3.autotime = np.array(self.autotime).reshape(-1)
                self.objId3.param = copy.deepcopy(self.fit_obj.def_param)
                self.objId3.max = np.max(self.objId3.autoNorm)
                self.objId3.min = np.min(self.objId3.autoNorm)
                self.objId3.CV = self.CV

            if self.objId2 == None:
                corrObj = corrObject(self.filepath, self.fit_obj)
                self.objId2 = corrObj.objId
                self.objId2.parent_name = 'point FCS'
                self.objId2.parent_uqid = 'point FCS'
                self.objId2.param = copy.deepcopy(self.fit_obj.def_param)
                self.fit_obj.objIdArr.append(corrObj.objId)
                self.objId2.name = self.name + '_CH01_Cross_Corr'
                self.objId2.ch_type = 2  #01cross
                self.objId2.siblings = None
                self.objId2.prepare_for_fit()
                self.objId2.autoNorm = np.array(self.autoNorm[:, 0, 1]).reshape(-1)
                self.objId2.autotime = np.array(self.autotime).reshape(-1)
                self.objId2.param = copy.deepcopy(self.fit_obj.def_param)
                self.objId2.max = np.max(self.objId2.autoNorm)
                self.objId2.min = np.min(self.objId2.autoNorm)
                self.objId2.CV = self.CV

            if self.objId4 == None:
                corrObj = corrObject(self.filepath, self.fit_obj)
                self.objId4 = corrObj.objId
                self.objId4.parent_name = 'point FCS'
                self.objId4.parent_uqid = 'point FCS'
                self.objId4.param = copy.deepcopy(self.fit_obj.def_param)
                self.fit_obj.objIdArr.append(corrObj.objId)
                self.objId4.name = self.name + '_CH10_Cross_Corr'
                self.objId4.ch_type = 3  #10cross
                self.objId4.siblings = None
                self.objId4.prepare_for_fit()
                self.objId4.autoNorm = np.array(self.autoNorm[:, 1, 0]).reshape(-1)
                self.objId4.autotime = np.array(self.autotime).reshape(-1)
                self.objId4.param = copy.deepcopy(self.fit_obj.def_param)
                self.objId4.max = np.max(self.objId4.autoNorm)
                self.objId4.min = np.min(self.objId4.autoNorm)
                self.objId4.CV = self.CV

        self.fit_obj.fill_series_list()

    self.dTimeMin = 0
    self.dTimeMax = np.max(self.dTimeArr)
    self.subDTimeMin = self.dTimeMin
    self.subDTimeMax = self.dTimeMax
    self.exit = False
    #del self.subChanArr
    #del self.trueTimeArr
    del self.dTimeArr
def produce_plots():
    # Generate Arrays of Random numbers
    test_arrays = [
        np.random.random((2**5, 2**5)),
        np.random.random((2**6, 2**6)),
        np.random.random((2**7, 2**7)),   # DFT Mean around 5.5s, var around 0.0119s
        np.random.random((2**8, 2**8)),   # DFT Mean around 42.838s, var around 1.19s
        np.random.random((2**9, 2**9)),   # DFT Mean around 372.367s, var around 5.299s (Super long)
        np.random.random((2**10, 2**10))  # (Going to take way too long to run)
    ]

    # Store results
    dimensions_array = []
    dft_mean_array = []
    dft_variance_array = []
    fft_mean_array = []
    fft_variance_array = []

    for array in test_arrays:
        # Store problem size and append to designated array
        dimension = array.shape[0]
        dimensions_array.append(dimension)

        dft_results = []
        fft_results = []
        for i in range(10):
            # Naive DFT Method
            start_time = time.time()
            fouriertransform.dft_2d(array)
            end_time = time.time()
            dft_results.append(end_time - start_time)

            # FFT Method
            start_time = time.time()
            fouriertransform.fft_2d(array)
            end_time = time.time()
            fft_results.append(end_time - start_time)

        # Store dft mean and variance
        dft_mean = np.mean(dft_results)
        dft_mean_array.append(dft_mean)
        dft_variance = np.var(dft_results)
        dft_variance_array.append(dft_variance)

        # Store fft mean and variance
        fft_mean = np.mean(fft_results)
        fft_mean_array.append(fft_mean)
        fft_variance = np.var(fft_results)
        fft_variance_array.append(fft_variance)

        # Print mean and variance
        print('Array Dimensions: {} by {}'.format(dimension, dimension))
        print("----------------------------------------")
        print("DFT Mean: ", dft_mean)
        print("FFT Mean: ", np.mean(fft_results))
        print("DFT Variance: ", dft_variance)
        print("FFT Variance: ", np.var(fft_results))
        print("----------------------------------------\n")

    # Plot Results
    # Error is standard deviation * 2
    dft_errors = [math.sqrt(i) * 2 for i in dft_variance_array]
    fft_errors = [math.sqrt(i) * 2 for i in fft_variance_array]
    plt.errorbar(dimensions_array, dft_mean_array, yerr=dft_errors,
                 ecolor="red", label='DFT')
    plt.errorbar(dimensions_array, fft_mean_array, color='green',
                 yerr=fft_errors, ecolor="red", label='FFT')
    plt.title('Mean Time vs Problem Size')
    plt.xlabel('Problem Size', fontsize=14)
    plt.ylabel('Runtime (s)', fontsize=14)
    plt.legend(loc='upper left')
    plt.grid(True)
    plt.show()
def run_CV(self):

    cvIter = 0

    totalInstanceNum = len(self.m_targetLabel)
    print("totalInstanceNum\t", totalInstanceNum)
    indexList = [i for i in range(totalInstanceNum)]

    totalTransferNumList = []
    np.random.seed(3)
    np.random.shuffle(indexList)

    foldNum = 10
    foldInstanceNum = int(totalInstanceNum * 1.0 / foldNum)
    foldInstanceList = []

    for foldIndex in range(foldNum - 1):
        foldIndexInstanceList = indexList[foldIndex * foldInstanceNum:(foldIndex + 1) * foldInstanceNum]
        foldInstanceList.append(foldIndexInstanceList)

    foldIndexInstanceList = indexList[foldInstanceNum * (foldNum - 1):]
    foldInstanceList.append(foldIndexInstanceList)

    totalAccList = [[] for i in range(10)]
    humanAccList = [[] for i in range(10)]

    correctTransferRatioList = []
    totalTransferNumList = []
    correctTransferLabelNumList = []
    correctUntransferRatioList = []

    totalAuditorPrecisionList = []
    totalAuditorRecallList = []
    totalAuditorAccList = []

    for foldIndex in range(foldNum):
        if self.m_multipleClass:
            self.m_clf = LR(multi_class="multinomial", solver='lbfgs', random_state=3)
        else:
            self.m_clf = LR(random_state=3)

        self.m_auditor0 = LR(random_state=3)
        self.m_auditor1 = LR(random_state=3)

        train = []
        for preFoldIndex in range(foldIndex):
            train.extend(foldInstanceList[preFoldIndex])

        test = foldInstanceList[foldIndex]
        for postFoldIndex in range(foldIndex + 1, foldNum):
            train.extend(foldInstanceList[postFoldIndex])

        trainNum = int(totalInstanceNum * 0.9)

        targetNameFeatureTrain = self.m_targetNameFeature[train]
        targetLabelTrain = self.m_targetLabel[train]
        # targetDataFeatureTrain = self.m_targetDataFeature[train]

        targetNameFeatureTest = self.m_targetNameFeature[test]
        targetLabelTest = self.m_targetLabel[test]
        # transferLabelTest = self.m_transferLabel[test]
        transferLabelTest = []

        initExList = []
        initExList = self.pretrainSelectInit(train, foldIndex)
        # random.seed(101)
        # initExList = random.sample(train, 3)

        targetNameFeatureInit = self.m_targetNameFeature[initExList]
        targetLabelInit = self.m_targetLabel[initExList]

        print("initExList\t", initExList, targetLabelInit)
        queryIter = 0
        labeledExList = []
        unlabeledExList = []
        ###labeled index
        labeledExList.extend(initExList)
        unlabeledExList = list(set(train) - set(labeledExList))

        activeLabelNum = 3.0
        transferLabelNum = 0.0
        transferFeatureList = []
        transferFlagList0 = []
        transferFlagList1 = []

        featureDim = len(targetNameFeatureTrain[0])
        self.init_confidence_bound(featureDim, labeledExList, unlabeledExList)

        targetNameFeatureIter = targetNameFeatureInit
        targetLabelIter = targetLabelInit

        correctTransferLabelNum = 0.0
        wrongTransferLabelNum = 0.0
        correctUntransferLabelNum = 0.0
        wrongUntransferLabelNum = 0.0

        # auditorPrecisionList = []
        # auditorRecallList = []
        auditorAccList = []

        while activeLabelNum < rounds:
            # targetNameFeatureIter = self.m_targetNameFeature[labeledExList]
            # targetLabelIter = self.m_targetLabel[labeledExList]

            self.m_clf.fit(targetNameFeatureIter, targetLabelIter)

            exId = self.select_example(unlabeledExList)
            # self.update_select_confidence_bound(exId)

            # print(idx)
            activeLabelFlag = False
            transferLabelFlag, weakOracleIndex, transferLabel = self.get_transfer_flag(
                transferFeatureList, transferFlagList0, transferFlagList1,
                exId, activeLabelNum)

            exLabel = -1
            if transferLabelFlag:
                self.m_weakLabeledIDList.append(exId)

                transferLabelNum += 1.0
                activeLabelFlag = False

                exLabel = transferLabel
                targetNameFeatureIter = np.vstack((targetNameFeatureIter, self.m_targetNameFeature[exId]))
                targetLabelIter = np.hstack((targetLabelIter, exLabel))
                # targetNameFeatureIter.append(self.m_targetNameFeature[exId])
                # targetLabelIter.append(exLabel)

                if exLabel == self.m_targetLabel[exId]:
                    correctTransferLabelNum += 1.0
                    print("queryIter\t", queryIter)
                else:
                    wrongTransferLabelNum += 1.0
                    print("query iteration", queryIter, "error transfer label\t", exLabel,
                          "true label", self.m_targetLabel[exId])
            else:
                self.m_strongLabeledIDList.append(exId)
                self.update_judge_confidence_bound(exId)
                activeLabelNum += 1.0
                activeLabelFlag = True

                exLabel = self.m_targetLabel[exId]
                targetNameFeatureIter = np.vstack((targetNameFeatureIter, self.m_targetNameFeature[exId]))
                targetLabelIter = np.hstack((targetLabelIter, exLabel))
                # targetNameFeatureIter.append(self.m_targetNameFeature[exId])
                # targetLabelIter.append(exLabel)

                weakLabel0 = self.m_transferLabel0[exId]
                weakLabel1 = self.m_transferLabel1[exId]

                transferFeatureList.append(self.m_targetNameFeature[exId])

                if weakLabel0 == exLabel:
                    correctUntransferLabelNum += 1.0
                    transferFlagList0.append(1.0)
                else:
                    wrongUntransferLabelNum += 1.0
                    transferFlagList0.append(0.0)

                if weakLabel1 == exLabel:
                    correctUntransferLabelNum += 1.0
                    transferFlagList1.append(1.0)
                else:
                    wrongUntransferLabelNum += 1.0
                    transferFlagList1.append(0.0)

                auditorAcc = self.getAuditorMetric(transferFeatureList, transferFlagList0,
                                                   transferFlagList1, targetNameFeatureTest,
                                                   transferLabelTest, targetLabelTest)
                print("auditorAcc", auditorAcc)
                auditorAccList.append(auditorAcc)

            labeledExList.append(exId)
            unlabeledExList.remove(exId)

            acc = self.get_pred_acc(targetNameFeatureTest, targetLabelTest,
                                    targetNameFeatureIter, targetLabelIter)
            totalAccList[cvIter].append(acc)
            if activeLabelFlag:
                humanAccList[cvIter].append(acc)
            queryIter += 1

        totalAuditorAccList.append(auditorAccList)

        transferLabelNum = len(self.m_weakLabeledIDList)
        totalTransferNumList.append(transferLabelNum)
        correctTransferLabelNumList.append(correctTransferLabelNum)

        cvIter += 1

    print("transfer num\t", np.mean(totalTransferNumList), np.sqrt(np.var(totalTransferNumList)))
    print("correct transfer num\t", np.mean(correctTransferLabelNumList), np.sqrt(np.var(correctTransferLabelNumList)))

    AuditorAccFile = modelVersion + "_auditor_acc.txt"
    writeFile(totalAuditorAccList, AuditorAccFile)

    totalACCFile = modelVersion + "_acc.txt"
    writeFile(totalAccList, totalACCFile)

    humanACCFile = modelVersion + "_human_acc.txt"
    writeFile(humanAccList, humanACCFile)
def find_best_network(self, T_val=100):
    self._open_hp_files()

    best_network = None
    best_ll = -float('inf')
    best_tau = 0
    best_dropout = 0
    best_HIDDEN_UNITS = []

    for dropout_rate in self._DROPOUT_RATES:
        for tau in self._TAU_VALUES:
            for n_hidden in self._HIDDEN_UNITS_FILE:
                print('Grid search step: Tau: ' + str(tau) +
                      ' Dropout rate: ' + str(dropout_rate) +
                      ' Hidden units : ' + str(n_hidden))
                network = self.mcd_model.model_runner(
                    self.X_train, self.y_train,
                    dropout_prob=dropout_rate,
                    n_epochs=self.n_epochs,
                    tau=tau,
                    batch_size=self.batch_size,
                    lengthscale=1e-2,
                    n_hidden=n_hidden
                )

                print('Starting prediction using validation data..')
                probs_mc_dropout = []
                self.model = network
                T = T_val
                for t_i in range(T):
                    print('T: ', t_i)
                    probs_mc_dropout += [self.model.predict(self.X_val,
                                                            batch_size=self.batch_size,
                                                            verbose=1)]
                predictive_mean = np.mean(probs_mc_dropout, axis=0)
                predictive_variance = np.var(probs_mc_dropout, axis=0)

                # obtained the test ll from the validation sets
                ll = self.log_likelihood(self.y_val, predictive_mean, tau, T)
                if (ll > best_ll):
                    best_ll = ll
                    best_network = network
                    best_tau = tau
                    best_dropout = dropout_rate
                    best_HIDDEN_UNITS = n_hidden
                    print('Best log_likelihood changed to: ' + str(best_ll))
                    print('Best tau changed to: ' + str(best_tau))
                    print('Best dropout rate changed to: ' + str(best_dropout))

    self.best_tau_val = best_tau
    self.best_dropout_val = best_dropout
    self.best_HIDDEN_UNITS_lay = best_HIDDEN_UNITS
    self.best_MSD_model = best_network

    best_val = {
        'best_tau': self.best_tau_val,
        'best_dropout': self.best_dropout_val,
        'best_HIDDEN_UNITS': self.best_HIDDEN_UNITS_lay
    }
    with open(self.hp_output_PATH, 'w') as fp:
        json.dump(best_val, fp)

    self.best_MSD_model.save(self.MCDmodel_output_PATH)
def plot_results(results, W, poss, shape, epochs, explore_prop, performance_check, sample_amount=3): # Check if the samples taken from the results is not more than the results sample_amount_check = True while sample_amount_check: if sample_amount * 2 > len(results): sample_amount -= 1 print("sample_amount reduced by 1") else: sample_amount_check = False top_results = [] mean_var_top_results = [] top_poss = [] bottom_results = [] mean_var_bottom_results = [] bottom_poss = [] title = f"Value at highest {sample_amount} states" # Remove end state (2,2) del results[2 * shape[1] + 2] del poss[2 * shape[1] + 2] for i in range(sample_amount): index_top = np.argmax(results, axis=0)[epochs - 1] top_results.append(results[index_top]) mean_var_top_results.append( (np.mean(results[index_top][:int(explore_prop * epochs)]), np.var(results[index_top][:int(explore_prop * epochs)]))) top_poss.append(poss[index_top]) del results[index_top] del poss[index_top] is_wall = True while is_wall: index_bottom = np.argmin(results, axis=0)[epochs - 1] if poss[index_bottom] in W: del results[index_bottom] del poss[index_bottom] else: is_wall = False bottom_results.append(results[index_bottom]) mean_var_bottom_results.append( (np.mean(results[index_bottom][:int(explore_prop * epochs)]), np.var(results[index_bottom][:int(explore_prop * epochs)]))) bottom_poss.append(poss[index_bottom]) del results[index_bottom] del poss[index_bottom] print_var = False for i in range(len(top_poss)): plt.plot(top_results[i], label=(f"{top_poss[i]}" + f" Top No. {i+1}" + (" #Start" if top_poss[i] == (5, 0) else " #End" if top_poss[i] == (2, 2) else "") + (f" {mean_var_top_results[i]}" if print_var else ""))) plt.xlabel("Epochs") plt.ylabel("Value") xmin, xmax, ymin, ymax = plt.axis() # plt.vlines(epochs*explore_prop, ymin, ymax, label="explore to exploit", linestyle="dotted") plt.legend(loc="best", framealpha=1) plt.title(f"Value at the highest {sample_amount} states") plt.draw() plt.waitforbuttonpress() plt.clf() plt.plot(performance_check) plt.title("Learning curve") plt.xlabel("Epochs") plt.ylabel("Error in shortest paths") plt.draw() plt.waitforbuttonpress() plt.clf() # for i in range(len(bottom_poss)): # plt.plot(bottom_results[i], label=(f"{bottom_poss[i]}" + f" Bottom No. {i+1}" + (" #Start" if bottom_poss[i]==(5,0) else " #End" if bottom_poss[i] == (2,2) else "") + (f" {mean_var_bottom_results[i]}" if print_var else ""))) # plt.xlabel("Epochs") # plt.ylabel("Value") # xmin, xmax, ymin, ymax = plt.axis() # plt.vlines(epochs*explore_prop, ymin, ymax, label="explore to exploit", linestyle="dotted") # plt.legend(loc="best", framealpha=1) # plt.title(f"Value at the lowest {sample_amount} states") # plt.draw() # plt.waitforbuttonpress() # plt.clf() plt.close()
def batchnorm_forward(x, gamma, beta, bn_param): """ Forward pass for batch normalization. During training the sample mean and (uncorrected) sample variance are computed from minibatch statistics and used to normalize the incoming data. During training we also keep an exponentially decaying running mean of the mean and variance of each feature, and these averages are used to normalize data at test-time. At each timestep we update the running averages for mean and variance using an exponential decay based on the momentum parameter: running_mean = momentum * running_mean + (1 - momentum) * sample_mean running_var = momentum * running_var + (1 - momentum) * sample_var Note that the batch normalization paper suggests a different test-time behavior: they compute sample mean and variance for each feature using a large number of training images rather than using a running average. For this implementation we have chosen to use running averages instead since they do not require an additional estimation step; the torch7 implementation of batch normalization also uses running averages. Input: - x: Data of shape (N, D) - gamma: Scale parameter of shape (D,) - beta: Shift paremeter of shape (D,) - bn_param: Dictionary with the following keys: - mode: 'train' or 'test'; required - eps: Constant for numeric stability - momentum: Constant for running mean / variance. - running_mean: Array of shape (D,) giving running mean of features - running_var Array of shape (D,) giving running variance of features Returns a tuple of: - out: of shape (N, D) - cache: A tuple of values needed in the backward pass """ mode = bn_param['mode'] eps = bn_param.get('eps', 1e-5) momentum = bn_param.get('momentum', 0.9) N, D = x.shape running_mean = bn_param.get('running_mean', np.zeros(D, dtype=x.dtype)) running_var = bn_param.get('running_var', np.zeros(D, dtype=x.dtype)) out, cache = None, None if mode == 'train': ####################################################################### # TODO: Implement the training-time forward pass for batch norm. # # Use minibatch statistics to compute the mean and variance, use # # these statistics to normalize the incoming data, and scale and # # shift the normalized data using gamma and beta. # # # # You should store the output in the variable out. Any intermediates # # that you need for the backward pass should be stored in the cache # # variable. # # # # You should also use your computed sample mean and variance together # # with the momentum variable to update the running mean and running # # variance, storing your result in the running_mean and running_var # # variables. # ####################################################################### sample_mean = np.mean(x, axis=0) sample_var = np.var(x, axis=0) x_norm = (x - sample_mean) / np.sqrt(sample_var + eps) out = x_norm * gamma + beta running_mean = momentum * running_mean + (1 - momentum) * sample_mean running_var = momentum * running_var + (1 - momentum) * sample_var cache = (x_norm, gamma, x - sample_mean, 1 / np.sqrt(sample_var + eps)) ####################################################################### # END OF YOUR CODE # ####################################################################### elif mode == 'test': ####################################################################### # TODO: Implement the test-time forward pass for batch normalization. # # Use the running mean and variance to normalize the incoming data, # # then scale and shift the normalized data using gamma and beta. 
# # Store the result in the out variable. # ####################################################################### x_norm = (x - running_mean) / np.sqrt(running_var + eps) out = x_norm * gamma + beta ####################################################################### # END OF YOUR CODE # ####################################################################### else: raise ValueError('Invalid forward batchnorm mode "%s"' % mode) # Store the updated running means back into bn_param bn_param['running_mean'] = running_mean bn_param['running_var'] = running_var return out, cache
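# Hedged sketch: a quick numerical check of the training-mode pass above
# (assumes the batchnorm_forward defined above is in scope). With gamma=1 and
# beta=0 the normalized output should have roughly zero mean and unit
# variance per feature.
import numpy as np

np.random.seed(0)
x = 10.0 + 4.0 * np.random.randn(200, 5)          # N=200 samples, D=5 features
gamma, beta = np.ones(5), np.zeros(5)
bn_param = {'mode': 'train'}
out, _ = batchnorm_forward(x, gamma, beta, bn_param)
print(np.mean(out, axis=0))   # ~0 per feature
print(np.var(out, axis=0))    # ~1 per feature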
def outputTemp(Mean, Variance):
    print(
        time.strftime("%H:%M:%S", time.localtime()) +
        " {:>5.2f} {:>4.3f}".format(Mean, Variance))


if __name__ == '__main__':
    outputTimeStep = 1
    screenOutput = False
    filename = time.strftime("%m-%d-at-%H:%M-year%YRecordLog.txt",
                             time.localtime())
    with open(filename, 'w') as f:
        if screenOutput:
            outputBanner()
        f.write("{} {} {}\n".format('Time', 'ObjectiveTemp', 'Variance'))
        while True:
            startTime = time.time()
            tempList = []
            while time.time() - startTime < outputTimeStep:
                tempList.append(chanRef.voltage * 10)
                # Use true division here: `outputTimeStep // 50` is 0 for
                # outputTimeStep == 1, which busy-waits and floods the ADC.
                time.sleep(outputTimeStep / 50)
            tempArray = np.array(tempList)
            Mean, Var = np.mean(tempArray), np.var(tempArray)
            f.write(
                time.strftime("%H:%M:%S", time.localtime()) +
                " {:>5.2f} {:>4.3f}\n".format(Mean, Var))
            if screenOutput:
                outputTemp(Mean, Var)
def test_hermitian(self, device, tol, skip_if): """Test that a tensor product involving qml.Hermitian works correctly""" n_wires = 3 dev = device(n_wires) skip_if(dev, {"supports_tensor_observables": False}) theta = 0.432 phi = 0.123 varphi = -0.543 A_ = 0.1 * np.array([ [-6, 2 + 1j, -3, -5 + 2j], [2 - 1j, 0, 2 - 1j, -5 + 4j], [-3, 2 + 1j, 0, -4 + 3j], [-5 - 2j, -5 - 4j, -4 - 3j, -6], ]) @qml.qnode(dev) def circuit(): qml.RX(theta, wires=[0]) qml.RX(phi, wires=[1]) qml.RX(varphi, wires=[2]) qml.CNOT(wires=[0, 1]) qml.CNOT(wires=[1, 2]) return qml.sample( qml.PauliZ(wires=[0]) @ qml.Hermitian(A_, wires=[1, 2])) res = circuit() # res should only contain the eigenvalues of # the hermitian matrix tensor product Z Z = np.diag([1, -1]) eigvals = np.linalg.eigvalsh(np.kron(Z, A_)) assert np.allclose(sorted(np.unique(res)), sorted(eigvals), atol=tol(False)) mean = np.mean(res) expected = (0.1 * 0.5 * (-6 * np.cos(theta) * (np.cos(varphi) + 1) - 2 * np.sin(varphi) * (np.cos(theta) + np.sin(phi) - 2 * np.cos(phi)) + 3 * np.cos(varphi) * np.sin(phi) + np.sin(phi))) assert np.allclose(mean, expected, atol=tol(False)) var = np.var(res) expected = ( 0.01 * (1057 - np.cos(2 * phi) + 12 * (27 + np.cos(2 * phi)) * np.cos(varphi) - 2 * np.cos(2 * varphi) * np.sin(phi) * (16 * np.cos(phi) + 21 * np.sin(phi)) + 16 * np.sin(2 * phi) - 8 * (-17 + np.cos(2 * phi) + 2 * np.sin(2 * phi)) * np.sin(varphi) - 8 * np.cos(2 * theta) * (3 + 3 * np.cos(varphi) + np.sin(varphi))**2 - 24 * np.cos(phi) * (np.cos(phi) + 2 * np.sin(phi)) * np.sin(2 * varphi) - 8 * np.cos(theta) * (4 * np.cos(phi) * (4 + 8 * np.cos(varphi) + np.cos(2 * varphi) - (1 + 6 * np.cos(varphi)) * np.sin(varphi)) + np.sin(phi) * (15 + 8 * np.cos(varphi) - 11 * np.cos(2 * varphi) + 42 * np.sin(varphi) + 3 * np.sin(2 * varphi)))) / 16) assert np.allclose(var, expected, atol=tol(False))
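# Hedged sketch (numpy only, no PennyLane): the test above relies on the fact
# that sampling an observable returns its eigenvalues, so the sample mean and
# np.var converge to <O> = sum_i p_i e_i and <O^2> - <O>^2. The eigenvalues
# and probabilities below are made up for illustration.
import numpy as np

rng = np.random.default_rng(1)
eigvals = np.array([-2.0, -0.5, 0.5, 2.0])
probs = np.array([0.1, 0.4, 0.3, 0.2])
samples = rng.choice(eigvals, size=200000, p=probs)

mean_exact = np.sum(probs * eigvals)
var_exact = np.sum(probs * eigvals ** 2) - mean_exact ** 2
print(np.mean(samples), mean_exact)   # close
print(np.var(samples), var_exact)     # close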
def makeViolinPlots(dataForViolinO, theAx, ytitle): dataForViolin = dict() for ke in dataForViolinO.keys(): # if ke != 'GSE47652': # continue dataForViolin[ke] = dataForViolinO[ke] ax = theAx titles = dataForViolin.keys() allData = [] numFns = len(titles) for ke in titles: dataRAND = dataForViolin[ke][1] if np.var(dataRAND) < 0.000001: dataRAND[0] = dataRAND[1] + 0.0001 dataRSS = dataForViolin[ke][2] if np.var(dataRSS) < 0.000001: dataRSS[0] = dataRSS[1] + 0.0001 allData.append(dataRAND) allData.append(dataRSS) positions = [[0.7 * i, 0.7 * i] for i in range(1, numFns + 1)] positions = [item for sublist in positions for item in sublist] ax.plot([positions[0] - 1.05, positions[-1] + 0.35], [1, 1], 'r:', alpha=0.7, zorder=-1) violin_parts = ax.violinplot(allData, showmeans=True, positions=positions, showextrema=False) # print(str(violin_parts['cmeans'].get_segments())) count = 0 Osegs = violin_parts['cmeans'].get_segments() segs = [] for b in violin_parts['bodies']: # b.set_alpha(0.75) thisSeg = Osegs[count] midPointX = thisSeg[0][0] + (thisSeg[1][0] - thisSeg[0][0]) / 2.0 if count % 2 == 0: m = np.mean(b.get_paths()[0].vertices[:, 0]) b.get_paths()[0].vertices[:, 0] = np.clip( b.get_paths()[0].vertices[:, 0], -np.inf, m) b.set_color('b') b.set_facecolor('blue') newSeg = np.array([[thisSeg[0][0], thisSeg[0][1]], [midPointX + 0.01, thisSeg[1][1]]]) else: m = np.mean(b.get_paths()[0].vertices[:, 0]) b.get_paths()[0].vertices[:, 0] = np.clip( b.get_paths()[0].vertices[:, 0], m, np.inf) b.set_color('g') b.set_facecolor('green') newSeg = np.array([[midPointX - 0.01, thisSeg[0][1]], [thisSeg[1][0], thisSeg[1][1]]]) count += 1 segs.append(newSeg) b.set_alpha(0.99) violin_parts['cmeans'].set_color('black') # print("seg:") violin_parts['cmeans'].set_segments(segs) # print(str(violin_parts['cmeans'].get_segments())) # print("__\n") ax.set_xticks([0.7 * i for i in range(1, numFns + 1)]) ax.set_xticklabels(titles, rotation='vertical') ax.set_ylabel(ytitle) ylim = list(ax.get_ylim()) if ylim[0] < 1 and ylim[1] <= 1: ylim[1] = 1.1 if ylim[0] >= 1 and ylim[1] > 1: ylim[0] = 0.9 ax.set_ylim(ylim) ax.set_yticks(ax.get_yticks()[1:-1]) # Custom legend import matplotlib.patches as mpatches pB = mpatches.Patch(color='blue', linewidth=0) pG = mpatches.Patch(color='green', linewidth=0)
MpsdWelch = periodogramaWelch(x, K, O)  # this is my own implementation
psdWelch = np.zeros(shape=(201, R))
for i in range(R):
    f, psdWelch[:, i] = signal.welch(x[:, i], fs, 'bartlett',
                                     nperseg=400, noverlap=200)  # see: scipy.signal.welch

plt.figure(1)
plt.plot(f, 20 * np.log10(psdWelch))

Mf = np.arange(len(MpsdWelch)) * (fs / (len(MpsdWelch) * 2))
plt.figure(2)
plt.plot(Mf, 20 * np.log10(MpsdWelch))

# Estimate the sinusoid's frequency with the argmax-of-PSD estimator
estF0 = f[np.argmax(psdWelch, axis=0)]
valEspF0 = np.mean(estF0)
varEstF0 = np.var(estF0)

print(estF0)
print(valEspF0)
print(varEstF0)

'''
f = np.arange(len(psdWelch)) * (fs / (len(psdWelch) * 2))

# Estimate the sinusoid's frequency with the argmax-of-PSD estimator
estF0 = f[np.argmax(psdWelch, axis=0)]
valEspF0 = np.mean(estF0)
varEstF0 = np.var(estF0)
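# Hedged sketch (synthetic data, hypothetical values): the estimator above
# picks the frequency bin with the largest Welch PSD. The same idea on a
# single noisy 250 Hz sinusoid sampled at 1 kHz:
import numpy as np
from scipy import signal

fs = 1000.0
n = np.arange(1000)
x1 = np.sin(2 * np.pi * 250.0 * n / fs) + 0.1 * np.random.randn(n.size)
f1, psd1 = signal.welch(x1, fs, 'bartlett', nperseg=400, noverlap=200)
print(f1[np.argmax(psd1)])   # should land near 250 Hz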
except Exception: # protect against API changes pass if origVariance: fig.suptitle("Diffim residuals: Normalized by sqrt(input variance)", fontsize=titleFs) else: fig.suptitle("Diffim residuals: Normalized by sqrt(diffim variance)", fontsize=titleFs) sp1 = pylab.subplot(221) sp2 = pylab.subplot(222, sharex=sp1, sharey=sp1) sp3 = pylab.subplot(223, sharex=sp1, sharey=sp1) sp4 = pylab.subplot(224, sharex=sp1, sharey=sp1) xs = np.arange(-5, 5.05, 0.1) ys = 1. / np.sqrt(2 * np.pi) * np.exp( -0.5 * xs**2 ) sp1.hist(candidateResids, bins=xs, normed=True, alpha=0.5, label="N(%.2f, %.2f)" % (np.mean(candidateResids), np.var(candidateResids))) sp1.plot(xs, ys, "r-", lw=2, label="N(0,1)") sp1.set_title("Candidates: basis fit", fontsize=titleFs-2) sp1.legend(loc=1, fancybox=True, shadow=True, prop = FontProperties(size=titleFs-6)) sp2.hist(spatialResids, bins=xs, normed=True, alpha=0.5, label="N(%.2f, %.2f)" % (np.mean(spatialResids), np.var(spatialResids))) sp2.plot(xs, ys, "r-", lw=2, label="N(0,1)") sp2.set_title("Candidates: spatial fit", fontsize=titleFs-2) sp2.legend(loc=1, fancybox=True, shadow=True, prop = FontProperties(size=titleFs-6)) sp3.hist(nonfitResids, bins=xs, normed=True, alpha=0.5, label="N(%.2f, %.2f)" % (np.mean(nonfitResids), np.var(nonfitResids))) sp3.plot(xs, ys, "r-", lw=2, label="N(0,1)") sp3.set_title("Control sample: spatial fit", fontsize=titleFs-2) sp3.legend(loc=1, fancybox=True, shadow=True, prop = FontProperties(size=titleFs-6))
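# Hedged sketch: newer matplotlib releases removed hist(..., normed=True) in
# favor of density=True. A standalone version of the residual-vs-N(0,1)
# overlay used above, on synthetic residuals:
import numpy as np
import matplotlib.pyplot as plt

resids = np.random.randn(2000) * 1.1 + 0.05        # stand-in residuals
xs = np.arange(-5, 5.05, 0.1)
ys = 1.0 / np.sqrt(2 * np.pi) * np.exp(-0.5 * xs ** 2)

fig, ax = plt.subplots()
ax.hist(resids, bins=xs, density=True, alpha=0.5,
        label="N(%.2f, %.2f)" % (np.mean(resids), np.var(resids)))
ax.plot(xs, ys, "r-", lw=2, label="N(0,1)")
ax.legend(loc=1)
plt.show()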
data = mat_file.get('arrhythmia') data = data[~np.all(data == 0, axis=1)] # deleting eventual zero columns class_id = data[:, -1] class_id[np.where(class_id > 1)] = 2 class_id = class_id - 1 data = data[:, :-1] (N, F) = np.shape(data) mean = np.mean(data) std = np.std(data) x_norm = (data - mean) / std mean = np.mean(x_norm, 0) var = np.var(x_norm, 0) n_healthy = sum(class_id == 0) n_ill = sum(class_id == 1) # initializing the neural network graph tf.set_random_seed(1234) learning_rate = 1e-4 n_hidden_nodes_1 = F n_hidden_nodes_2 = 128 x = tf.placeholder(tf.float64, [N, F]) t = tf.placeholder(tf.float64, [N, 1]) # first layer w1 = tf.Variable(
import math

import numpy as np


def func(N):
    # Monte Carlo estimate of the mean of exp(5|x-5| + 5|y-5|) for (x, y) ~ U[0,1]^2
    total = 0.0
    for i in range(N):
        x, y = np.random.rand(2)
        total += np.exp(5 * abs(x - 5) + 5 * abs(y - 5))
    return total / N


s = []
for i in range(5):
    print(func(100))
for i in range(50):
    s.append(func(1000))
print(np.var(s))


def func1(N):
    # Monte Carlo estimate of the integral of cos(pi + 5x + 5y) over [-1, 1]^2
    total = 0.0
    for i in range(N):
        x, y = np.random.rand(2) * 2 - [1, 1]
        total += math.cos(math.pi + 5 * x + 5 * y)
    return 2 * 2 * total / N


b = []
for i in range(5):
    print(func1(100))
for i in range(50):
    b.append(func1(1000))
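# Hedged sketch: the spread of a Monte Carlo estimate shrinks roughly like
# 1/N, which is why the variance above is computed over repeated runs. A
# quick check with a vectorized version of the second integrand:
import numpy as np

def mc_estimate(N, rng):
    x = rng.uniform(-1.0, 1.0, size=N)
    y = rng.uniform(-1.0, 1.0, size=N)
    return 4.0 * np.mean(np.cos(np.pi + 5 * x + 5 * y))

rng = np.random.default_rng(0)
for N in (1000, 4000, 16000):
    estimates = [mc_estimate(N, rng) for _ in range(50)]
    print(N, np.var(estimates))   # variance drops by ~4x per 4x more samples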
y_train = data_train[:,0] y_test = data_test_val[:,0] # normalize x and y num_classes = len(np.unique(y_train)) ''' base = np.min(y_train) #Check if data is 0-based if base != 0: y_train -= base y_test -= base ''' if input_norm: mean = np.mean(X_train,axis=0) variance = np.var(X_train,axis=0) X_train -= mean #The 1e-9 avoids dividing by zero X_train /= np.sqrt(variance)+1e-9 X_test -= mean X_test /= np.sqrt(variance)+1e-9 #epochs = np.floor(batch_size*max_iterations / N) #print('Train with approximately %d epochs' %(epochs)) # place for the input variables x = tf.placeholder("float", shape=[None, D], name = 'Input_data') y_ = tf.placeholder(tf.int64, shape=[None], name = 'Ground_truth') keep_prob = tf.placeholder("float") bn_train = tf.placeholder(tf.bool) #Boolean value to guide batchnorm
matrix_np = np.array(matrix)
print(matrix_np)
print("sum ", matrix_np.sum())
print(matrix_np[1, 1])
print("min in array", matrix_np.min())
# axis=0 reduces over rows (one value per column); axis=1 reduces over columns (one value per row)
print("min per column", matrix_np.min(axis=0))
print("min per row", matrix_np.min(axis=1))
print("max in array", matrix_np.max())
print("max per column", matrix_np.max(axis=0))
print("max per row", matrix_np.max(axis=1))
print("sum per column", matrix_np.sum(axis=0))
print("sum per row", matrix_np.sum(axis=1))
print("mean per row", matrix_np.mean(axis=1))
print("mean per column", matrix_np.mean(axis=0))
print("var ", np.var(matrix_np))
print("std ", np.std(matrix_np))
print("median ", np.median(matrix_np))

################ sqrt, sin, log, abs #####################
print("add ", np.add(matrix_np, 5))
print("sqrt ", np.sqrt(matrix_np))
print("sin ", np.sin(matrix_np))
print("log ", np.log(matrix_np))

matrix_abs = [[-15, 35], [9, -36]]
print(np.abs(matrix_abs))
print(np.add(matrix_abs, matrix_np))
print(np.array_equal(matrix_np, matrix_np))
print(np.array_equal(matrix_np, matrix_abs))
print(np.ceil(random_numbers))
print(np.floor(random_numbers))
# 2. Compute frobenius norm of each timestep of the utterance timestep_norms.extend(LA.norm(mat, axis=1)) utt_norms = sorted(utt_norms) timestep_norms = sorted(timestep_norms) fit_utt = stats.norm.pdf(utt_norms, np.mean(utt_norms), np.std(utt_norms)) fit_timestep = stats.norm.pdf(timestep_norms, np.mean(timestep_norms), np.std(timestep_norms)) #print(skew(fit_utt), kurtosis(fit_utt)) plt.plot(utt_norms, fit_utt, '-r') plt.hist(utt_norms, bins=50, normed=True, alpha=0.5) m, v, s, k = round(float(np.mean(utt_norms)), 3), round(float(np.var(utt_norms)), 3), round(skew(fit_utt), 3), round(kurtosis(fit_utt), 3) print(m, v, s, k) plt.title("Mean {}, Var {}, Skew {}, Kurt {}".format(m, v, s, k)) plt.savefig(utt_norm_dist, dpi=300) # Clear the figure plt.clf() plt.plot(timestep_norms, fit_timestep, '-r') plt.hist(timestep_norms, bins=50, normed=True, alpha=0.5) m, v, s, k = round(float(np.mean(timestep_norms)), 3), round(float(np.var(timestep_norms)), 3), round(skew(timestep_norms),
input_image = resized_image.transpose((2, 0, 1))

# Repeat image according to batch size for inference.
input_image = np.repeat(input_image[np.newaxis, :, :, :], input_shape[0], axis=0)

# Inference using Bayesian SegNet
start = time.time()
out = net.forward_all(data=input_image)
end = time.time()
print('%30s' % 'Executed Bayesian SegNet in ',
      str((end - start) * 1000), 'ms')

# confidence_output is presumably populated earlier from the network output;
# reduce over the Monte Carlo sample axis.
mean_confidence = np.mean(confidence_output, axis=0, dtype=np.float64)
var_confidence = np.var(confidence_output, axis=0, dtype=np.float64)

# Prepare segmented image results
classes = np.argmax(mean_confidence, axis=0)
segmentation_bgr = np.asarray(LABEL_COLOURS[classes]).astype(np.uint8)
segmented_image = overlay_segmentation_results(resized_image, segmentation_bgr)

# Prepare confidence results
confidence = np.amax(mean_confidence, axis=0)

# Prepare uncertainty results
uncertainty = np.mean(var_confidence, axis=0, dtype=np.float64)
print(np.sqrt(np.mean(uncertainty)))
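# Hedged sketch (numpy only, fake softmax volumes): the reduction above takes
# T stochastic softmax outputs of shape (T, C, H, W), averages over the
# sample axis for the class map, and averages the per-class variance for a
# per-pixel uncertainty map.
import numpy as np

rng = np.random.default_rng(0)
T, C, H, W = 8, 3, 4, 5
logits = rng.normal(size=(T, C, H, W))
probs = np.exp(logits) / np.exp(logits).sum(axis=1, keepdims=True)  # softmax over classes

mean_conf = np.mean(probs, axis=0)                  # (C, H, W)
var_conf = np.var(probs, axis=0)                    # (C, H, W)
class_map = np.argmax(mean_conf, axis=0)            # (H, W)
uncertainty_map = np.mean(var_conf, axis=0)         # (H, W)
print(class_map.shape, float(np.sqrt(np.mean(uncertainty_map))))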
def _partial_fit(self, X, y, classes=None, _refit=False, sample_weight=None): """Actual implementation of Gaussian NB fitting. Parameters ---------- X : array-like, shape (n_samples, n_features) Training vectors, where n_samples is the number of samples and n_features is the number of features. y : array-like, shape (n_samples,) Target values. classes : array-like, shape (n_classes,) List of all the classes that can possibly appear in the y vector. Must be provided at the first call to partial_fit, can be omitted in subsequent calls. _refit: bool If true, act as though this were the first time we called _partial_fit (ie, throw away any past fitting and start over). sample_weight : array-like, shape (n_samples,), optional Weights applied to individual samples (1. for unweighted). Returns ------- self : object Returns self. """ X, y = check_X_y(X, y) # If the ratio of data variance between dimensions is too small, it # will cause numerical errors. To address this, we artificially # boost the variance by epsilon, a small fraction of the standard # deviation of the largest dimension. epsilon = 1e-9 * np.var(X, axis=0).max() if _refit: self.classes_ = None if _check_partial_fit_first_call(self, classes): # This is the first call to partial_fit: # initialize various cumulative counters n_features = X.shape[1] n_classes = len(self.classes_) self.theta_ = np.zeros((n_classes, n_features)) self.sigma_ = np.zeros((n_classes, n_features)) self.class_prior_ = np.zeros(n_classes) self.class_count_ = np.zeros(n_classes) else: if X.shape[1] != self.theta_.shape[1]: msg = "Number of features %d does not match previous data %d." raise ValueError(msg % (X.shape[1], self.theta_.shape[1])) # Put epsilon back in each time self.sigma_[:, :] -= epsilon classes = self.classes_ unique_y = np.unique(y) unique_y_in_classes = in1d(unique_y, classes) if not np.all(unique_y_in_classes): raise ValueError("The target label(s) %s in y do not exist in the " "initial classes %s" % (y[~unique_y_in_classes], classes)) for y_i in unique_y: i = classes.searchsorted(y_i) X_i = X[y == y_i, :] if sample_weight is not None: sw_i = sample_weight[y == y_i] N_i = sw_i.sum() else: sw_i = None N_i = X_i.shape[0] new_theta, new_sigma = self._update_mean_variance( self.class_count_[i], self.theta_[i, :], self.sigma_[i, :], X_i, sw_i) self.theta_[i, :] = new_theta self.sigma_[i, :] = new_sigma self.class_count_[i] += N_i self.sigma_[:, :] += epsilon self.class_prior_[:] = self.class_count_ / np.sum(self.class_count_) return self
def versuch_auswerten(versuch_werte, versuch_name, header): # Werte verarbeitbar machen delta_l_values = pd.to_numeric(versuch_werte.delta_l_t).values delta_r_values = pd.to_numeric(versuch_werte.delta_r_t).values delta_m_values = pd.to_numeric(versuch_werte.delta_m_t).values geschwindigkeit_l_values = pd.to_numeric( versuch_werte.geschwindigkeit_l).values geschwindigkeit_r_values = pd.to_numeric( versuch_werte.geschwindigkeit_r).values geschwindigkeit_m_values = pd.to_numeric( versuch_werte.geschwindigkeit_m).values tendenz_l_values = pd.to_numeric(versuch_werte.tendenz_l).values tendenz_r_values = pd.to_numeric(versuch_werte.tendenz_r).values tendenz_m_values = pd.to_numeric(versuch_werte.tendenz_m).values blick_l_x_values = pd.to_numeric(versuch_werte.blick_l_x).values blick_l_y_values = pd.to_numeric(versuch_werte.blick_l_y).values blick_r_x_values = pd.to_numeric(versuch_werte.blick_r_x).values blick_r_y_values = pd.to_numeric(versuch_werte.blick_r_y).values sacc_m_values = pd.to_numeric(versuch_werte.sacc_m).values sacc_l_values = pd.to_numeric(versuch_werte.sacc_l).values sacc_r_values = pd.to_numeric(versuch_werte.sacc_r).values # Mittelkwerte bestimmen # Kein Exceptionhandling, da ein leeres Array dazu fuehrt, dass np.mean() nan zurueckgibt und keine Exception if delta_l_values[np.nonzero(delta_l_values)].size == 0: mean_delta_l = -1 else: mean_delta_l = np.mean(delta_l_values[np.nonzero(delta_l_values)]) if delta_r_values[np.nonzero(delta_r_values)].size == 0: mean_delta_r = -1 else: mean_delta_r = np.mean(delta_r_values[np.nonzero(delta_r_values)]) if delta_m_values[np.nonzero(delta_m_values)].size == 0: mean_delta_m = -1 else: mean_delta_m = np.mean(delta_m_values[np.nonzero(delta_m_values)]) if geschwindigkeit_l_values[np.nonzero( geschwindigkeit_l_values)].size == 0: mean_geschwindigkeit_l = -1 else: mean_geschwindigkeit_l = np.mean( geschwindigkeit_l_values[np.nonzero(geschwindigkeit_l_values)]) if geschwindigkeit_r_values[np.nonzero( geschwindigkeit_r_values)].size == 0: mean_geschwindigkeit_r = -1 else: mean_geschwindigkeit_r = np.mean( geschwindigkeit_r_values[np.nonzero(geschwindigkeit_r_values)]) if geschwindigkeit_m_values[np.nonzero( geschwindigkeit_m_values)].size == 0: mean_geschwindigkeit_m = -1 else: mean_geschwindigkeit_m = np.mean( geschwindigkeit_m_values[np.nonzero(geschwindigkeit_m_values)]) header = np.append(header, [ versuch_name + '_mean_delta_l', versuch_name + '_mean_delta_r', versuch_name + '_mean_delta_m', versuch_name + '_mean_geschwindigkeit_l', versuch_name + '_mean_geschwindigkeit_r', versuch_name + '_mean_geschwindigkeit_m' ]) # Maxima bestimmen try: max_delta_l = np.max(delta_l_values[np.nonzero(delta_l_values)]) except ValueError: max_delta_l = -1 try: max_delta_r = np.max(delta_r_values[np.nonzero(delta_r_values)]) except ValueError: max_delta_r = -1 try: max_delta_m = np.max(delta_m_values[np.nonzero(delta_m_values)]) except ValueError: max_delta_m = -1 try: max_geschwindigkeit_l = np.max( geschwindigkeit_l_values[np.nonzero(geschwindigkeit_l_values)]) except ValueError: max_geschwindigkeit_l = -1 try: max_geschwindigkeit_r = np.max( geschwindigkeit_r_values[np.nonzero(geschwindigkeit_r_values)]) except ValueError: max_geschwindigkeit_r = -1 try: max_geschwindigkeit_m = np.max( geschwindigkeit_m_values[np.nonzero(geschwindigkeit_m_values)]) except ValueError: max_geschwindigkeit_m = -1 header = np.append(header, [ versuch_name + '_max_delta_l', versuch_name + '_max_delta_r', versuch_name + '_max_delta_m', versuch_name + 
'_max_geschwindigkeit_l', versuch_name + '_max_geschwindigkeit_r', versuch_name + '_max_geschwindigkeit_m' ]) # Minima bestimmen #Exceptionhandling fuer die Versuchspersonen, bei denen nur ein Auge gemessen wurde try: min_delta_l = np.min(delta_l_values[np.nonzero(delta_l_values)]) except ValueError: min_delta_l = -1 try: min_delta_r = np.min(delta_r_values[np.nonzero(delta_r_values)]) except ValueError: min_delta_r = -1 try: min_delta_m = np.min(delta_m_values[np.nonzero(delta_m_values)]) except ValueError: min_delta_m = -1 try: min_geschwindigkeit_l = np.min( geschwindigkeit_l_values[np.nonzero(geschwindigkeit_l_values)]) except ValueError: min_geschwindigkeit_l = -1 try: min_geschwindigkeit_r = np.min( geschwindigkeit_r_values[np.nonzero(geschwindigkeit_r_values)]) except ValueError: min_geschwindigkeit_r = -1 try: min_geschwindigkeit_m = np.min( geschwindigkeit_m_values[np.nonzero(geschwindigkeit_m_values)]) except ValueError: min_geschwindigkeit_m = -1 header = np.append(header, [ versuch_name + '_min_delta_l', versuch_name + '_min_delta_r', versuch_name + '_min_delta_m', versuch_name + '_min_geschwindigkeit_l', versuch_name + '_min_geschwindigkeit_r', versuch_name + '_min_geschwindigkeit_m' ]) # Standardabweichungen berechnen if delta_l_values[np.nonzero(delta_l_values)].size == 0: std_delta_l = -1 else: std_delta_l = np.std(delta_l_values[np.nonzero(delta_l_values)]) if delta_r_values[np.nonzero(delta_r_values)].size == 0: std_delta_r = -1 else: std_delta_r = np.std(delta_r_values[np.nonzero(delta_r_values)]) if delta_m_values[np.nonzero(delta_m_values)].size == 0: std_delta_m = -1 else: std_delta_m = np.std(delta_m_values[np.nonzero(delta_m_values)]) if geschwindigkeit_l_values[np.nonzero( geschwindigkeit_l_values)].size == 0: std_geschwindigkeit_l = -1 else: std_geschwindigkeit_l = np.std( geschwindigkeit_l_values[np.nonzero(geschwindigkeit_l_values)]) if geschwindigkeit_r_values[np.nonzero( geschwindigkeit_r_values)].size == 0: std_geschwindigkeit_r = -1 else: std_geschwindigkeit_r = np.std( geschwindigkeit_r_values[np.nonzero(geschwindigkeit_r_values)]) if geschwindigkeit_m_values[np.nonzero( geschwindigkeit_m_values)].size == 0: std_geschwindigkeit_m = -1 else: std_geschwindigkeit_m = np.std( geschwindigkeit_m_values[np.nonzero(geschwindigkeit_m_values)]) header = np.append(header, [ versuch_name + '_standardabweichung_delta_l', versuch_name + '_standardabweichung_delta_r', versuch_name + '_standardabweichung_delta_m', versuch_name + '_standardabweichung_geschwindigkeit_l', versuch_name + '_standardabweichung_geschwindigkeit_r', versuch_name + '_standardabweichung_geschwindigkeit_m' ]) # Varianzen berechnen if delta_l_values[np.nonzero(delta_l_values)].size == 0: var_delta_l = -1 else: var_delta_l = np.var(delta_l_values[np.nonzero(delta_l_values)]) if delta_r_values[np.nonzero(delta_r_values)].size == 0: var_delta_r = -1 else: var_delta_r = np.var(delta_r_values[np.nonzero(delta_r_values)]) if delta_m_values[np.nonzero(delta_m_values)].size == 0: var_delta_m = -1 else: var_delta_m = np.var(delta_m_values[np.nonzero(delta_m_values)]) if geschwindigkeit_l_values[np.nonzero( geschwindigkeit_l_values)].size == 0: var_geschwindigkeit_l = -1 else: var_geschwindigkeit_l = np.var( geschwindigkeit_l_values[np.nonzero(geschwindigkeit_l_values)]) if geschwindigkeit_r_values[np.nonzero( geschwindigkeit_r_values)].size == 0: var_geschwindigkeit_r = -1 else: var_geschwindigkeit_r = np.var( geschwindigkeit_r_values[np.nonzero(geschwindigkeit_r_values)]) if 
geschwindigkeit_m_values[np.nonzero( geschwindigkeit_m_values)].size == 0: var_geschwindigkeit_m = -1 else: var_geschwindigkeit_m = np.var( geschwindigkeit_m_values[np.nonzero(geschwindigkeit_m_values)]) header = np.append(header, [ versuch_name + '_varianz_delta_l', versuch_name + '_varianz_delta_r', versuch_name + '_varianz_delta_m', versuch_name + '_varianz_geschwindigkeit_l', versuch_name + '_varianz_geschwindigkeit_r', versuch_name + '_varianz_geschwindigkeit_m' ]) # Tendenz auswerten condition_voraus_l = np.equal(tendenz_l_values, 1) num_voraus_l = len(np.extract(condition_voraus_l, tendenz_l_values)) condition_voraus_r = np.equal(tendenz_r_values, 1) num_voraus_r = len(np.extract(condition_voraus_r, tendenz_r_values)) condition_voraus_m = np.equal(tendenz_m_values, 1) num_voraus_m = len(np.extract(condition_voraus_m, tendenz_m_values)) condition_hinter_l = np.equal(tendenz_l_values, -1) num_hinter_l = len(np.extract(condition_hinter_l, tendenz_l_values)) condition_hinter_r = np.equal(tendenz_r_values, -1) num_hinter_r = len(np.extract(condition_hinter_r, tendenz_r_values)) condition_hinter_m = np.equal(tendenz_m_values, -1) num_hinter_m = len(np.extract(condition_hinter_m, tendenz_m_values)) # -100 steht fuer keinen errechneten Wert, sondern fuer nich vorhanden. if num_voraus_l == 0 and num_hinter_l == 0: tendenz_l = -100 else: if num_voraus_l > num_hinter_l: tendenz_l = 1 else: if num_hinter_l > num_voraus_l: tendenz_l = -1 else: tendenz_l = 0 if num_voraus_r == 0 and num_hinter_r == 0: tendenz_r = -100 else: if num_voraus_r > num_hinter_r: tendenz_r = 1 else: if num_hinter_r > num_voraus_r: tendenz_r = -1 else: tendenz_r = 0 if num_voraus_m == 0 and num_hinter_m == 0: tendenz_m = -100 else: if num_voraus_m > num_hinter_m: tendenz_m = 1 else: if num_hinter_m > num_voraus_m: tendenz_m = -1 else: tendenz_m = 0 header = np.append(header, [ versuch_name + '_tendenz_l', versuch_name + '_tendenz_r', versuch_name + '_tendenz_m' ]) # Berechnung der Kovarianz vom linken und rechten Auge cov_x = np.cov(blick_l_x_values, blick_r_x_values)[0][1] cov_y = np.cov(blick_l_y_values, blick_r_y_values)[0][1] header = np.append(header, [ versuch_name + '_Kovarianz_blick_x', versuch_name + '_Kovarianz_blick_y' ]) verhaeltnis_l_x_da = blick_l_x_values[np.nonzero( blick_l_x_values)].size / blick_l_x_values.size verhaeltnis_l_y_da = blick_l_y_values[np.nonzero( blick_l_y_values)].size / blick_l_y_values.size verhaeltnis_r_x_da = blick_r_x_values[np.nonzero( blick_r_x_values)].size / blick_r_x_values.size verhaeltnis_r_y_da = blick_r_y_values[np.nonzero( blick_r_y_values)].size / blick_r_y_values.size sacc_m = np.sum(sacc_m_values) sacc_l = np.sum(sacc_l_values) sacc_r = np.sum(sacc_r_values) if versuch_name == 'Horizontal' or versuch_name == 'Liegende_8_schnell': sacc_rate_m = sacc_m / (999 * 4) sacc_rate_l = sacc_l / (999 * 4) sacc_rate_r = sacc_r / (999 * 4) else: sacc_rate_m = sacc_m / (999 * 5) sacc_rate_l = sacc_l / (999 * 5) sacc_rate_r = sacc_r / (999 * 5) header = np.append(header, [ versuch_name + '_links_verhaeltnis_x', versuch_name + '_links_verhaeltnis_y', versuch_name + '_rechts_verhaeltnis_x', versuch_name + '_rechts_verhaeltnis_y', versuch_name + '_sacc_m', versuch_name + '_sacc_rate_m', versuch_name + '_sacc_l', versuch_name + '_sacc_rate_l', versuch_name + '_sacc_r', versuch_name + '_sacc_rate_r' ]) yield [[ mean_delta_l, mean_delta_r, mean_delta_m, mean_geschwindigkeit_l, mean_geschwindigkeit_r, mean_geschwindigkeit_m, max_delta_l, max_delta_r, max_delta_m, max_geschwindigkeit_l, 
max_geschwindigkeit_r, max_geschwindigkeit_m, min_delta_l, min_delta_r, min_delta_m, min_geschwindigkeit_l, min_geschwindigkeit_r, min_geschwindigkeit_m, std_delta_l, std_delta_r, std_delta_m, std_geschwindigkeit_l, std_geschwindigkeit_r, std_geschwindigkeit_m, var_delta_l, var_delta_r, var_delta_m, var_geschwindigkeit_l, var_geschwindigkeit_r, var_geschwindigkeit_m, tendenz_l, tendenz_r, tendenz_m, cov_x, cov_y, verhaeltnis_l_x_da, verhaeltnis_l_y_da, verhaeltnis_r_x_da, verhaeltnis_r_y_da, sacc_m, sacc_rate_m, sacc_l, sacc_rate_l, sacc_r, sacc_rate_r ]] yield header
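# Hedged sketch (refactoring idea, not part of the original pipeline): the
# blocks above repeat the pattern "drop zeros, return -1 if nothing is left,
# otherwise apply np.mean / np.max / np.min / np.std / np.var". A small
# helper with a hypothetical name collapses that pattern:
import numpy as np

def safe_stat(values, stat, empty_value=-1):
    values = np.asarray(values, dtype=float)
    values = values[np.nonzero(values)]
    return empty_value if values.size == 0 else stat(values)

toy_values = np.array([0.0, 0.3, 0.0, 0.7])
print(safe_stat(toy_values, np.mean))   # 0.5
print(safe_stat(np.zeros(4), np.var))   # -1 (nothing left after filtering)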
def _update_mean_variance(n_past, mu, var, X, sample_weight=None): """Compute online update of Gaussian mean and variance. Given starting sample count, mean, and variance, a new set of points X, and optionally sample weights, return the updated mean and variance. (NB - each dimension (column) in X is treated as independent -- you get variance, not covariance). Can take scalar mean and variance, or vector mean and variance to simultaneously update a number of independent Gaussians. See Stanford CS tech report STAN-CS-79-773 by Chan, Golub, and LeVeque: http://i.stanford.edu/pub/cstr/reports/cs/tr/79/773/CS-TR-79-773.pdf Parameters ---------- n_past : int Number of samples represented in old mean and variance. If sample weights were given, this should contain the sum of sample weights represented in old mean and variance. mu : array-like, shape (number of Gaussians,) Means for Gaussians in original set. var : array-like, shape (number of Gaussians,) Variances for Gaussians in original set. sample_weight : array-like, shape (n_samples,), optional Weights applied to individual samples (1. for unweighted). Returns ------- total_mu : array-like, shape (number of Gaussians,) Updated mean for each Gaussian over the combined set. total_var : array-like, shape (number of Gaussians,) Updated variance for each Gaussian over the combined set. """ if X.shape[0] == 0: return mu, var # Compute (potentially weighted) mean and variance of new datapoints if sample_weight is not None: n_new = float(sample_weight.sum()) new_mu = np.average(X, axis=0, weights=sample_weight / n_new) new_var = np.average((X - new_mu) ** 2, axis=0, weights=sample_weight / n_new) else: n_new = X.shape[0] new_var = np.var(X, axis=0) new_mu = np.mean(X, axis=0) if n_past == 0: return new_mu, new_var n_total = float(n_past + n_new) # Combine mean of old and new data, taking into consideration # (weighted) number of observations total_mu = (n_new * new_mu + n_past * mu) / n_total # Combine variance of old and new data, taking into consideration # (weighted) number of observations. This is achieved by combining # the sum-of-squared-differences (ssd) old_ssd = n_past * var new_ssd = n_new * new_var total_ssd = (old_ssd + new_ssd + (n_past / float(n_new * n_total)) * (n_new * mu - n_new * new_mu) ** 2) total_var = total_ssd / n_total return total_mu, total_var
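# Hedged sketch: a quick numerical check that the online update above agrees
# with computing np.mean / np.var on the concatenated data (assumes the
# _update_mean_variance defined above is callable as a plain function).
import numpy as np

rng = np.random.default_rng(0)
X_old = rng.normal(size=(30, 4))
X_new = rng.normal(loc=2.0, size=(20, 4))

mu, var = np.mean(X_old, axis=0), np.var(X_old, axis=0)
total_mu, total_var = _update_mean_variance(X_old.shape[0], mu, var, X_new)

X_all = np.vstack([X_old, X_new])
print(np.allclose(total_mu, np.mean(X_all, axis=0)))   # True
print(np.allclose(total_var, np.var(X_all, axis=0)))   # True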
def SNR(self, y):
    return self.P_0 - np.var(y)