def moments(self):
    """Calculate covariance and correlation matrices, plus trait, genotypic and ontogenetic means"""
    zs = np.array([ind["z"] for ind in self.pop])
    xs = np.array([ind["x"] for ind in self.pop])
    ys = np.array([ind["y"] for ind in self.pop])
    bs = np.array([ind["b"] for ind in self.pop])
    zmean = zs.mean(axis=0)
    xmean = xs.mean(axis=0)
    ymean = ys.mean(axis=0)
    bmean = bs.mean(axis=0)
    phenotypic = np.cov(zs, rowvar=0, bias=1)
    genetic = np.cov(xs, rowvar=0, bias=1)
    heritability = genetic[np.diag_indices_from(genetic)] / phenotypic[np.diag_indices_from(phenotypic)]
    # np.corrcoef ignores the deprecated ``bias`` argument, so it is omitted here
    corr_phenotypic = np.corrcoef(zs, rowvar=0)
    corr_genetic = np.corrcoef(xs, rowvar=0)
    avgP = avg_ratio(corr_phenotypic, self.modules)
    avgG = avg_ratio(corr_genetic, self.modules)
    return {
        "y.mean": ymean,
        "b.mean": bmean,
        "z.mean": zmean,
        "x.mean": xmean,
        "P": phenotypic,
        "G": genetic,
        "h2": heritability,
        "avgP": avgP,
        "avgG": avgG,
        "corrP": corr_phenotypic,
        "corrG": corr_genetic,
    }
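# A minimal standalone sketch of the heritability calculation above: h2 is the
# element-wise ratio of the diagonals of the genetic (G) and phenotypic (P)
# covariance matrices. Synthetic data stands in for self.pop, and the avg_ratio
# helper is not reproduced (assumptions, not the original class).
import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=(500, 4))            # genotypic values: 500 individuals x 4 traits
z = x + 0.5 * rng.normal(size=x.shape)   # phenotypes = genotype + environmental noise

P = np.cov(z, rowvar=False, bias=True)   # phenotypic covariance
G = np.cov(x, rowvar=False, bias=True)   # genetic covariance
h2 = np.diag(G) / np.diag(P)             # per-trait heritability
print(h2)                                # each entry close to 1 / 1.25 = 0.8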
def lda(data, labels, redDim):
    # Centre data
    data -= data.mean(axis=0)
    nData = np.shape(data)[0]
    nDim = np.shape(data)[1]

    Sw = np.zeros((nDim, nDim))
    Sb = np.zeros((nDim, nDim))

    C = np.cov(np.transpose(data))

    # Loop over classes
    classes = np.unique(labels)
    for i in range(len(classes)):
        # Find relevant datapoints
        indices = np.squeeze(np.where(labels == classes[i]))
        d = np.squeeze(data[indices, :])
        classcov = np.cov(np.transpose(d))
        # np.float was removed from NumPy; the builtin float is equivalent here
        Sw += float(np.shape(indices)[0]) / nData * classcov

    Sb = C - Sw

    # Now solve for W and compute mapped data
    # Compute eigenvalues, eigenvectors and sort into order
    # (la is expected to be scipy.linalg, whose eig solves the generalized problem)
    evals, evecs = la.eig(Sw, Sb)
    indices = np.argsort(evals)
    indices = indices[::-1]
    evecs = evecs[:, indices]
    evals = evals[indices]
    w = evecs[:, :redDim]
    newData = np.dot(data, w)
    return newData, w
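# Usage sketch for the lda function above on toy two-class data; it assumes
# numpy is imported as np and scipy.linalg as la (so la.eig solves the
# generalized eigenproblem used inside lda). Values are illustrative only.
import numpy as np
import scipy.linalg as la

rng = np.random.default_rng(1)
class0 = rng.normal(size=(50, 3))
class1 = rng.normal(size=(50, 3)) + np.array([4.0, 0.0, 0.0])
data = np.vstack([class0, class1]).astype(float)
labels = np.array([0] * 50 + [1] * 50)

newData, w = lda(data, labels, redDim=1)
print(newData.shape, w.shape)  # (100, 1) (3, 1)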
def bootstrapped_intercluster_mahalanobis(cluster1, cluster2, n_boots=1000,
    fix_covariances=True):
    """Bootstrap the intercluster distance.

    Returns:
        m - The mean distance
        CI - 95% confidence interval on the distance
        distances - an array of the distances measured on each boot
    """
    d_l = []

    # Determine the covariance matrices, or recalculate each time
    if fix_covariances:
        icov1 = np.linalg.inv(np.cov(cluster1, rowvar=0))
        icov2 = np.linalg.inv(np.cov(cluster2, rowvar=0))
    else:
        icov1, icov2 = None, None

    # Bootstrap
    for n_boot in range(n_boots):
        # Draw
        idxs1 = np.random.randint(0, len(cluster1), len(cluster1))
        idxs2 = np.random.randint(0, len(cluster2), len(cluster2))

        # Calculate and store
        d_l.append(intercluster_mahalanobis(
            cluster1[idxs1], cluster2[idxs2], icov1, icov2))

    # Statistics
    d_a = np.asarray(d_l)
    m = np.mean(d_a)
    # np.percentile replaces matplotlib.mlab.prctile, which has been removed from matplotlib
    CI = np.percentile(d_a, (2.5, 97.5))
    return m, CI, d_a
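# The intercluster_mahalanobis helper called above is not shown; the sketch
# below is a hypothetical stand-in that measures the Mahalanobis distance
# between the two cluster means using the average of the two inverse
# covariances (an assumed form, not necessarily the original helper).
import numpy as np

def intercluster_mahalanobis_sketch(cluster1, cluster2, icov1=None, icov2=None):
    if icov1 is None:
        icov1 = np.linalg.inv(np.cov(cluster1, rowvar=False))
    if icov2 is None:
        icov2 = np.linalg.inv(np.cov(cluster2, rowvar=False))
    diff = cluster1.mean(axis=0) - cluster2.mean(axis=0)
    icov = 0.5 * (icov1 + icov2)          # average the two inverse covariances
    return float(np.sqrt(diff @ icov @ diff))

rng = np.random.default_rng(2)
a = rng.normal(size=(200, 2))
b = rng.normal(size=(200, 2)) + 3.0
print(intercluster_mahalanobis_sketch(a, b))  # roughly 3 * sqrt(2) for unit covariance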
def test_cov_parameters(self): # Ticket #91 x = np.random.random((3, 3)) y = x.copy() np.cov(x, rowvar=1) np.cov(y, rowvar=0) assert_array_equal(x, y)
def wprp_split(gals, red_split, box_size, cols=['ssfr', 'pred'], jack_nside=3,
               rpmin=0.1, rpmax=20.0, Nrp=25):  # for 2 splits
    """
    Calculates the 2PCF of gals binned by sSFR, separated by red_split.
    Note that sSFR can be substituted in _cols_ to bin by, say, concentration

    Accepts:
        gals - numpy array with objects, their positions, and attributes
        red_split - value which separates two populations
        box_size - box_size of the objects in gals
        cols - tags to specify the actual and predicted distribution. Defaults to
            ['ssfr', 'pred'], but could be modified to use, say ['c', 'pred_c']
            (assuming they exist in gals).

    Returns:
        [r, [actual], [pred], [err], [chi2]]
        r - centers of r bins
        [actual] - clustering of red/blue galaxies
        [pred] - clustering of predicted red/blue galaxies
        [err] - errorbars for red/blue galaxies
        [chi2] - goodness of fit for red/blue galaxies
    """
    r, rbins = make_r_scale(rpmin, rpmax, Nrp)
    n_jack = jack_nside ** 2
    results = []
    results.append(r)
    r_jack = []
    b_jack = []
    for col in cols:
        red = gals[gals[col] < red_split]
        blue = gals[gals[col] > red_split]
        r = calculate_xi(red, box_size, True, jack_nside, rpmin, rpmax, Nrp)
        b = calculate_xi(blue, box_size, True, jack_nside, rpmin, rpmax, Nrp)
        results.append([r[0], b[0]])
        if jack_nside <= 1:
            r_var = r[1]
            b_var = b[1]
        else:
            r_jack.append(r[2])
            b_jack.append(b[2])
    if jack_nside > 1:
        r_cov = np.cov(r_jack[0] - r_jack[1], rowvar=0, bias=1) * (n_jack - 1)
        b_cov = np.cov(b_jack[0] - b_jack[1], rowvar=0, bias=1) * (n_jack - 1)
        r_var = np.sqrt(np.diag(r_cov))
        b_var = np.sqrt(np.diag(b_cov))
    results.append([r_var, b_var])
    if jack_nside > 1:
        r_chi2 = calculate_chi_square(results[1][0], results[2][0], r_cov)
        b_chi2 = calculate_chi_square(results[1][1], results[2][1], b_cov)
        print("Goodness of fit for the red (lo) and blue (hi): ", r_chi2, b_chi2)
    else:
        d_r = results[1][0] - results[2][0]
        d_b = results[1][1] - results[2][1]
        r_chi2 = d_r**2 / np.sqrt(r_var[0]**2 + r_var[1]**2)
        b_chi2 = d_b**2 / np.sqrt(b_var[0]**2 + b_var[1]**2)
    results.append([r_chi2, b_chi2])
    return results
def test_2d_wo_missing(self): # Test cov on 1 2D variable w/o missing values x = self.data.reshape(3, 4) assert_almost_equal(np.cov(x), cov(x)) assert_almost_equal(np.cov(x, rowvar=False), cov(x, rowvar=False)) assert_almost_equal(np.cov(x, rowvar=False, bias=True), cov(x, rowvar=False, bias=True))
def test_update_mean_cov_L_lmbda_converges_to_weighted_mean_and_cov(): N_init = 10 N = 10000 D = 2 X = np.random.randn(N, D) weights = np.random.rand(N) old_mean = np.average(X[:N_init], axis=0, weights=weights[:N_init]) old_cov_L = np.linalg.cholesky(np.cov(X[:N_init].T, ddof=0)) sum_old_weights = np.sum(weights[:N_init]) lmbdas = weights_to_lmbdas(sum_old_weights, weights[N_init:]) mean, cov_L = update_mean_cov_L_lmbda(X[N_init:], old_mean, old_cov_L, lmbdas) full_mean = np.average(X, axis=0, weights=weights) # the above method uses N rather than N-1 to normalise covariance (biased) try: full_cov = np.cov(X.T, ddof=0, aweights=weights) except TypeError: raise SkipTest("Numpy's cov method does not support aweights keyword.") cov = np.dot(cov_L, cov_L.T) assert_allclose(full_mean, mean) assert_allclose(full_cov, cov, atol=1e-2)
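# Standalone check of the weighted-covariance convention the test above relies
# on: with ddof=0 and aweights, np.cov uses the weighted mean and normalises by
# the sum of the weights. A minimal sketch with random data:
import numpy as np

rng = np.random.default_rng(3)
X = rng.normal(size=(1000, 2))
w = rng.random(1000)

cov_np = np.cov(X.T, ddof=0, aweights=w)

mu = np.average(X, axis=0, weights=w)
d = X - mu
cov_manual = (d.T * w) @ d / w.sum()      # sum_i w_i d_i d_i^T / sum_i w_i

print(np.allclose(cov_np, cov_manual))    # True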
def testComponentSeparation(self): A = generate_covsig([[10,5,2],[5,10,2],[2,2,10]], 500) B = generate_covsig([[10,2,2],[2,10,5],[2,5,10]], 500) X = np.dstack([A,B]) W, V = csp(X,[1,2]) C1a = np.cov(X[:,:,0].dot(W).T) C2a = np.cov(X[:,:,1].dot(W).T) Y = np.dstack([B,A]) W, V = csp(Y,[1,2]) C1b = np.cov(Y[:,:,0].dot(W).T) C2b = np.cov(Y[:,:,1].dot(W).T) # check symmetric case self.assertTrue(np.allclose(C1a.diagonal(), C2a.diagonal()[::-1])) self.assertTrue(np.allclose(C1b.diagonal(), C2b.diagonal()[::-1])) # swapping class labels (or in this case, trials) should not change the result self.assertTrue(np.allclose(C1a, C1b)) self.assertTrue(np.allclose(C2a, C2b)) # variance of first component should be greatest for class 1 self.assertTrue(C1a[0,0] > C2a[0,0]) # variance of last component should be greatest for class 1 self.assertTrue(C1a[2,2] < C2a[2,2]) # variance of central component should be equal for both classes self.assertTrue(np.allclose(C1a[1,1], C2a[1,1]))
def test_sample_conditional_mixedkernel(session_tf): q_mu = np.random.randn(Data.M , Data.L) # M x L q_sqrt = np.array([np.tril(np.random.randn(Data.M, Data.M)) for _ in range(Data.L)]) # L x M x M Z = Data.X[:Data.M,...] # M x D N = int(10e5) Xs = np.ones((N, Data.D), dtype=float_type) values = {"Xnew": Xs, "q_mu": q_mu, "q_sqrt": q_sqrt} placeholders = _create_placeholder_dict(values) feed_dict = _create_feed_dict(placeholders, values) # Path 1: mixed kernel: most efficient route W = np.random.randn(Data.P, Data.L) mixed_kernel = mk.SeparateMixedMok([RBF(Data.D) for _ in range(Data.L)], W) mixed_feature = mf.MixedKernelSharedMof(InducingPoints(Z.copy())) sample = sample_conditional(placeholders["Xnew"], mixed_feature, mixed_kernel, placeholders["q_mu"], q_sqrt=placeholders["q_sqrt"], white=True) value, mean, var = session_tf.run(sample, feed_dict=feed_dict) # Path 2: independent kernels, mixed later separate_kernel = mk.SeparateIndependentMok([RBF(Data.D) for _ in range(Data.L)]) shared_feature = mf.SharedIndependentMof(InducingPoints(Z.copy())) sample2 = sample_conditional(placeholders["Xnew"], shared_feature, separate_kernel, placeholders["q_mu"], q_sqrt=placeholders["q_sqrt"], white=True) value2, mean2, var2 = session_tf.run(sample2, feed_dict=feed_dict) value2 = np.matmul(value2, W.T) # check if mean and covariance of samples are similar np.testing.assert_array_almost_equal(np.mean(value, axis=0), np.mean(value2, axis=0), decimal=1) np.testing.assert_array_almost_equal(np.cov(value, rowvar=False), np.cov(value2, rowvar=False), decimal=1)
def get_stats(arrs, interpolatenans=False):
    arrslen = len(arrs)
    if DEBUG_PRINT:
        print("array nums:", arrslen)
    stats = [0] * arrslen
    for i, arr in enumerate(arrs):
        if len(arrs[i].shape) > 2:
            stats[i] = None
        else:
            maskedarr = ma.masked_array(arrs[i], fill_value=0)
            if interpolatenans:
                arr = interpolate_nans(arr)
            else:
                arr = maskedarr.filled()  # check and see what happens when you interpolate
            stats[i] = {'avgs': [np.mean(arr, axis=0), np.mean(arr, axis=1)],
                        'stdevs': [np.std(arr, axis=0), np.std(arr, axis=1)],
                        'cov': [0, 0]}
            xlen = arr.shape[0]
            ylen = arr.shape[1]
            # get specific covariance values along x axis
            covx = np.zeros(xlen)
            covar = np.cov(arr)
            # get the covariance values by row for dim 1
            for x in range(1, xlen):
                covx[x - 1] = covar[x][x - 1]
            stats[i]['cov'][0] = covx
            # get specific covariance values along y axis
            covy = np.zeros(ylen)
            covar = np.cov(arr, rowvar=0)
            # get the covariance values by col for dim 2
            for y in range(1, ylen):
                covy[y - 1] = covar[y][y - 1]
            stats[i]['cov'][1] = covy
    return stats
def test_pairwise_distances_data_derived_params(n_jobs, metric, dist_function, y_is_x): # check that pairwise_distances give the same result in sequential and # parallel, when metric has data-derived parameters. with config_context(working_memory=1): # to have more than 1 chunk rng = np.random.RandomState(0) X = rng.random_sample((1000, 10)) if y_is_x: Y = X expected_dist_default_params = squareform(pdist(X, metric=metric)) if metric == "seuclidean": params = {'V': np.var(X, axis=0, ddof=1)} else: params = {'VI': np.linalg.inv(np.cov(X.T)).T} else: Y = rng.random_sample((1000, 10)) expected_dist_default_params = cdist(X, Y, metric=metric) if metric == "seuclidean": params = {'V': np.var(np.vstack([X, Y]), axis=0, ddof=1)} else: params = {'VI': np.linalg.inv(np.cov(np.vstack([X, Y]).T)).T} expected_dist_explicit_params = cdist(X, Y, metric=metric, **params) dist = np.vstack(tuple(dist_function(X, Y, metric=metric, n_jobs=n_jobs))) assert_allclose(dist, expected_dist_explicit_params) assert_allclose(dist, expected_dist_default_params)
def get_features(data): X = [d[0] for d in data] Y = [d[1] for d in data] Z = [d[2] for d in data] x_mean = np.mean(X) y_mean = np.mean(Y) z_mean = np.mean(Z) x_var = np.var(X) y_var = np.var(Y) z_var = np.var(Z) mean_magnitude = np.mean([math.sqrt(x*x + y*y +z*z) for (x,y,z) in izip(X,Y,Z)]) magnitude_mean = math.sqrt(x_mean*x_mean + y_mean*y_mean + z_mean*z_mean) sma = np.mean([math.fabs(x) + math.fabs(y) + math.fabs(z) for (x,y,z) in izip(X,Y,Z)]) corr_xy = (np.cov(X,Y) / (math.sqrt(x_var) * math.sqrt(y_var)))[0][1] corr_yz = (np.cov(Y,Z) / (math.sqrt(z_var) * math.sqrt(y_var)))[0][1] corr_xz = (np.cov(Z,X) / (math.sqrt(x_var) * math.sqrt(z_var)))[0][1] vector_d = [(x - x_mean, y - y_mean, z - z_mean) for (x,y,z) in izip(X,Y,Z)] vector_v = [x_mean, y_mean, z_mean] vector_p = [np.multiply((np.dot(d, vector_v)/np.dot(vector_v, vector_v)), vector_v) for d in vector_d] vector_h = [np.subtract(d, p) for d, p in izip(vector_d, vector_p)] mod_vector_p = [np.linalg.norm(p) for p in vector_p] mod_vector_h = [np.linalg.norm(h) for h in vector_h] cor_p_h = (np.cov(mod_vector_h,mod_vector_p) / (math.sqrt(np.var(mod_vector_h)) * math.sqrt(np.var(mod_vector_p))))[0][1] vector_p = np.mean(vector_p, axis=0) vector_h = np.mean(vector_h, axis=0) mod_vector_p = np.mean(mod_vector_p) mod_vector_h = np.mean(mod_vector_h) ret = [x_mean, y_mean, z_mean, x_var, y_var, z_var, mean_magnitude, magnitude_mean, sma, corr_xy, corr_yz, corr_xz, cor_p_h, mod_vector_p, mod_vector_h] ret.extend([x for x in vector_p]) ret.extend([x for x in vector_h]) return ret
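# Note on the correlation pattern used above: np.cov defaults to ddof=1 while
# np.var defaults to ddof=0, so cov(X, Y)[0][1] / (std(X) * std(Y)) is scaled
# by N / (N - 1) relative to the Pearson coefficient that np.corrcoef returns.
# A small sketch illustrating the difference:
import numpy as np

rng = np.random.default_rng(4)
X = rng.normal(size=100)
Y = 0.5 * X + rng.normal(size=100)

corr_manual = np.cov(X, Y)[0][1] / (np.sqrt(np.var(X)) * np.sqrt(np.var(Y)))
corr_np = np.corrcoef(X, Y)[0][1]
print(corr_manual, corr_np, corr_manual / corr_np)  # ratio is 100 / 99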
def main():
    fnm = 'prob3.data'
    data = md.read_data(fnm)
    D1 = data[0:8, ].T
    D2 = data[8:, ].T
    u1 = np.matrix((np.mean(D1[0, :]), np.mean(D1[1, :]))).T
    u2 = np.matrix((np.mean(D2[0, :]), np.mean(D2[1, :]))).T
    sigma1 = np.asmatrix(np.cov(D1, bias=1))
    # the second class covariance is computed from D2; the original used D1 here,
    # which looks like a copy-paste slip
    sigma2 = np.asmatrix(np.cov(D2, bias=1))
    g1 = discrim_func(u1, sigma1)
    g2 = discrim_func(u2, sigma2)
    steps = 100
    x = np.linspace(-2, 2, steps)
    y = np.linspace(-6, 6, steps)
    X, Y = np.meshgrid(x, y)
    z = [g1(X[r, c], Y[r, c]) - g2(X[r, c], Y[r, c])
         for r in range(0, steps) for c in range(0, steps)]
    Z = np.array(z)
    px = X.ravel()
    py = Y.ravel()
    pz = Z.ravel()
    gridsize = 50
    plot = plt.subplot(111)
    plt.hexbin(px, py, C=pz, gridsize=gridsize, cmap=cm.jet, bins=None)
    cb = plt.colorbar()
    cb.set_label('g1 minus g2')
    return plot
def cov_estimation(list_of_recarrays, index_name, pair_wise=False): def get_the_other_name(rec, index_name): assert len(rec.dtype.names) == 2 name = [nm for nm in rec.dtype.names if nm != index_name] assert len(name) == 1 return name[0] for array in list_of_recarrays: array[get_the_other_name(array, index_name)] = winsorize(array[get_the_other_name(array, index_name)], 99) nn = len(list_of_recarrays) if not pair_wise: new_rec = list_of_recarrays[0] for ii in range(1, nn): new_rec = rec_join(index_name, new_rec, list_of_recarrays[ii], jointype='inner', defaults=None, r1postfix='', r2postfix=str(ii+1)) dat_mat = np.c_[[new_rec[nm] for nm in new_rec.dtype.names if nm != index_name]] covmat = np.cov(dat_mat) else : covmat = np.zeros((nn, nn)) for ii in range(0, nn): covmat[ii,ii] = list_of_recarrays[ii][get_the_other_name(list_of_recarrays[ii], index_name)].var() for jj in range(ii+1, nn): new_rec = rec_join(index_name, list_of_recarrays[ii], list_of_recarrays[jj], jointype='inner', defaults=None, r1postfix='1', r2postfix='2') dat_mat = np.c_[[new_rec[nm] for nm in new_rec.dtype.names if nm != index_name]] tmp_cov = np.cov(dat_mat)[0,1] covmat[ii,jj] = tmp_cov covmat[jj,ii] = tmp_cov return covmat
def test_nancov(self): targ0 = np.cov(self.arr_float_2d, self.arr_float1_2d)[0, 1] targ1 = np.cov(self.arr_float_2d.flat, self.arr_float1_2d.flat)[0, 1] self.check_nancorr_nancov_2d(nanops.nancov, targ0, targ1) targ0 = np.cov(self.arr_float_1d, self.arr_float1_1d)[0, 1] targ1 = np.cov(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0, 1] self.check_nancorr_nancov_1d(nanops.nancov, targ0, targ1)
def dataNorm(self):
    SXX = np.cov(self.X)
    U, l, Ut = LA.svd(SXX, full_matrices=True)
    H = np.dot(LA.sqrtm(LA.inv(np.diag(l))), Ut)
    self.nX = np.dot(H, self.X)
    #print np.cov(self.nX)
    #print "mean:"
    #print np.mean(self.nX)
    SYY = np.cov(self.Y)
    U, l, Ut = LA.svd(SYY, full_matrices=True)
    H = np.dot(LA.sqrtm(LA.inv(np.diag(l))), Ut)
    #print "H"
    #print H
    self.nY = np.dot(H, self.Y)
    #print np.cov(self.nY)
    print("dataNorm_X:")
    for i in range(len(self.nX)):
        print(self.nX[i])
    print("---")
    print("dataNorm_Y:")
    for i in range(len(self.nY)):
        print(self.nY[i])
    print("---")
def poly_to_gaussian(points): """Given `points` of the border of a polygon in local space, return the center and covariance matrix of the smallest area enclosing ellipse""" import subprocess as sp np.savetxt('__poly', points, fmt='%.10f %.10f') params = sp.check_output('./ellipse', shell=True) a, b, c, d, f, g = [float(_) for _ in params.decode('utf8').split(' ')] if b*b-a*c < 1e-8: return tuple(points.mean(0)), np.cov(points.T) x0 = (c*d-b*f)/(b*b-a*c) y0 = (a*f-b*d)/(b*b-a*c) numerator = 2*(a*f*f+c*d*d+g*b*b-2*b*d*f-a*c*g) denom_sqrt = np.sqrt(4*b*b+(a-c)**2) _a = np.sqrt(numerator/((b*b-a*c)*(denom_sqrt - (a+c)))) _b = np.sqrt(numerator/((b*b-a*c)*(-denom_sqrt - (a+c)))) det = abs(np.linalg.det([[a, b, d], [b, c, f], [d, f, g]])) if det < 1e-6 or np.any(np.isnan([_a, _b])): # screw it, let's do something less accurate but more straightforward return tuple(points.mean(0)), np.cov(points.T) if b < 1e-5: theta = 0 if a < c else np.pi/2 else: theta = .5*arccot((a-c)/(2*b)) if a > c: theta += np.pi/2 R = np.array([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]]) S = np.diag([_a, _b]) T = R.dot(S) return (x0, y0), .5*T.dot(T.T)
def test_cov_parameters(self,level=rlevel): """Ticket #91""" x = np.random.random((3, 3)) y = x.copy() np.cov(x, rowvar=1) np.cov(y, rowvar=0) assert_array_equal(x, y)
def check_cov_parameters(self,level=rlevel): """Ticket #91""" x = N.random.random((3,3)) y = x.copy() N.cov(x,rowvar=1) N.cov(y,rowvar=0) assert_array_equal(x,y)
def fit_gaussians(x_train_boxcox, y_train): """ Fit class-dependent multivariate gaussians on the training set. Parameters ---------- x_train_boxcox : np.array [n_samples, n_features_trans] Transformed training features. y_train : np.array [n_samples] Training labels. Returns ------- rv_pos : multivariate normal multivariate normal for melody class rv_neg : multivariate normal multivariate normal for non-melody class """ pos_idx = np.where(y_train == 1)[0] mu_pos = np.mean(x_train_boxcox[pos_idx, :], axis=0) cov_pos = np.cov(x_train_boxcox[pos_idx, :], rowvar=0) neg_idx = np.where(y_train == 0)[0] mu_neg = np.mean(x_train_boxcox[neg_idx, :], axis=0) cov_neg = np.cov(x_train_boxcox[neg_idx, :], rowvar=0) rv_pos = multivariate_normal(mean=mu_pos, cov=cov_pos, allow_singular=True) rv_neg = multivariate_normal(mean=mu_neg, cov=cov_neg, allow_singular=True) return rv_pos, rv_neg
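# Usage sketch for fit_gaussians above with toy 2-D features and binary labels;
# it assumes the function and its scipy.stats.multivariate_normal import are in
# scope. The data and the classification rule here are illustrative only.
import numpy as np

rng = np.random.default_rng(5)
x_pos = rng.normal(loc=2.0, size=(100, 2))
x_neg = rng.normal(loc=-2.0, size=(100, 2))
x_train_boxcox = np.vstack([x_pos, x_neg])
y_train = np.array([1] * 100 + [0] * 100)

rv_pos, rv_neg = fit_gaussians(x_train_boxcox, y_train)

# Classify a point by comparing the class-conditional densities
point = np.array([1.5, 1.5])
print(rv_pos.pdf(point) > rv_neg.pdf(point))  # True: closer to the melody class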
def correlation():
    df = pd.read_csv("dataset/train_new.csv")
    # df = df.dropna(axis=0, how="any")
    print(df.describe())
    # print(df.head())
    param = []
    correlation = []
    abs_corr = []
    covariance = []
    columns = ["Applicant_Gender", "App_age", "Applicant_Occupation", "Applicant_Qualification",
               "Manager_age", "Manager_Status", "Manager_Gender", "Manager_Business",
               "Manager_Business2", "Manager_Num_Application"]
    for c in columns:
        # Check if binary or continuous
        if len(df[c].unique()) <= 12:
            corr = spearmanr(df['Business_Sourced'], df[c])[0]
            print("spear", c, corr)
            y = df['Business_Sourced']
            x = df[c]
            X = np.vstack((y, x))
            covar = np.cov(X)
        else:
            corr = pointbiserialr(df['Business_Sourced'], df[c])[0]
            print("point", c, corr)
            y = df['Business_Sourced']
            x = df[c]
            X = np.vstack((y, x))
            covar = np.cov(X)
        param.append(c)
        correlation.append(corr)
        abs_corr.append(abs(corr))
        # covariance.append(covar[0][1])
    print(covariance)
def test_1d_wo_missing(self): "Test cov on 1D variable w/o missing values" x = self.data assert_almost_equal(np.cov(x), cov(x)) assert_almost_equal(np.cov(x, rowvar=False), cov(x, rowvar=False)) assert_almost_equal(np.cov(x, rowvar=False, bias=True), cov(x, rowvar=False, bias=True))
def fit(self, data, chunks): """Learn the RCA model. Parameters ---------- data : (n x d) data matrix Each row corresponds to a single instance chunks : (n,) array of ints When ``chunks[i] == -1``, point i doesn't belong to any chunklet. When ``chunks[i] == j``, point i belongs to chunklet j. """ data, M_pca = self._process_data(data) chunks = np.asanyarray(chunks, dtype=int) chunk_mask, chunked_data = _chunk_mean_centering(data, chunks) inner_cov = np.cov(chunked_data, rowvar=0, bias=1) dim = self._check_dimension(np.linalg.matrix_rank(inner_cov)) # Fisher Linear Discriminant projection if dim < data.shape[1]: total_cov = np.cov(data[chunk_mask], rowvar=0) tmp = np.linalg.lstsq(total_cov, inner_cov)[0] vals, vecs = np.linalg.eig(tmp) inds = np.argsort(vals)[:dim] A = vecs[:, inds] inner_cov = A.T.dot(inner_cov).dot(A) self.transformer_ = _inv_sqrtm(inner_cov).dot(A.T) else: self.transformer_ = _inv_sqrtm(inner_cov).T if M_pca is not None: self.transformer_ = self.transformer_.dot(M_pca) return self
def get_projection():
    # get the matrix for the raw data
    cla0_matri = np.asmatrix(cla_0)
    cla1_matri = np.asmatrix(cla_1)
    # compute the mean for each class
    # select the 8 features
    mu_0 = (cla0_matri.transpose()[:8]).mean(1)
    mu_1 = (cla1_matri.transpose()[:8]).mean(1)
    # print(mu_0, mu_1)
    # compute the covariance matrix for each class
    cov_0 = np.asmatrix(np.cov(cla0_matri.transpose()[:8]))
    cov_1 = np.asmatrix(np.cov(cla1_matri.transpose()[:8]))
    # compute the scatter matrices s0 and s1 for each class
    s_0 = np.dot((len(cla0_matri) - 1), cov_0)
    s_1 = np.dot((len(cla1_matri) - 1), cov_1)
    # compute the within-class scatter
    s_w = np.add(s_0, s_1)
    # compute the inverse of the within-class scatter
    inv_s = np.linalg.inv(s_w)
    # get the optimal line direction v
    dir_v = np.matrix.dot(inv_s, np.subtract(mu_0, mu_1))
    print(dir_v)
    # get the projection for the whole data set
    proj_data = np.matrix.dot(dir_v.transpose(), ((np.asmatrix(data_set)).transpose())[:8])
    proj_lis = (proj_data.tolist())[0]
    # append the excluded labels to the projected data
    for it in range(0, len(proj_lis)):
        temp_lis = []
        temp_lis.append(float(proj_lis[it]))
        # adding the label
        temp_lis.append(int(data_set[it][8]))
        proj_data_set.append(temp_lis)
def kal0(x,sv=None,Kdisp=1.0,Nsamp=1000,L=5,Norder=3,pg=1.0,vg=1.0, sigma0=1000,N0=200,Prange=8): x = x.T # Time scale if sv is None: mux = x-mean(x,0) phi = unwrap(angle(mux[:,0]+1j*mux[:,1])) sv= 2*pi*x.shape[0]/abs(phi[-1]-phi[0]) # System matrix A = Kdisp*eye(2*x.shape[1]) A[:x.shape[1],x.shape[1]:2*x.shape[1]] = eye(x.shape[1])/sv # Observation matrix C = zeros((x.shape[1],2*x.shape[1])) C[:x.shape[1],:x.shape[1]] = eye(x.shape[1]) # Observation covariance R = cov((x[:-1]-x[1:]).T)/sqrt(2.0) # System covariance idx = random.randint(x.shape[0]-5,size=(Nsamp)) idx = vstack([idx+i for i in xrange(L)]) tx = x[idx].reshape(idx.shape[0],-1) P = array([[(i-(L-1)/2)**j for i in xrange(L)] for j in xrange(Norder)]) K = lstsq(P.T,tx)[0] s = (cov((tx-dot(P[:-1].T,K[:-1]))[1])-cov((tx-dot(P.T,K))[1]))/cov((tx-dot(P[:-1].T,K[:-1]))[1]) D = zeros_like(A) D[:x.shape[1],:x.shape[1]] = R*pg D[x.shape[1]:,x.shape[1]:] = R*vg Q = D*s return(Kalman(A,C,Q,R))
def stop_training(self, destroy_training_set = True): self.covariance = numpy.cov(self.training_set.T) self.mean = numpy.mean(self.training_set, axis=0) xy = self.training_set[:,-2:] self.xycovariance = numpy.cov(xy.T) self.xymean = numpy.mean(xy, axis=0) self.training_set = None
def run(self, X):
    if self.covType == "diag":
        Sigma = np.diag(np.diag(np.cov(X.T)))
    elif self.covType == "full":
        Sigma = np.cov(X.T)
    else:
        print("error: unknown covariance type")
    self.mu = None
    self.labels = None
    n, p = X.shape
    mu, pi = self._initialize(X)
    iter = 0
    converge = False
    while iter < self.maxIter and not converge:
        old_mu = mu.copy()
        old_pi = pi.copy()
        gamma = self._estep(X, old_mu, Sigma, old_pi)
        mu, pi = self._mstep(X, gamma)
        if np.sum(abs(old_mu - mu)) / np.sum(abs(old_mu)) < 0.001:
            converge = True
            print("GMM algorithm converges in " + str(iter + 1) + " iterations")
        iter = iter + 1
    if iter == self.maxIter:
        print("GMM algorithm fails to converge in " + str(iter) + " iterations")
    labels = [np.argmax(g) for g in gamma]
    self.mu = mu
    self.labels = labels
def plt_1d(class1, class2): prior1 = 0.5 prior2 = 0.5 mean1 = np.array([np.mean(class1[:, 0])]) mean2 = np.array([np.mean(class2[:, 0])]) # print mean1, mean2 cov1 = np.array([[np.cov([class1[:, 0]])]]) cov2 = np.array([[np.cov([class2[:, 0]])]]) # print cov1, cov2 discriminant_function1 = gdf.gen_discriminant_function_of_normal_distribution(mean1, cov1, prior1) discriminant_function2 = gdf.gen_discriminant_function_of_normal_distribution(mean2, cov2, prior2) # X = np.linspace(np.amin(class1[:, 0]), np.amax(class1[:, 0]), 200) X = np.linspace(-100, 100, 100) y1 = [discriminant_function1(np.array([x])) for x in X] y2 = [discriminant_function2(np.array([x])) for x in X] plt.plot(X, y1) plt.plot(X, y2) plt.show()
def covandcoef(compare_data): hx = [] hy = [] ox = [] oy = [] tx = [] ty = [] for i in compare_data: hx.append(i[4]) hy.append(i[7]) for i in range(0,7): ox.append(compare_data[i][4]) oy.append(compare_data[i][7]) for i in range(0,89): tx.append(compare_data[i][4]) ty.append(compare_data[i][7]) X = np.vstack((hx,hy)) Z = np.vstack((ox,oy)) Y = np.vstack((tx,ty)) return [[np.cov(X)[0][1],np.corrcoef(X)[0][1]],[np.cov(Y)[0][1],np.corrcoef(Y)[0][1]],[np.cov(Z)[0][1],np.corrcoef(Z)[0][1]]]
def ldaTransform(data): C0 = data[data[:, -1] == -1] C1 = data[data[:, -1] == 1] C0 = C0[:, :-1] C1 = C1[:, :-1] S0 = np.cov(np.transpose(C0)) S1 = np.cov(np.transpose(C1)) SW = S0 + S1 Mu0 = np.mean(C0, axis = 0) Mu1 = np.mean(C1, axis = 0) Mu = np.mean(data, axis = 0) Mu = Mu[:-1] Mu = np.matrix(Mu) Mu0 = np.matrix(Mu0) Mu1 = np.matrix(Mu1) SB = C0.shape[0] * np.transpose(Mu0 - Mu) * (Mu0 - Mu) + C1.shape[0] * np.transpose(Mu1 - Mu) * (Mu1 - Mu) Swin = LA.pinv(SW) #costly Swin = np.matrix(Swin) SwinSB = Swin * SB #costly e, v = LA.eig(SwinSB) #costly s = np.argsort(e)[::-1] v = np.array(v) ev = np.zeros(v.shape) for i in xrange(e.shape[0]): ev[:, i] = v[:, s[i]] w = ev[:, 0] w = np.matrix(w) return w
def fit(self, descriptors, use_multiprocessing=True): """ Fit the ITQ model given the input set of descriptors :param descriptors: Iterable of ``DescriptorElement`` vectors to fit the model to. :type descriptors: collections.Iterable[smqtk.representation.DescriptorElement] :raises RuntimeError: There is already a model loaded :return: Matrix hash codes for provided descriptors in order. :rtype: numpy.ndarray[bool] """ if self.has_model(): raise RuntimeError("Model components have already been loaded.") dbg_report_interval = None if self.logger().getEffectiveLevel() <= logging.DEBUG: dbg_report_interval = 1.0 # seconds if not hasattr(descriptors, "__len__"): self._log.info("Creating sequence from iterable") descriptors_l = [] rs = [0] * 7 for d in descriptors: descriptors_l.append(d) report_progress(self._log.debug, rs, dbg_report_interval) descriptors = descriptors_l self._log.info("Creating matrix of descriptors for fitting") x = elements_to_matrix(descriptors, report_interval=dbg_report_interval, use_multiprocessing=use_multiprocessing) self._log.debug("descriptor matrix shape: %s", x.shape) self._log.debug("Info normalizing descriptors by factor: %s", self.normalize) x = self._norm_vector(x) self._log.info("Centering data") self.mean_vec = numpy.mean(x, axis=0) x -= self.mean_vec self._log.info("Computing PCA transformation") # numpy and matlab observation format is flipped, thus the added # transpose. self._log.debug("-- computing covariance") c = numpy.cov(x.transpose()) # Direct translation from UNC matlab code # - eigen vectors are the columns of ``pc`` self._log.debug('-- computing linalg.eig') l, pc = numpy.linalg.eig(c) # ordered by greatest eigenvalue magnitude, keeping top ``bit_len`` self._log.debug('-- computing top pairs') top_pairs = sorted(zip(l, pc.transpose()), key=lambda p: p[0], reverse=1)[:self.bit_length] # # Harry translation -- Uses singular values / vectors, not eigen # # - singular vectors are the rows of pc # # - I think there is an additional error of not taking the transpose # # of ``pc`` when computing ``top_pairs``. # pc, l, _ = numpy.linalg.svd(c) # top_pairs = sorted(zip(l, pc), # key=lambda p: p[0], # reverse=1 # )[:self.bit_length] # Eigen-vectors of top ``bit_len`` magnitude eigenvalues self._log.debug("-- top vector extraction") pc_top = numpy.array([p[1] for p in top_pairs]).transpose() self._log.debug("-- transform centered data by PC matrix") xx = numpy.dot(x, pc_top) self._log.info("Performing ITQ to find optimal rotation") c, self.rotation = self._find_itq_rotation(xx, self.itq_iterations) # De-adjust rotation with PC vector self.rotation = numpy.dot(pc_top, self.rotation) self.save_model() return c
def single_factor_analysis(X, k=5, cycles=100, tol=.001): """ Fit Factor Analysis model using EM Iterative Expectation-Maximization algorithm that stops once a proportional change less than the specified tolerance in the log likelihood or the specified number of cycles has been reached. Return matrices are strictly real-valued. Parameters ---------- X : array_like, shape (n_samples, n_features) A 2-D data matrix real-valued k : int, optional Number of factors (default 5) cycles : int, optional Maximum number of cycles of EM (default 100) tol : float, optional Tolerance value (default 0.001) Returns ------- Lambda : array_like A 2-D ndarray containing the factor loading matrix (Lambda) Psi : array_like A 2-D ndarray containing the diagonal uniquenesses matrix lkhd_list : list List of log likelihood values during iterations. Generally follows a positive logarithmic curve. Notes ----- Based upon the algorithm initially described in: http://www.cs.toronto.edu/~fritz/absps/tr-96-1.pdf """ n_samples, n_features = X.shape # X assumed to be zero mean row_mean = np.mean(X, axis=0) X -= row_mean[np.newaxis, :] # subtract mean of row # X'*X followed by element-wise division by n_samples XX = X.T.dot(X) / n_samples XX_diag = np.diag(XX) cov_X = np.cov(X, rowvar=False) #shape n_features by n_features scale = np.linalg.det(cov_X)**(1 / n_features) Psi = np.diag(cov_X) # Start Lambda at random values Lambda = np.random.randn(n_features, k) * np.sqrt(scale / k) I = np.eye(k) const = -n_features / 2 * np.log(2 * np.pi) log_lkhd = 0 lkhd_list = [] for i in range(cycles): # compute expectation Psi_diag = np.diag(1 / Psi) # diag of element-wise ** -1 PsiLambda = Psi_diag.dot(Lambda) # solve matrix inversion M = Psi_diag - PsiLambda.dot(np.linalg.inv(I +\ Lambda.T.dot(PsiLambda))).dot(PsiLambda.T) M_det = np.sqrt(np.linalg.det(M)) Beta = Lambda.T.dot(M) # first moment of factors XXBeta_prime = XX.dot(Beta.T) # compute second moment of factors ZZ = I - Beta.dot(Lambda) + Beta.dot(XXBeta_prime) # compute log likelihood last_log_lkhd = log_lkhd log_lkhd = n_samples * const + n_samples * np.log( M_det) - 0.5 * n_samples * np.sum(np.diag(M.dot(XX))) lkhd_list.append(log_lkhd) # compute maximization # update Lambda Lambda = XXBeta_prime.dot(np.linalg.inv(ZZ)) # update Psi Psi = XX_diag - np.diag(Lambda.dot(XXBeta_prime.T)) if i == 0: log_lkhd_init = log_lkhd elif (log_lkhd < last_log_lkhd): warnings.warn('Local decrease in log likelihood') elif ((log_lkhd - log_lkhd_init) < (1 + tol) *\ (last_log_lkhd - log_lkhd_init)): break else: warnings.warn( 'Factor analysis did not converge for tol : %.4f, you may want to increase the cycles' % tol) return Lambda, Psi, lkhd_list
def step(self): self._current_generation += 1 print('-' * 10) print('Generation', self._current_generation) for species, population in enumerate(self._populations): print('-- Species', species) self.current_group = species population_eval = { mem: self._cached_evaluation(mem) for mem in population } # Sorted population by evaluation. Largest/worst member first sorted_population = sorted(population_eval, key=lambda x: population_eval[x], reverse=True) member_points = np.array([ self._decode_member(mem) for mem in sorted_population[self._mu_important:] ]) covariance = np.copy(self._covariances[species]) num_members = len(member_points) for i in range(member_points.shape[1]): #E_i = np.mean(member_points[:,i]) # estimated expected value for dimension i E_i = self._centroids[species][i] for j in range(i, member_points.shape[1]): covariance[i, j] = np.sum( (member_points[:, i] - E_i)**2) / (num_members - 1) covariance[j, i] = covariance[i, j] self._covariances[species] = np.cov(member_points.T) self._covariance_histories[species].append( np.copy(self._covariances[species])) self._centroids[ species] = self._centroids[species] + self._learning_rate * ( np.mean(member_points, axis=0) - self._centroids[species]) self._centroid_histories[species].append( np.copy(self._centroids[species])) print('Covariance & mean:') print(self._covariances[species]) print('\t\t\t\t', self._centroids[species]) print('Expansion?') #for d in range(self.num_dimensions): avg_covariance = np.mean(self._covariance_histories[species][-5:], axis=0) cur_dimension_variance = np.mean( np.diag(self._covariances[species])) avg_dimension_variance = np.mean(np.diag(avg_covariance)) print('Cur variance over dimensions:', cur_dimension_variance) print('Avg variance over dimensions:', avg_dimension_variance) print('Averaged historical covariance:') print(np.mean(self._covariance_histories[species][-5:], axis=0)) split_dimensions = [] for d in range(self.num_dimensions): if len(self._centroid_histories[species]) > 2: print( 'Centroid path for dimension %s is %.4f -> %.4f -> %.4f' % (d, self._centroid_histories[species][-3][d], self._centroid_histories[species][-2][d], self._centroid_histories[species][-1][d])) d0 = self._centroid_histories[species][-3][ d] - self._centroid_histories[species][-2][d] d1 = self._centroid_histories[species][-2][ d] - self._centroid_histories[species][-1][d] if np.sign(d0) != np.sign(d1): print('Centroid path for dim %s changed direction' % d) print( 'abs(c[-2]-c[-1]) = %.4f -> %.2f of variance V_d[t-1]=%.4f' % (abs(d0), abs(d0) / self._covariance_histories[species][-2][d, d], self._covariance_histories[species][-2][d, d])) print( 'abs(c[-1]-c[0]) = %.4f -> %.2f of variance V_d[t]=%.4f' % (abs(d1), abs(d1) / self._covariances[species][d, d], self._covariance_histories[species][-1][d, d])) if self._covariances[species][d, d] > np.maximum( 1, self._covariance_histories[species][-2][d, d]): split_dimensions.append(d) if self._covariances[species][ d, d] > self._covariance_histories[species][-2][d, d]: print( 'Dimension %s variance is greater than its last variance' % d) if self._covariances[species][d, d] / np.mean( avg_covariance[d, d]) > 1: print( 'Dimension %s is greater than its historical average variance' % d) if self._covariances[species][d, d] / avg_dimension_variance > 1: print( 'Dimension %s is greater than total historical average variance' % d) if self._covariances[species][d, d] / cur_dimension_variance > 1: print( 'Dimension %s is greater than current average variance' % d) new_members = 
np.array([ self.sample_random(species) for _ in range(self._mu_important) ]) member_points = np.concatenate([member_points, new_members]) self._populations[species] = { self._encode_member(mem) for mem in member_points } # Perform possible species merges/deaths # Deaths: species is consistently worse than others # Merges: species close to each other # Perform possible species splits if len(split_dimensions) > 0: print('Performing population split along dimensions %s' % split_dimensions) centroid_0 = self._centroids[species] centroid_1 = np.copy(self._centroid_histories[species][-2]) centroid_1[split_dimensions] = self._centroids[species][ split_dimensions] covariance = np.copy(self._covariances[species]) new_species = len(self._populations) self._centroids.append(centroid_1) self._covariances.append(covariance) self._centroid_histories.append([centroid_1]) self._covariance_histories.append([covariance]) self._populations.append({ self._encode_member(self.sample_random(new_species)) for _ in range(self._population_size) }) # delete historical record self._centroid_histories[species] = self._centroid_histories[ species][-2:] self._covariance_histories[ species] = self._covariance_histories[species][-2:]
def jeffrey_divergence_score(X, labels): """ Implements the score based on the Jeffrey divergence that appears in: Said, A.; Hadjidj, R. & Foufou, S. "Cluster validity index based on Jeffrey divergence" Pattern Analysis and Applications, Springer London, 2015, 1-11 :param X: :param labels: :return: """ llabels = np.unique(labels) poslabels = maplabels(llabels) nclust = len(llabels) # compute the centroids centroids = np.zeros((nclust, X.shape[1])) for idx in llabels: center = np.zeros((1, X.shape[1])) center_mask = labels == idx center += np.sum(X[center_mask], axis=0) center /= center_mask.sum() centroids[poslabels[idx]] = center lcovs = [] linvcovs = [] for idx in llabels: cov_mask = labels == idx covar = np.cov(X[cov_mask].T) lcovs.append(covar) linvcovs.append(np.linalg.inv(covar)) traces = np.zeros((nclust, nclust)) for idx1 in llabels: for idx2 in llabels: traces[poslabels[idx1], poslabels[idx2]] = np.trace( np.dot(linvcovs[poslabels[idx1]], lcovs[poslabels[idx2]])) traces[poslabels[idx1], poslabels[idx2]] += np.trace( np.dot(linvcovs[poslabels[idx2]], lcovs[poslabels[idx1]])) traces[poslabels[idx1], poslabels[idx2]] /= 2.0 sumcov = np.zeros((nclust, nclust)) for idx1 in llabels: for idx2 in llabels: v1 = centroids[poslabels[idx1]] v2 = centroids[poslabels[idx2]] vm = v1 - v2 mcv = linvcovs[poslabels[idx1]] + linvcovs[poslabels[idx2]] sumcov[poslabels[idx1], poslabels[idx2]] = np.dot(vm.T, np.dot(mcv, vm)) sumcov[poslabels[idx1], poslabels[idx2]] /= 2.0 ssep = 0.0 for idx1 in llabels: minv = np.inf for idx2 in llabels: if idx1 != idx2: val = traces[poslabels[idx1], poslabels[idx2]] + sumcov[ poslabels[idx1], poslabels[idx2]] - centroids.shape[1] if minv > val: minv = val ssep += minv scompact = 0.0 for idx in llabels: center_mask = labels == idx dvector = euclidean_distances(X[center_mask], centroids[poslabels[idx]], squared=True) scompact += dvector.max() return scompact / ssep
np.savetxt("results/" + todaystr + "/CFDvecs.tsv", results, delimiter='\t') #import io #out_m = io.open('meta.tsv', 'w', encoding='utf-8') #for i in valid_generator: #[out_m.write(str(x) + "\n") for x in valid_generator.filenames] #out_m.close() #------------------------------------------------------------ MU = np.mean(results, axis=0) SIGMA = np.cov(results, rowvar=0) from scipy.stats import multivariate_normal var = multivariate_normal(MU, SIGMA) pdftest=var.pdf(results) log_pdftest= np.log(pdftest) np.savetxt("results/" + todaystr + "/CFD_LL.tsv", log_pdftest, delimiter='\t')
# Test on 2D Gaussian
mu = np.array([-2, 5])
a = npr.rand(2, 2)
cov = np.dot(a, a.T)
mvn = priors.MultivariateNormal(mu=mu, cov=cov)
x_samples = np.zeros((2, n))
x = np.zeros(2)
for i in range(n):
    if i % 1000 == 0:
        print('Sample %d/%d' % (i, n))
    x, cur_ll = slice_sample(x, mvn.logprob)
    x_samples[:, i] = x.copy()

mu_samp = np.mean(x_samples, axis=1)
print('2D Gaussian:')
print('Actual mean: [%f,%f]' % (mu[0], mu[1]))
print('Mean of samples: [%f,%f]' % (mu_samp[0], mu_samp[1]))
print('Actual Cov:')
print(str(cov))
print('Cov of samples')
print(str(np.cov(x_samples)))

# plt.figure(1)
# plt.clf()
# plt.hist(x_samples, 40)
# plt.savefig('slice_sampler_test.pdf')
from numpy import array from numpy import mean from numpy import cov from numpy.linalg import eig # define a matrix A = array([[1, 2], [3, 4], [5, 6]]) print(A) # calculate the mean of each column M = mean(A.T, axis=1) print(M) # center columns by subtracting column means C = A - M print(C) # calculate covariance matrix of centered matrix V = cov(C.T) print(V) # eigendecomposition of covariance matrix values, vectors = eig(V) print(vectors) print(values) # project data P = vectors.T.dot(C.T) print(P.T)
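# Because the covariance matrix is symmetric, np.linalg.eigh gives the same
# decomposition with real eigenvalues in ascending order; sorting them
# descending reproduces the usual PCA ordering. A cross-check of the example
# above (the projected coordinates agree up to the sign of each eigenvector):
import numpy as np

A = np.array([[1, 2], [3, 4], [5, 6]])
C = A - A.mean(axis=0)
V = np.cov(C.T)

values, vectors = np.linalg.eigh(V)        # ascending eigenvalues
order = np.argsort(values)[::-1]           # reorder to descending
values, vectors = values[order], vectors[:, order]
P = C @ vectors                            # project centred data onto the principal axes
print(values)
print(P)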
            vale_closing_prices, label='VALE Closing Prices', linewidth=2,
            color='orangered', linestyle='-')
# Compute the covariance of the two stocks
bhp_mean = np.mean(bhp_closing_prices)
vale_mean = np.mean(vale_closing_prices)
# Deviations from the mean
d1 = bhp_closing_prices - bhp_mean
d2 = vale_closing_prices - vale_mean
cov = np.mean(d1 * d2)
print(cov)
# Compute the correlation coefficient
s = cov / (np.std(bhp_closing_prices) * np.std(vale_closing_prices))
print(s)
# Get the correlation matrix
m = np.corrcoef(bhp_closing_prices, vale_closing_prices)
print(m)
# Get the covariance matrix
cm = np.cov(bhp_closing_prices, vale_closing_prices)
print(cm)
plt.legend()
plt.gcf().autofmt_xdate()
plt.show()
def get_gaussian_cov_matrix(df: pd.DataFrame, four_momentum_columns: List[str]) -> np.ndarray: return np.cov(df.loc[:, four_momentum_columns], rowvar=False)
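# Usage sketch for get_gaussian_cov_matrix above; the column names below are
# hypothetical placeholders, the real list comes from the caller.
import numpy as np
import pandas as pd

cols = ["E", "px", "py", "pz"]             # assumed four-momentum column names
rng = np.random.default_rng(6)
df = pd.DataFrame(rng.normal(size=(1000, 4)), columns=cols)

cov = get_gaussian_cov_matrix(df, cols)
print(cov.shape)                           # (4, 4): one row/column per component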
import math
import pylab

mu_1 = 4
variance_1 = 4
sigma_1 = math.sqrt(variance_1)
X_1 = np.random.normal(mu_1, sigma_1, 100)
print("the mean of X_1 is %s" % (np.mean(X_1)))

mu_2 = 3
variance_2 = 9
sigma_2 = math.sqrt(variance_2)
X_2 = X_1 / 2 + np.random.normal(mu_2, sigma_2, 100)
print("the mean of X_2 is %s" % (np.mean(X_2)))

cov_matrix = np.cov(X_1, X_2)
print("the covariance matrix is %s" % (cov_matrix))

val, vec = np.linalg.eig(cov_matrix)
print("the eigenvalues are %s" % (val))
print("the eigenvectors are %s" % (vec))

plt.figure(1)
pylab.ylim([-15, 15])
pylab.xlim([-15, 15])
plt.scatter(X_1, X_2)
mu = (np.mean(X_1), np.mean(X_2))
# np.linalg.eig returns eigenvectors as columns, so column i (vec[:, i]) pairs with val[i]
Vec_A = [np.mean(X_1), np.mean(X_2), vec[0][0], vec[1][0]]
Vec_B = [np.mean(X_1), np.mean(X_2), vec[0][1], vec[1][1]]
nData = X_train.shape[0] nDimension = X_train.shape[1] # Applying BIC to find optimal nCluster: 7 # nCluster = Optimize_nCluster.optimal_number_of_components(X_train) nCluster = 7 # Applying K-means to initialize parameters kmeans = KMeans(n_clusters=nCluster, random_state=0).fit(X_train) means = kmeans.cluster_centers_ # init mu priors = np.zeros(nCluster) covariances = np.zeros((nCluster, nDimension, nDimension)) # using "full" covariance_type for k in range(nCluster): Xk = X_train[np.where(kmeans.labels_ == k)[0]] priors[k] = float(Xk.shape[0]) / nData if np.size(Xk): covariances[k] = np.cov(Xk.T) #Initialzie covariance matrices via points in each KMeans-cluster else: covariances[k] = np.cov(X_train.T) # part2.2: Expectation-Maximization def calculate_probability_density(X, means, covariances): probability_density = np.zeros((nData, nCluster)) for i in range(X.shape[0]): for k in range(nCluster): vector_2d = np.reshape((X[i] - means[k]), (nDimension, 1)) a = np.exp(-0.5 * np.dot(np.dot(vector_2d.T, np.linalg.inv(covariances[k])), vector_2d)[0][0]) b = np.sqrt(np.power(2 * np.pi, nDimension) * np.linalg.det(covariances[k])) #if (i == 0 and k == 0) : print(np.linalg.det(covariances[0])) #print(i, np.power(2 * np.pi, nDimension) * np.linalg.det(covariances[k])) #print(i, np.linalg.det(covariances[k])) probability_density[i][k] = a / b
def estimate_full_gradient_var(data):
    # covariance across columns of data; N is assumed to be defined at module level
    var_4_fg = np.cov(data, rowvar=False)
    var_fg = var_4_fg / N
    return var_fg
def random_covariance(n): pts = np.random.rand(n,n+1) return np.cov(pts)
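# With rowvar left at its default, np.cov treats each of the n rows as a
# variable observed n+1 times, so random_covariance(n) returns an n x n matrix
# that is (almost surely) full rank and positive definite. A quick check:
import numpy as np

cov = random_covariance(4)
print(cov.shape)                                  # (4, 4)
print(bool(np.all(np.linalg.eigvalsh(cov) > 0)))  # True with probability 1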
def cem_evolution_step(mean, cov, params, sorted_ids): best_ids = sorted_ids[:ELITE_SIZE] new_mean = (1-ALPHA) * mean + ALPHA * np.mean(params[best_ids], 0) new_cov = (1-ALPHA) * cov + ALPHA * np.cov(params[best_ids], rowvar=False) return (new_mean, new_cov)
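# Toy iteration of the cross-entropy-method step above; ELITE_SIZE and ALPHA
# are module-level constants in the original, so illustrative values are
# defined here (assumptions). This only works if pasted into the same module
# as cem_evolution_step.
import numpy as np

ELITE_SIZE = 10   # assumed value
ALPHA = 0.5       # assumed value

rng = np.random.default_rng(7)
mean, cov = np.zeros(3), np.eye(3)
params = rng.multivariate_normal(mean, cov, size=50)
scores = np.array([np.sum(p ** 2) for p in params])   # lower is better
sorted_ids = np.argsort(scores)                        # best candidates first

mean, cov = cem_evolution_step(mean, cov, params, sorted_ids)
print(mean, np.diag(cov))   # mean and variances shrink toward the elite set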
    return matrix


if __name__ == '__main__':
    X = [1, -1, 4]
    Y = [2, 1, 3]
    Z = [1, 3, -1]
    dat = np.column_stack([X, Y, Z]).T
    print(dat)
    print('covs')
    print('cov(X, X): {:.2f} {:.2f}'.format(cov(X, X), var(X)))
    print('cov(Y, Y): {:.2f} {:.2f}'.format(cov(Y, Y), var(Y)))
    print('cov(Z, Z): {:.2f} {:.2f}'.format(cov(Z, Z), var(Z)))
    print('mine')
    for r in covmat(X, Y, Z):
        print((' {:> 3.2f}' * len(r)).format(*r))
    print('numpy')
    for r in np.cov(dat):
        print((' {:> 3.2f}' * len(r)).format(*r))
    print('eigenvectors')
    print(np.linalg.eig(np.cov(dat)))
    for r in np.linalg.eig(np.cov(dat)):
        print(r)
        # print((' {:> 3.2f}' * len(r)).format(*r))
plt.title(target) plt.xlabel('Log(concentration)') # root mean squared error # incorporate retention time # spearman rank coefficient # %% # manuelly check r_value # pearson correlation coefficient from numpy import cov from scipy.stats import pearsonr target = 'c8' mask = new_data_melt['compound'] == target covariance = cov(new_data_melt[mask]['concentration'], new_data_melt[mask]['area']) p_r_value = pearsonr(new_data_melt[mask]['concentration'], new_data_melt[mask]['area']) print(p_r_value) # %% # now I need to remake the linear equation for each regression line # and plug in the respective area values for y and find the concentration # store all dataframes in a dictionary dict = {compound: pd.DataFrame() for compound in reg_df.index} # the range of x values only having the concentrations of interest new_x = [1, 2, 5, 10, 25, 50, 100, 250, 500, 1000, 2500, 5000, 10000] new_x = np.log10(new_x)
def main(): input_gps_csv_file = "data/gps.csv" # define the file name input_acc_csv_file = "data/data.csv" input_dist_csv_file = "data/distance.csv" # read the file and extract the required data lat, lon, time_gps = CSVReadGPS(input_gps_csv_file) accX, accY, time_acc = CSVReadAcc(input_acc_csv_file) dist, time_ultra = CSVReadUltra(input_dist_csv_file) # loop in the lists to estimate the velocities num_gps_val = len(lat) # Number of GPS data points captured num_acc_val = len(accX) # Number of Accelerometer data points captured num_dist_val = len(dist) # number of ultrasound readings collected #### Calculate the covariances of data # GPS based velocity estimate velNorth = [] velWest = [] velNorth.append(0) # intial Velocity velWest.append(0) # intial Velocity for i in range(1, num_gps_val): y_t = GPSVel(time_gps[i], lat[i], lon[i], time_gps[i - 1], lat[i - 1], lon[i - 1]) velNorth.append(y_t[0]) velWest.append(y_t[1]) # Accelerometer based Velocity Estimate velX = [] velY = [] velX.append(0) # intial Velocity velY.append(0) # intial Velocity for i in range(1, num_acc_val): acc_time_prev = time_acc[i - 1] acc_time_cur = time_acc[i] acc_time_cur_temp = acc_time_cur.split(":") acc_time_cur_temp = [float(item) for item in acc_time_cur_temp] acc_time_prev_temp = acc_time_prev.split(":") acc_time_prev_temp = [float(item) for item in acc_time_prev_temp] del_acc_t = (acc_time_cur_temp[0] - acc_time_prev_temp[0]) * 3600 + ( acc_time_cur_temp[1] - acc_time_prev_temp[1]) * 60 + ( acc_time_cur_temp[2] - acc_time_prev_temp[2]) # Measurements from the accelerometer a_t = np.array([2, 1], dtype=np.float32) a_t[0] = accX[i] # X-direction a_t[1] = accY[i] # Y-direction velX.append(a_t[0] * del_acc_t + velX[i - 1]) # V = U + a*t velY.append(a_t[1] * del_acc_t + velY[i - 1]) cov_gps_velN = np.cov(velNorth) cov_gps_velW = np.cov(velWest) cov_acc_velX = np.cov(velX) cov_acc_velY = np.cov(velY) cov_dist = np.cov(dist) print("Covariances Calculated") # Accelerometer Data acquisition frequency is higher than GPS. 
GPS: update, Accelerometer: Prediction # Intialize the filter parameters: # Velocity Estimate x_init = np.array([velX[0], velY[0]]) p_init = np.array([1, 0, 1, 0]) # initialize with high covariance value q_matrix = np.array([cov_acc_velX, 0, 0, cov_acc_velY], dtype=np.float32) r_matrix = np.array([cov_gps_velN, 0, 0, cov_gps_velW], dtype=np.float32) acc_data_count = 1 # counter for data index in accelermeter ultra_data_count = 1 # counter for data index in ultrasound prediction step # intialize temporary variables acc_data_count_temp = 0 ultra_data_count_temp = 0 xd_init = dist[0] pd_init = 1e10 r_ultra = cov_dist x_prior = np.empty([2, 1]) p_prior = np.empty([2, 2]) xd_prior = 0 pd_prior = 0 x_post = np.empty([2, 1]) p_post = np.empty([2, 2]) xd_post = 0 pd_post = 0 u_t = np.empty([2, 1]) y_t = np.empty([2, 1]) # store the state vector and covariance matrix #x_post_vec = np.empty([num_gps_val,2], dtype = np.float32) x1_post_vec = [] p1_post_vec = [] x2_post_vec = [] p2_post_vec = [] x_post_arr = np.zeros((24, 4)) p_post_arr = np.zeros((24, 4)) time_arr = np.zeros((24, 1)) xd_post_arr = np.zeros((3, 1)) pd_post_arr = np.zeros((3, 1)) d_time_arr = np.zeros((3, 1)) print("Q_matrix: ", q_matrix.reshape(2, 2)) print("R_matrix: ", r_matrix.reshape(2, 2)) # Filter Implementation for i in range(1, num_gps_val): y_t = GPSVel(time_gps[i], lat[i], lon[i], time_gps[i - 1], lat[i - 1], lon[i - 1]) gps_time_cur_temp = time_gps[i].split(":") gps_time_cur_temp = [float(item) for item in gps_time_cur_temp] gps_time_cur_temp = GMTCDTconv(gps_time_cur_temp) gps_time_prev_temp = time_gps[i - 1].split(":") gps_time_prev_temp = [float(item) for item in gps_time_prev_temp] gps_time_prev_temp = GMTCDTconv(gps_time_prev_temp) gps_del_t = (gps_time_cur_temp[0] - gps_time_prev_temp[0]) * 3600 + ( gps_time_cur_temp[1] - gps_time_prev_temp[1]) * 60 + ( gps_time_cur_temp[2] - gps_time_prev_temp[2]) # accumulating previous estimates if i == 1: x_prev_est = x_init p_prev = p_init else: x_prev_est = x_post p_prev = p_post acc_data_count = acc_data_count_temp + acc_data_count acc_data_count_temp = 0 # Prediction Steps for j in range(acc_data_count, num_acc_val): if acc_data_count_temp > 0: # Account for multiple prediction steps x_prev_est = x_prior p_prev = p_prior acc_data_count_temp = acc_data_count_temp + 1 # update the counter acc_time_cur = time_acc[j] acc_time_cur_temp = acc_time_cur.split(":") acc_time_cur_temp = [float(item) for item in acc_time_cur_temp] time_diff = ( acc_time_cur_temp[0] - gps_time_cur_temp[0]) * 3600 + ( acc_time_cur_temp[1] - gps_time_cur_temp[1]) * 60 + ( acc_time_cur_temp[2] - gps_time_cur_temp[2]) if time_diff < 0: # proceed to the prediction step for all the prediction steps before the GPS readings acc_time_prev = time_acc[j - 1] #time difference acc_time_prev_temp = acc_time_prev.split(":") acc_time_prev_temp = [ float(item) for item in acc_time_prev_temp ] del_acc_t = ( acc_time_cur_temp[0] - acc_time_prev_temp[0]) * 3600 + ( acc_time_cur_temp[1] - acc_time_prev_temp[1]) * 60 + ( acc_time_cur_temp[2] - acc_time_prev_temp[2]) # Measurements from the accelerometer u_t = np.array([2, 1], dtype=np.float32) u_t[0] = accX[j] # X-direction u_t[1] = accY[j] # Y-direction # Kalman Filter Prediction x_prior, p_prior = KF(x_prev_est, u_t, del_acc_t, 0, q_matrix, 0, p_prev, True, False) acc_data_count = acc_data_count + 1 else: break # end of prediction loop # run the update only if there is a prediction steps if acc_data_count_temp > 0: # call the KF update x_post, p_post = KF(x_prior, u_t, 
del_acc_t, y_t, 0, r_matrix, p_prior, False, True) print("P: ", x_post) ######################################################################################################################################################################pyt #print(p_post) x1_post_vec.append(x_post[0][0]) p1_post_vec.append(p_post[0]) x2_post_vec.append(x_post[0][1]) p2_post_vec.append(p_post[3]) if i < 24: cur_time = (gps_time_cur_temp[0]) * 3600 + ( gps_time_cur_temp[1]) * 60 + (gps_time_cur_temp[2]) time_arr[i] = 84600 - cur_time x_post_new = x_post.flatten() if x_post_new.shape[0] == 2: x_post_arr[i, :2] = x_post_new p_post_arr[i, :] = p_post.flatten() else: x_post_arr[i, :] = x_post.flatten() p_post_arr[i, :] = p_post.flatten() #### # Distance Estimation if i == 1: xd_prev_est = xd_init pd_prev = pd_init kfd = kalman(pd_init, r_ultra, xd_prev_est, pd_prev) # filter initialization else: xd_prev_est = xd_post pd_prev = pd_post kfd = kalman(p_post[3], r_ultra, xd_prev_est, pd_prev) # filter initialization # prediction using calculated velocity xd_prior, pd_prior = kfd.update_meas(0, gps_del_t, x_post[0][1], False) # Update step: iterate over the different measurement data and use the value which is recorded closest to the current GPS time step ultra_data_count_temp = 0 gps_ultra_diff = 0 for j in range(ultra_data_count + 1, num_dist_val): # Time of the measurement dis_time_cur = time_ultra[j] dis_time_cur_temp = dis_time_cur.split(":") dis_time_cur_temp = [float(item) for item in dis_time_cur_temp] #print(dis_time_cur_temp) #print(gps_time_cur_temp) # keep iterating until the difference is positive gps_ultra_diff = ( dis_time_cur_temp[0] - gps_time_cur_temp[0]) * 3600 + ( dis_time_cur_temp[1] - gps_time_cur_temp[1]) * 60 + ( dis_time_cur_temp[2] - gps_time_cur_temp[2]) if gps_ultra_diff < 0: ultra_data_count_temp = ultra_data_count_temp + 1 else: break # break the for loop if measurement when ahead of prediction # end of for loop ultra_data_count_tem = ultra_data_count ultra_data_count = ultra_data_count + ultra_data_count_temp - 1 if ultra_data_count_tem == ultra_data_count: continue if ultra_data_count_temp > 0: # only perform update if a measurement is detected in the given range #print(ultra_data_count) xd_post, pd_post = kfd.update_meas(dist[ultra_data_count], gps_del_t, x_post[0][1], True) if i < 3: time_curr = (gps_time_cur_temp[0]) * 3600 + ( gps_time_cur_temp[1]) * 60 + (gps_time_cur_temp[2]) d_time_arr[i] = 84600 - time_curr xd_post_arr[i] = xd_post pd_post_arr[i] = pd_post mass = 1.81 # kg (~ 4 pounds) radius = 0.062 / 2 # meters height = 0.033 # meters (bump height) wheelbumpdynamics(mass, radius, x_post_arr[0, 0], height) print('================================================') # End of Distance Estimation else: continue # end of for loop #print(p1_post_vec) #print("R_ultra: ", r_ultra) #print(pd_post) #print(p2_post_vec) plot_results(x_post_arr, p_post_arr, time_arr, xd_post_arr, pd_post_arr, d_time_arr)
def plot_missing_pattern(self, ax=None, row_order="pattern", column_order="pattern", hide_complete_rows=False, hide_complete_columns=False, color_row_patterns=True): """ Generate an image showing the missing data pattern. Parameters ---------- ax : matplotlib axes Axes on which to draw the plot. row_order : string The method for ordering the rows. Must be one of 'pattern', 'proportion', or 'raw'. column_order : string The method for ordering the columns. Must be one of 'pattern', 'proportion', or 'raw'. hide_complete_rows : boolean If True, rows with no missing values are not drawn. hide_complete_columns : boolean If True, columns with no missing values are not drawn. color_row_patterns : boolean If True, color the unique row patterns, otherwise use grey and white as colors. Returns ------- A figure containing a plot of the missing data pattern. """ # Create an indicator matrix for missing values. miss = np.zeros(self.data.shape) cols = self.data.columns for j, col in enumerate(cols): ix = self.ix_miss[col] miss[ix, j] = 1 # Order the columns as requested if column_order == "proportion": ix = np.argsort(miss.mean(0)) elif column_order == "pattern": cv = np.cov(miss.T) u, s, vt = np.linalg.svd(cv, 0) ix = np.argsort(cv[:, 0]) elif column_order == "raw": ix = np.arange(len(cols)) else: raise ValueError(column_order + " is not an allowed value for `column_order`.") miss = miss[:, ix] cols = [cols[i] for i in ix] # Order the rows as requested if row_order == "proportion": ix = np.argsort(miss.mean(1)) elif row_order == "pattern": x = 2**np.arange(miss.shape[1]) rky = np.dot(miss, x) ix = np.argsort(rky) elif row_order == "raw": ix = np.arange(miss.shape[0]) else: raise ValueError(row_order + " is not an allowed value for `row_order`.") miss = miss[ix, :] if hide_complete_rows: ix = np.flatnonzero((miss == 1).any(1)) miss = miss[ix, :] if hide_complete_columns: ix = np.flatnonzero((miss == 1).any(0)) miss = miss[:, ix] cols = [cols[i] for i in ix] from statsmodels.graphics import utils as gutils from matplotlib.colors import LinearSegmentedColormap if ax is None: fig, ax = gutils.create_mpl_ax(ax) else: fig = ax.get_figure() if color_row_patterns: x = 2**np.arange(miss.shape[1]) rky = np.dot(miss, x) _, rcol = np.unique(rky, return_inverse=True) miss *= 1 + rcol[:, None] ax.imshow(miss, aspect="auto", interpolation="nearest", cmap='gist_ncar_r') else: cmap = LinearSegmentedColormap.from_list("_", ["white", "darkgrey"]) ax.imshow(miss, aspect="auto", interpolation="nearest", cmap=cmap) ax.set_ylabel("Cases") ax.set_xticks(range(len(cols))) ax.set_xticklabels(cols, rotation=90) return fig
def get_events_derivative(dff_trace, k_min=0, k_max=10, delta=3, smooth_window=5, smooth_weight=0.3, plot=False): ''' this seems to work ok :param dff_trace: :param k_min: :param k_max: :param delta: :param first_only: :param smooth_window: :param smooth_weight: :param plot: :return: ''' dff_trace = smooth(dff_trace, smooth_window) # if smooth_weight > 0: # dff_trace = denoise_tv_chambolle(dff_trace, weight=smooth_weight) var_dict = {} for ii in range(len(dff_trace)): if ii + k_min >= 0 and ii + k_max <= len(dff_trace): trace = dff_trace[ii + k_min:ii + k_max] xx = (trace - trace[0])[delta] - (trace - trace[0])[0] # yy = (trace - trace[0])[delta + 2] - (trace - trace[0])[0 + 2] yy = max((trace - trace[0])[delta + 2] - (trace - trace[0])[0 + 2], (trace - trace[0])[delta + 3] - (trace - trace[0])[0 + 3], (trace - trace[0])[delta + 4] - (trace - trace[0])[0 + 4]) var_dict[ii] = (trace[0], trace[-1], xx, yy) xx_list, yy_list = [], [] for _, _, xx, yy in var_dict.itervalues(): xx_list.append(xx) yy_list.append(yy) mu_x = np.median(xx_list) mu_y = np.median(yy_list) xx_centered = np.array(xx_list) - mu_x yy_centered = np.array(yy_list) - mu_y std_factor = 1 std_x = 1. / std_factor * np.percentile(np.abs(xx_centered), [100 * (1 - 2 * (1 - sps.norm.cdf(std_factor)))]) std_y = 1. / std_factor * np.percentile(np.abs(yy_centered), [100 * (1 - 2 * (1 - sps.norm.cdf(std_factor)))]) curr_inds = [] allowed_sigma = 4 for ii, (xi, yi) in enumerate(zip(xx_centered, yy_centered)): if np.sqrt(((xi) / std_x) ** 2 + ((yi) / std_y) ** 2) < allowed_sigma: curr_inds.append(True) else: curr_inds.append(False) curr_inds = np.array(curr_inds) data_x = xx_centered[curr_inds] data_y = yy_centered[curr_inds] Cov = np.cov(data_x, data_y) Cov_Factor = np.linalg.cholesky(Cov) Cov_Factor_Inv = np.linalg.inv(Cov_Factor) # =================================================================================================================== # fig_dff, ax_dff = plt.subplots() # ax_dff.plot(dff_trace, 'k') # fig, ax = plt.subplots() noise_threshold = max(allowed_sigma * std_x + mu_x, allowed_sigma * std_y + mu_y) mu_array = np.array([mu_x, mu_y]) yes_list, no_list, size_list = [], [], [] for ii, (t0, tf, xx, yy) in var_dict.iteritems(): xi_z, yi_z = Cov_Factor_Inv.dot((np.array([xx, yy]) - mu_array)) # # Conditions in order: # # 1) Outside noise blob # # 2) Minimum change in df/f # # 3) Change evoked by this trial, not previous # # 4) At end of trace, ended up outside of noise floor # # if np.sqrt(xi_z ** 2 + yi_z ** 2) > 4 and yy > .05 and xx < yy and tf > noise_threshold / 2: # Conditions in order: # 1) outside noise blob # 2) positive transient # 3) change evoked by this trial, not next if np.sqrt(xi_z**2 + yi_z**2) > 4 and xx > 0: # if np.sqrt(xi_z ** 2 + yi_z ** 2) > 4 and yy > .05 and xx < yy and tf > noise_threshold / 2: yes_list.append(ii) size_list.append(xx) # ax.plot([xx], [yy], 'b.') # ax_dff.plot(ii, 2., 'b') else: no_list.append(ii) # ax.plot([xx], [yy], 'r.') # events_temp[yes_list] = 1 if plot: plt.figure() plt.plot(xx_list[yes_list], yy[yes_list], 'b.') plt.plot(xx_list[no_list], yy[no_list], 'r.') yes_array = np.array(yes_list) size_array = np.array(size_list) return yes_array, size_array
print(np.quantile(rest_med, [0.025, 0.5, 0.975]))
print(np.mean([1 if i > 0 else 0 for i in rest_med]))

#----------------------------------------------------------------------------#
# Graphical analysis - First plot

y1_1_grid = np.linspace(38, 62, num=1000)
y2_1_grid = np.linspace(38, 68, num=1000)
X, Y = np.meshgrid(y1_1_grid, y2_1_grid)
pos = np.empty(X.shape + (2, ))
pos[:, :, 0] = X
pos[:, :, 1] = Y
thetas1 = [i[0][0] for i in param]
thetas2 = [i[0][1] for i in param]
cov1 = np.cov(thetas1, thetas2)
rv = multivariate_normal([np.mean(thetas1), np.mean(thetas2)], cov1).pdf(pos)
plt.contour(X, Y, rv)
plt.plot(y1_1_grid, y2_1_grid)

# Second plot
y1_2_grid = np.linspace(0, 100, num=1000)
y2_2_grid = np.linspace(0, 100, num=1000)
X2, Y2 = np.meshgrid(y1_2_grid, y2_2_grid)
pos2 = np.empty(X2.shape + (2, ))
pos2[:, :, 0] = X2
pos2[:, :, 1] = Y2
ys1 = [i[0] for i in data_pred]
def test_sample_paths_wiener(self, watch_params, use_time_step, use_time_grid, supply_normal_draws): """Tests paths properties for Wiener process (dX = dW).""" dtype = tf.float64 def drift_fn(_, x): return tf.zeros_like(x) def vol_fn(_, x): return tf.expand_dims(tf.ones_like(x), -1) times = np.array([0.1, 0.2, 0.3]) num_samples = 10000 if watch_params: watch_params = [] else: watch_params = None if use_time_step: time_step = 0.01 num_time_steps = None else: time_step = None num_time_steps = 30 if use_time_grid: time_step = None times_grid = tf.linspace(tf.constant(0.0, dtype=dtype), 0.3, 31) else: times_grid = None if supply_normal_draws: num_samples = 1 # Use antithetic sampling normal_draws = tf.random.stateless_normal(shape=[5000, 30, 1], seed=[1, 42], dtype=dtype) normal_draws = tf.concat([normal_draws, -normal_draws], axis=0) else: normal_draws = None paths = euler_sampling.sample( dim=1, drift_fn=drift_fn, volatility_fn=vol_fn, times=times, num_samples=num_samples, time_step=time_step, num_time_steps=num_time_steps, watch_params=watch_params, normal_draws=normal_draws, times_grid=times_grid, random_type=random.RandomType.STATELESS_ANTITHETIC, seed=[1, 42]) # The correct number of samples num_samples = 10000 with self.subTest('Shape'): self.assertAllEqual(paths.shape.as_list(), [num_samples, 3, 1]) paths = self.evaluate(paths) means = np.mean(paths, axis=0).reshape([-1]) covars = np.cov(paths.reshape([num_samples, -1]), rowvar=False) expected_means = np.zeros((3, )) expected_covars = np.minimum(times.reshape([-1, 1]), times.reshape([1, -1])) with self.subTest('Means'): self.assertAllClose(means, expected_means, rtol=1e-2, atol=1e-2) with self.subTest('Covariance'): self.assertAllClose(covars, expected_covars, rtol=1e-2, atol=1e-2)
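# A plain-numpy sanity check of the covariance property the test above relies on: for a
# standard Wiener process, Cov(W_s, W_t) = min(s, t). This sketch simulates paths by summing
# Gaussian increments and compares the sample covariance at the same three times; it is
# independent of the TensorFlow code above.
import numpy as np

rng = np.random.default_rng(42)
dt = 0.01
times = np.array([0.1, 0.2, 0.3])
steps = int(round(times[-1] / dt))
increments = rng.normal(scale=np.sqrt(dt), size=(100000, steps))
paths = np.cumsum(increments, axis=1)
idx = np.round(times / dt).astype(int) - 1       # columns corresponding to t = 0.1, 0.2, 0.3
sample_cov = np.cov(paths[:, idx], rowvar=False)
expected = np.minimum(times[:, None], times[None, :])
print(np.round(sample_cov, 3))
print(expected)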
def get_cca_similarity(acts1, acts2, threshold=0.98, compute_dirns=True, verbose=True): """The main function for computing cca similarities. This function computes the cca similarity between two sets of activations, returning a dict with the cca coefficients, a few statistics of the cca coefficients, and (optionally) the actual directions. Args: acts1: (num_neurons1, data_points) a 2d numpy array of neurons by datapoints where entry (i,j) is the output of neuron i on datapoint j. acts2: (num_neurons2, data_points) same as above, but (potentially) for a different set of neurons. Note that acts1 and acts2 can have different numbers of neurons, but must agree on the number of datapoints threshold: float between 0, 1 used to get rid of trailing zeros in the cca correlation coefficients to output more accurate summary statistics of correlations. compute_dirns: boolean value determining whether actual cca directions are computed. (For very large neurons and datasets, may be better to compute these on the fly instead of store in memory.) verbose: Boolean, whether info about intermediate outputs printed Returns: return_dict: A dictionary with outputs from the cca computations. Contains neuron coefficients (combinations of neurons that correspond to cca directions), the cca correlation coefficients (how well aligned directions correlate), x and y idxs (for computing cca directions on the fly if compute_dirns=False), and summary statistics. If compute_dirns=True, the cca directions are also computed. """ # assert dimensionality equal assert acts1.shape[1] == acts2.shape[1], "dimensions don't match" # check that acts1, acts2 are transposition assert acts1.shape[0] < acts1.shape[1], ("input must be number of neurons" "by datapoints") return_dict = {} # compute covariance with numpy function for extra stability numx = acts1.shape[0] covariance = np.cov(acts1, acts2) sigmaxx = covariance[:numx, :numx] sigmaxy = covariance[:numx, numx:] sigmayx = covariance[numx:, :numx] sigmayy = covariance[numx:, numx:] # rescale covariance to make cca computation more stable xmax = np.max(np.abs(sigmaxx)) ymax = np.max(np.abs(sigmayy)) sigmaxx /= xmax sigmayy /= ymax sigmaxy /= np.sqrt(xmax * ymax) sigmayx /= np.sqrt(xmax * ymax) ([_, sx, vx], [_, sy, vy], invsqrt_xx, invsqrt_yy, x_idxs, y_idxs) = compute_ccas(sigmaxx, sigmaxy, sigmayx, sigmayy, verbose) # if x_idxs or y_idxs is all false, return_dict has zero entries if (not np.any(x_idxs)) or (not np.any(y_idxs)): return create_zero_dict(compute_dirns, acts1.shape[1]) if compute_dirns: # orthonormal directions that are CCA directions cca_dirns1 = np.dot(vx, np.dot(invsqrt_xx, acts1[x_idxs])) cca_dirns2 = np.dot(vy, np.dot(invsqrt_yy, acts2[y_idxs])) # get rid of trailing zeros in the cca coefficients idx1 = sum_threshold(sx, threshold) idx2 = sum_threshold(sy, threshold) return_dict["neuron_coeffs1"] = np.dot(vx, invsqrt_xx) return_dict["neuron_coeffs2"] = np.dot(vy, invsqrt_yy) return_dict["cca_coef1"] = sx return_dict["cca_coef2"] = sy return_dict["x_idxs"] = x_idxs return_dict["y_idxs"] = y_idxs # summary statistics return_dict["mean"] = (np.mean(sx[:idx1]), np.mean(sy[:idx2])) return_dict["sum"] = (np.sum(sx), np.sum(sy)) if compute_dirns: return_dict["cca_dirns1"] = cca_dirns1 return_dict["cca_dirns2"] = cca_dirns2 return return_dict
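# An illustration of the block slicing above: np.cov(acts1, acts2) stacks the two arrays
# row-wise, so the resulting matrix holds the within-set covariances on the diagonal blocks
# and the cross-covariance off-diagonal. Shapes are tiny so the blocks are easy to inspect.
import numpy as np

rng = np.random.default_rng(1)
acts1 = rng.normal(size=(3, 50))   # 3 "neurons", 50 datapoints
acts2 = rng.normal(size=(2, 50))   # 2 "neurons", same 50 datapoints
full = np.cov(acts1, acts2)        # shape (5, 5)
numx = acts1.shape[0]
sigmaxx = full[:numx, :numx]       # (3, 3) covariance of acts1
sigmaxy = full[:numx, numx:]       # (3, 2) cross-covariance
sigmayy = full[numx:, numx:]       # (2, 2) covariance of acts2
print(full.shape, sigmaxx.shape, sigmaxy.shape, sigmayy.shape)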
data = pd.read_csv('USArrests.csv') Murder = np.array(data['Murder']) Murder = (Murder - Murder.mean()) / Murder.std() Assault = np.array(data['Assault']) Assault = (Assault - Assault.mean()) / Assault.std() UrbanPop = np.array(data['UrbanPop']) UrbanPop = (UrbanPop - UrbanPop.mean()) / UrbanPop.std() Rape = np.array(data['Rape']) Rape = (Rape - Rape.mean()) / Rape.std() states = np.array(data['Unnamed: 0']) matriz = np.array([Murder, Assault, UrbanPop, Rape]) # In[116]: mcov = np.cov(matriz) val, vect = np.linalg.eig(mcov) vec1 = vect[:, 0] vec2 = vect[:, 1] # In[117]: variables = ['Murder', 'Assault', 'UrbanPop', 'Rape'] plt.figure(figsize=(12, 11)) for i in range(len(states)): statesX = np.dot(vec1, matriz[:, i]) statesY = np.dot(vec2, matriz[:, i]) plt.annotate(states[i], (statesX, statesY), fontsize=9, c='green') plt.scatter(statesX, statesY, c='white')
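# Note on the eigendecomposition above: np.linalg.eig does not guarantee any particular
# ordering of eigenvalues, so taking vect[:, 0] as the leading principal component only works
# if the largest eigenvalue happens to come first. A defensive continuation of the script
# sorts explicitly; `val` and `vect` are assumed to be the arrays returned above.
import numpy as np

order = np.argsort(val)[::-1]          # eigenvalue indices, largest first
val_sorted = val[order]
vect_sorted = vect[:, order]
vec1 = vect_sorted[:, 0]               # first principal axis
vec2 = vect_sorted[:, 1]               # second principal axis
explained = val_sorted / val_sorted.sum()
print(explained[:2])                   # fraction of variance captured by PC1 and PC2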
new_complex = complex(round(complex_number.real, 2), round(complex_number.imag, 2))
rounded_state_vector.append(new_complex)
plot_bloch_multivector(statevector, title=str(rounded_state_vector))
plt.savefig(filename)

url = "https://raw.githubusercontent.com/ibonreinoso/qiskit-hackathon-bilbao-19/master/DAX_PERFORMANCE_INDEX.csv"
data = pd.read_csv(url, sep=';')
data = data.drop(['wkn_500340'], axis=1)
data = data.loc[:, ['wkn_515100', 'wkn_575200']]
print(data)

sigma2 = np.cov(data.values.T)
rho2 = sigma2 / np.trace(sigma2)
print(rho2)

eigenvalues, eigenvectors = np.linalg.eigh(rho2)
# np.linalg.eigh returns the eigenvectors as *columns*, in ascending eigenvalue order.
eigenvector1, eigenvector2 = eigenvectors[:, 0], eigenvectors[:, 1]
print(eigenvalues, eigenvector1, eigenvector2)
# Sanity checks (results discarded): rho2 @ v should equal lambda * v.
rho2.dot(eigenvector1)
eigenvector1 * eigenvalues[0]

NUM_QUBITS = 3
NUM_ITERATION = 50
SHOTS_PER_ITERATION = 8192
backend = BasicAer.get_backend('qasm_simulator')
state_vector = [1, 0]
def linregress(x, y=None):
    """
    Calculate a regression line

    This computes a least-squares regression for two sets of measurements.

    Parameters
    ----------
    x, y : array_like
        two sets of measurements. Both arrays should have the same length.
        If only x is given (and y=None), then it must be a two-dimensional
        array where one dimension has length 2. The two sets of measurements
        are then found by splitting the array along the length-2 dimension.

    Returns
    -------
    slope : float
        slope of the regression line
    intercept : float
        intercept of the regression line
    sigma : float
        standard deviation of the residuals about the fitted line

    Examples
    --------
    >>> import numpy as np
    >>> x = np.random.random(10)
    >>> y = np.random.random(10)
    >>> slope, intercept, sigma = linregress(x, y)

    """
    TINY = 1.0e-20
    if y is None:  # x is a (2, N) or (N, 2) shaped array_like
        x = numpy.asarray(x)
        if x.shape[0] == 2:
            x, y = x
        elif x.shape[1] == 2:
            x, y = x.T
        else:
            msg = ("If only `x` is given as input, it has to be of shape "
                   "(2, N) or (N, 2), provided shape was %s" % str(x.shape))
            raise ValueError(msg)
    else:
        x = numpy.asarray(x)
        y = numpy.asarray(y)
    n = len(x)
    xmean = numpy.mean(x, None)
    ymean = numpy.mean(y, None)

    # average sum of squares:
    ssxm, ssxym, ssyxm, ssym = numpy.cov(x, y, bias=1).flat
    r_num = ssxym
    r_den = numpy.sqrt(ssxm * ssym)
    if r_den == 0.0:
        r = 0.0
    else:
        r = r_num / r_den
        # test for numerical error propagation
        if (r > 1.0):
            r = 1.0
        elif (r < -1.0):
            r = -1.0

    df = n - 2
    t = r * numpy.sqrt(df / ((1.0 - r + TINY) * (1.0 + r + TINY)))
    # prob = distributions.t.sf(numpy.abs(t), df) * 2
    slope = r_num / ssxm
    intercept = ymean - slope * xmean
    sterrest = numpy.sqrt((1 - r * r) * ssym / ssxm / df)
    pred = intercept + slope * x
    sigma = numpy.sqrt(1. / (len(x) - 1) * numpy.sum((y - pred)**2))
    return slope, intercept, sigma
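# A short usage sketch for the modified linregress above. Unlike scipy.stats.linregress,
# this version returns three values; the third is the standard deviation of the residuals,
# not the standard error of the slope. The data below is synthetic.
import numpy

rng = numpy.random.default_rng(0)
x = numpy.linspace(0.0, 1.0, 50)
y = 2.0 * x + 1.0 + rng.normal(scale=0.1, size=x.size)
slope, intercept, sigma = linregress(x, y)
print(slope, intercept, sigma)     # slope ~ 2, intercept ~ 1, sigma ~ 0.1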
# -*- coding: utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets

iris = datasets.load_iris()
data = iris.data

K = 3
meu_0 = data[np.random.randint(data.shape[0], size=K)]
sigma_0 = np.array([np.cov(data.T)] * K)
alpha_0 = np.ones(K) * (1 / K)


def gaussian(x, meu_k, sigma_k):
    m = x - meu_k
    f = np.linalg.inv(sigma_k)
    # Multivariate normal density: the (2*pi) term must be raised to the
    # dimensionality d of x, i.e. (2*pi)**d, not used as a scalar factor.
    d = len(x)
    norm = 1 / (np.sqrt(((2 * np.pi) ** d) * (np.linalg.det(sigma_k))))
    exp = np.exp(-0.5 * (m.T.dot(f.dot(m))))
    return norm * exp


def get_new_sigma(data, new_meu, w, N_k):
    sigma_tot = np.array([np.zeros((data.shape[1], data.shape[1]))] * K)
    for k in range(K):
        sum_sigma_i = 0
        for i in range(len(data)):
            x = data[i] - new_meu[k]
            sigma = w[i, k] * np.outer(x, x.T)
            sum_sigma_i += sigma
        sigma_tot[k] = sum_sigma_i / N_k[k]
    return sigma_tot
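# A quick cross-check of the hand-rolled density above against scipy.stats.multivariate_normal,
# which handles the normalisation constant internally. This is only a verification sketch and
# continues the script above; the EM code does not depend on it.
from scipy.stats import multivariate_normal

x0 = data[0]
print(gaussian(x0, meu_0[0], sigma_0[0]))
print(multivariate_normal(mean=meu_0[0], cov=sigma_0[0]).pdf(x0))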
def calculate_songs_characteristics(self, mfcc_amount, cache_dir): """Calculate the songs characteristics. :param int mfcc_amount: The amount of mfccs to calculate. :param cache_dir: The directory to find and store the cache. The bpm and mfcc is cached. If it is False caching is disabled. :type cache_dir: str or ``False`` :returns: A tuple of respectively their PCA components, a dictionary for in which each song has a tuple of respectively their cholesky decomposition, the mean of their mfcc and their average BPM. Finally the return tuple contains the current weights for calculating the covariance matrix. :rtype: tuple(numpy.array, dict[string, tuple(numpy.array, int, int)], numpy.array) """ mfccs = dict() tempos = dict() average = numpy.zeros(mfcc_amount) song_properties = dict() # Calculate the average 20D feature vector for the mfccs for song_file in self.song_files: filename, _ = os.path.splitext(os.path.basename(song_file)) l.debug("Currently loading %s.", filename) if cache_dir and os.path.isfile( os.path.join(cache_dir, filename + "_done")): l.debug("Loading our song from cache.") mfcc = numpy.load( os.path.join(cache_dir, filename + "_mfcc") + os.extsep + 'npy') tempo = numpy.load( os.path.join(cache_dir, filename + "_tempo") + os.extsep + 'npy') else: l.debug("Song not found in cache, processing it.") if cache_dir: mfcc, tempo = self.process_song_file(mfcc_amount, cache_dir, song_file) else: mfcc, tempo = self.get_mfcc_and_tempo(song_file, mfcc_amount) mfccs[song_file] = mfcc tempos[song_file] = tempo average += mfcc.mean(1) # NOTE: We don't use the length of the songs as weights. Because we # prefer to weigh each song equally. This is also influenced by the # fact that we don't know how long each song will be played so using # the entire length doesn't really make any sense. average = average / len(self.song_files) average_covariance = numpy.array( [numpy.zeros(mfcc_amount) for _ in range(mfcc_amount)]) # Now calculate the centered mfcc and covariance matrix for each song # and keep a running average of the average covariance matrix. for song_file, mfcc in mfccs.items(): mfcc = (mfcc.T - average).T covariance = numpy.cov(mfcc) average_covariance += covariance props = (numpy.linalg.cholesky(covariance), numpy.mean(mfcc, 1), tempos[song_file]) song_properties[song_file] = props # Do PCA on the average covariance matrix average_covariance = average_covariance / len(self.song_files) pca = PCA(self.weight_amount) pca.fit(average_covariance.T) # Initialize the weights to the explained variance ratio if the weights # are not yet set. if self.weights is None: weights = pca.explained_variance_ratio_ else: weights = self.weights return pca.components_.T, song_properties, weights
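# A small note on the numpy.linalg.cholesky call above: the factorisation requires a strictly
# positive-definite covariance, which can fail for very short or degenerate MFCC matrices. A
# common workaround is to add a tiny diagonal "jitter" before factorising; this sketch shows
# the idea with random data standing in for an MFCC matrix.
import numpy

mfcc = numpy.random.default_rng(0).normal(size=(20, 500))   # 20 coefficients, 500 frames
covariance = numpy.cov(mfcc)
jitter = 1e-9 * numpy.eye(covariance.shape[0])
factor = numpy.linalg.cholesky(covariance + jitter)
print(factor.shape)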
sharpe_ratios = np.zeros(len(good_pairs))
wealth_mat = np.zeros((len(good_pairs), len(wealth) - 2))
for i in range(len(good_pairs)):
    if not i % 10:
        print(i)
    wealth, q_stock_1, train_res = trading(good_pairs[i][0], good_pairs[i][1])
    sharpe_ratios[i] = sharpe_ratio(wealth[:-2])
    wealth_mat[i, :] = wealth[:-2]
print(np.mean(sharpe_ratios))
print(np.var(sharpe_ratios))

'''
COMPUTE BETA
'''
market = pd.read_csv(market_2010)
market['Date'] = pd.to_datetime(market['Date'], format='%Y-%m-%d')
market = market.set_index('Date')   # set_index returns a new frame; assign it back
training_period_data = market[-len(wealth) + 2:]
market_price = np.array(training_period_data['Open'])
tomorrow_price_market = market_price[1:]
today_price_market = market_price[:(len(market_price) - 1)]
market_return = (tomorrow_price_market - today_price_market) / today_price_market
port = np.mean(wealth_mat, axis=0)
port_return = (port[1:] - port[:-1]) / port[:-1]
beta_mat = np.vstack((port_return, market_return))
print(np.cov(beta_mat))
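# One way to finish the beta computation hinted at above: with beta_mat stacking the portfolio
# and market returns, the CAPM-style beta of the portfolio is the off-diagonal covariance
# divided by the market variance. This continues the script and assumes beta_mat from above.
cov_pm = np.cov(beta_mat)
beta = cov_pm[0, 1] / cov_pm[1, 1]
print(beta)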
def find_ellipse(prob, cl=90, projection='ARC', nest=False):
    """For a HEALPix map, find an ellipse that contains a given probability.

    The orientation is defined as the angle of the semimajor axis
    counterclockwise from west on the plane of the sky. If you think of the
    semimajor distance as the width of the ellipse, then the orientation is
    the clockwise rotation relative to the image x-axis. Equivalently, the
    orientation is the position angle of the semi-minor axis.

    These conventions match the definitions used in DS9 region files [1]_ and
    Aladin drawing commands [2]_.

    Parameters
    ----------
    prob : np.ndarray, astropy.table.Table
        The HEALPix probability map, either as a full rank explicit array or
        as a multi-order map.
    cl : float
        The desired credible level (default: 90).
    projection : str, optional
        The WCS projection (default: 'ARC', or zenithal equidistant). For a
        list of possible values, see the Astropy documentation [3]_.
    nest : bool
        HEALPix pixel ordering (default: False, or ring ordering).

    Returns
    -------
    ra : float
        The ellipse center right ascension in degrees.
    dec : float
        The ellipse center declination in degrees.
    a : float
        The length of the semimajor axis in degrees.
    b : float
        The length of the semiminor axis in degrees.
    pa : float
        The orientation of the ellipse axis on the plane of the sky in
        degrees.
    area : float
        The area of the ellipse in square degrees.

    Notes
    -----
    The center of the ellipse is the median a posteriori sky position. The
    length and orientation of the semi-major and semi-minor axes are measured
    as follows:

    1. The sky map is transformed to a WCS projection that may be specified by
       the caller. The default projection is ``ARC`` (zenithal equidistant),
       in which radial distances are proportional to the physical angular
       separation from the center point.

    2. A 1-sigma ellipse is estimated by calculating the covariance matrix in
       the projected image plane using three rounds of sigma clipping to
       reject distant outlier points.

    3. The 1-sigma ellipse is inflated until it encloses an integrated
       probability of ``cl`` (default: 90%).

    The function returns a tuple of the right ascension, declination,
    semi-major distance, semi-minor distance, and orientation angle, all in
    degrees.

    References
    ----------
    .. [1] http://ds9.si.edu/doc/ref/region.html
    .. [2] http://aladin.u-strasbg.fr/java/AladinScriptManual.gml#draw
    .. [3] http://docs.astropy.org/en/stable/wcs/index.html#supported-projections

    Examples
    --------
    **Example 1**

    First, we need some imports.

    >>> from astropy.io import fits
    >>> from astropy.utils.data import download_file
    >>> from astropy.wcs import WCS
    >>> import healpy as hp
    >>> from reproject import reproject_from_healpix
    >>> import subprocess

    Next, we download the BAYESTAR sky map for GW170817 from the
    LIGO Document Control Center.

    >>> url = 'https://dcc.ligo.org/public/0146/G1701985/001/bayestar.fits.gz'  # doctest: +SKIP
    >>> filename = download_file(url, cache=True, show_progress=False)  # doctest: +SKIP
    >>> _, healpix_hdu = fits.open(filename)  # doctest: +SKIP
    >>> prob = hp.read_map(healpix_hdu, verbose=False)  # doctest: +SKIP

    Then, we calculate the ellipse and write it to a DS9 region file.
    >>> ra, dec, a, b, pa, area = find_ellipse(prob)  # doctest: +SKIP
    >>> print(*np.around([ra, dec, a, b, pa, area], 5))  # doctest: +SKIP
    195.03732 -19.29358 8.66545 1.1793 63.61698 32.07665
    >>> s = 'fk5;ellipse({},{},{},{},{})'.format(ra, dec, a, b, pa)  # doctest: +SKIP
    >>> open('ds9.reg', 'w').write(s)  # doctest: +SKIP

    Then, we reproject a small patch of the HEALPix map, and save it to a
    file.

    >>> wcs = WCS()  # doctest: +SKIP
    >>> wcs.wcs.ctype = ['RA---ARC', 'DEC--ARC']  # doctest: +SKIP
    >>> wcs.wcs.crval = [ra, dec]  # doctest: +SKIP
    >>> wcs.wcs.crpix = [128, 128]  # doctest: +SKIP
    >>> wcs.wcs.cdelt = [-0.1, 0.1]  # doctest: +SKIP
    >>> img, _ = reproject_from_healpix(healpix_hdu, wcs, [256, 256])  # doctest: +SKIP
    >>> img_hdu = fits.ImageHDU(img, wcs.to_header())  # doctest: +SKIP
    >>> img_hdu.writeto('skymap.fits')  # doctest: +SKIP

    Now open the image and region file in DS9. You should find that the
    ellipse encloses the probability hot spot. You can load the sky map and
    region file from the command line:

    .. code-block:: sh

        $ ds9 skymap.fits -region ds9.reg

    Or you can do this manually:

    1. Open DS9.
    2. Open the sky map: select "File->Open..." and choose ``skymap.fits``
       from the dialog box.
    3. Open the region file: select "Regions->Load Regions..." and choose
       ``ds9.reg`` from the dialog box.

    Now open the image and region file in Aladin.

    1. Open Aladin.
    2. Open the sky map: select "File->Load Local File..." and choose
       ``skymap.fits`` from the dialog box.
    3. Open the region file: select "File->Load Local File..." and choose
       ``ds9.reg`` from the dialog box.

    You can also compare the original HEALPix file with the ellipse in Aladin:

    1. Open Aladin.
    2. Open the HEALPix file by pasting the URL from the top of this example
       in the Command field at the top of the window and hitting return, or
       by selecting "File->Load Direct URL...", pasting the URL, and clicking
       "Submit."
    3. Open the region file: select "File->Load Local File..." and choose
       ``ds9.reg`` from the dialog box.

    **Example 2**

    This example shows that we get approximately the same answer for GW170817
    if we read it in as a multi-order map.

    >>> from ..io import read_sky_map  # doctest: +SKIP
    >>> skymap_moc = read_sky_map(healpix_hdu, moc=True)  # doctest: +SKIP
    >>> ellipse = find_ellipse(skymap_moc)  # doctest: +SKIP
    >>> print(*np.around(ellipse, 5))  # doctest: +SKIP
    195.03709 -19.27589 8.67611 1.18167 63.60454 32.08015

    **Example 3**

    I'm not showing the `ra` or `pa` output from the examples below because
    the right ascension is arbitrary when dec=90° and the position angle is
    arbitrary when a=b; their arbitrary values may vary depending on your
    math library. Also, I add 0.0 to the outputs because on some platforms
    you tend to get values of dec or pa that get rounded to -0.0, which is
    within numerical precision but would break the doctests (see
    https://stackoverflow.com/questions/11010683).

    This is an example sky map that is uniform in sin(theta) out to a given
    radius in degrees. The 90% credible radius should be 0.9 * radius. (There
    will be deviations for small radius due to finite resolution.)

    >>> def make_uniform_in_sin_theta(radius, nside=512):
    ...     npix = hp.nside2npix(nside)
    ...     theta, phi = hp.pix2ang(nside, np.arange(npix))
    ...     theta_max = np.deg2rad(radius)
    ...     prob = np.where(theta <= theta_max, 1 / np.sin(theta), 0)
    ...     return prob / prob.sum()
    ...
>>> prob = make_uniform_in_sin_theta(1) >>> find_ellipse(prob) # doctest: +FLOAT_CMP (225.0, 89.90862520480792, 0.8703361458208101, 0.8703357768874356, 0.0, 2.3788811576269793) >>> prob = make_uniform_in_sin_theta(10) >>> find_ellipse(prob) # doctest: +FLOAT_CMP (225.0, 89.90827657529562, 9.024846562072119, 9.024842703023802, 0.0, 255.11972196535515) >>> prob = make_uniform_in_sin_theta(120) >>> find_ellipse(prob) # doctest: +FLOAT_CMP (179.99995257991023, 90.0, 107.9745037610576, 107.97450376105758, 0.0, 26988.70467497216) **Example 4** These are approximately Gaussian distributions. >>> from scipy import stats >>> def make_gaussian(mean, cov, nside=512): ... npix = hp.nside2npix(nside) ... xyz = np.transpose(hp.pix2vec(nside, np.arange(npix))) ... dist = stats.multivariate_normal(mean, cov) ... prob = dist.pdf(xyz) ... return prob / prob.sum() ... This one is centered at RA=45°, Dec=0° and has a standard deviation of ~1°. >>> prob = make_gaussian( ... [1/np.sqrt(2), 1/np.sqrt(2), 0], ... np.square(np.deg2rad(1))) ... >>> find_ellipse(prob) # doctest: +FLOAT_CMP (45.0, 0.0, 2.1424077148886744, 2.1420790721225518, 90.0, 14.467701995920123) This one is centered at RA=45°, Dec=0°, and is elongated in the north-south direction. >>> prob = make_gaussian( ... [1/np.sqrt(2), 1/np.sqrt(2), 0], ... np.diag(np.square(np.deg2rad([1, 1, 10])))) ... >>> find_ellipse(prob) # doctest: +FLOAT_CMP (44.99999999999999, 0.0, 13.58768882719899, 2.0829846178241853, 90.0, 88.57796576937031) This one is centered at RA=0°, Dec=0°, and is elongated in the east-west direction. >>> prob = make_gaussian( ... [1, 0, 0], ... np.diag(np.square(np.deg2rad([1, 10, 1])))) ... >>> find_ellipse(prob) # doctest: +FLOAT_CMP (0.0, 0.0, 13.583918022027149, 2.0823769912401433, 0.0, 88.54622940628761) This one is centered at RA=0°, Dec=0°, and has its long axis tilted about 10° to the west of north. >>> prob = make_gaussian( ... [1, 0, 0], ... [[0.1, 0, 0], ... [0, 0.1, -0.15], ... [0, -0.15, 1]]) ... >>> find_ellipse(prob) # doctest: +FLOAT_CMP (0.0, 0.0, 64.7713312709293, 33.50754131182681, 80.78231196786838, 6372.344658663038) This one is centered at RA=0°, Dec=0°, and has its long axis tilted about 10° to the east of north. >>> prob = make_gaussian( ... [1, 0, 0], ... [[0.1, 0, 0], ... [0, 0.1, 0.15], ... [0, 0.15, 1]]) ... >>> find_ellipse(prob) # doctest: +FLOAT_CMP (0.0, 0.0, 64.77133127093047, 33.50754131182745, 99.21768803213159, 6372.344658663096) This one is centered at RA=0°, Dec=0°, and has its long axis tilted about 80° to the east of north. >>> prob = make_gaussian( ... [1, 0, 0], ... [[0.1, 0, 0], ... [0, 1, 0.15], ... [0, 0.15, 0.1]]) ... >>> find_ellipse(prob) # doctest: +FLOAT_CMP (0.0, 0.0, 64.7756448603915, 33.509863018519894, 170.78252287327365, 6372.425731592412) This one is centered at RA=0°, Dec=0°, and has its long axis tilted about 80° to the west of north. >>> prob = make_gaussian( ... [1, 0, 0], ... [[0.1, 0, 0], ... [0, 1, -0.15], ... [0, -0.15, 0.1]]) ... 
    >>> find_ellipse(prob)  # doctest: +FLOAT_CMP
    (0.0, 0.0, 64.77564486039148, 33.50986301851987, 9.217477126726322, 6372.42573159241)
    """  # noqa: E501
    try:
        prob['UNIQ']
    except (IndexError, KeyError, ValueError):
        npix = len(prob)
        nside = hp.npix2nside(npix)
        ipix = range(npix)
        area = hp.nside2pixarea(nside, degrees=True)
    else:
        order, ipix = moc.uniq2nest(prob['UNIQ'])
        nside = 1 << order.astype(int)
        ipix = ipix.astype(int)
        area = hp.nside2pixarea(nside)
        prob = prob['PROBDENSITY'] * area
        area *= np.square(180 / np.pi)
        nest = True

    # Find median a posteriori sky position.
    xyz0 = [
        quantile(x, 0.5, weights=prob)
        for x in hp.pix2vec(nside, ipix, nest=nest)
    ]
    (ra, ), (dec, ) = hp.vec2ang(np.asarray(xyz0), lonlat=True)

    # Construct WCS with the specified projection
    # and centered on mean direction.
    w = WCS()
    w.wcs.crval = [ra, dec]
    w.wcs.ctype = ['RA---' + projection, 'DEC--' + projection]

    # Transform HEALPix to zenithal equidistant coordinates.
    xy = w.wcs_world2pix(
        np.transpose(hp.pix2ang(nside, ipix, nest=nest, lonlat=True)), 1)

    # Keep only values that were inside the projection.
    keep = np.logical_and.reduce(np.isfinite(xy), axis=1)
    xy = xy[keep]
    prob = prob[keep]
    if not np.isscalar(area):
        area = area[keep]

    # Find covariance matrix, performing three rounds of sigma-clipping
    # to reject outliers.
    keep = np.ones(len(xy), dtype=bool)
    for _ in range(3):
        c = np.cov(xy[keep], aweights=prob[keep], rowvar=False)
        nsigmas = np.sqrt(np.sum(xy.T * np.linalg.solve(c, xy.T), axis=0))
        keep &= (nsigmas < 3)

    # Find the number of sigma that enclose the cl% credible level.
    i = np.argsort(nsigmas)
    nsigmas = nsigmas[i]
    cls = np.cumsum(prob[i])
    if np.isscalar(area):
        careas = np.arange(1, len(i) + 1) * area
    else:
        careas = np.cumsum(area[i])
    nsigma = np.interp(1e-2 * cl, cls, nsigmas)
    area = np.interp(1e-2 * cl, cls, careas)

    # If the credible level is not within the projection,
    # then stop here and return all nans (one per documented return value).
    if 1e-2 * cl > cls[-1]:
        return np.nan, np.nan, np.nan, np.nan, np.nan, np.nan

    # Find the eigendecomposition of the covariance matrix.
    w, v = np.linalg.eigh(c)

    # Find the semi-minor and semi-major axes.
    b, a = nsigma * np.sqrt(w)

    # Find the position angle.
    pa = np.rad2deg(np.arctan2(*v[0]))

    # An ellipse is symmetric under rotations of 180°.
    # Return the smallest possible positive position angle.
    pa %= 180

    # Done!
    return ra, dec, a, b, pa, area
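# A standalone sketch of the weighted, sigma-clipped covariance estimate used in find_ellipse,
# without any HEALPix machinery: points far from the bulk are iteratively excluded by the
# 3-sigma clip before np.cov is evaluated with probability weights (aweights). All data below
# is synthetic and purely illustrative.
import numpy as np

rng = np.random.default_rng(0)
xy = rng.multivariate_normal([0.0, 0.0], [[1.0, 0.3], [0.3, 0.5]], size=2000)
xy[:20] += 25.0                              # a few gross outliers
prob = np.full(len(xy), 1.0 / len(xy))       # uniform "probability" weights
keep = np.ones(len(xy), dtype=bool)
for _ in range(3):
    c = np.cov(xy[keep], aweights=prob[keep], rowvar=False)
    nsigmas = np.sqrt(np.sum(xy.T * np.linalg.solve(c, xy.T), axis=0))
    keep &= nsigmas < 3
print(np.round(c, 2))                        # close to the true 2x2 covariance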
def MC(x, Ux, b, a, Uab, runs=1000, blow=None, alow=None, return_samples=False, shift=0, verbose=True): r"""Standard Monte Carlo method Monte Carlo based propagation of uncertainties for a digital filter (b,a) with uncertainty matrix :math:`U_{\theta}` for :math:`\theta=(a_1,\ldots,a_{N_a},b_0,\ldots,b_{N_b})^T` Parameters ---------- x: np.ndarray filter input signal Ux: float or np.ndarray standard deviation of signal noise (float), point-wise standard uncertainties or covariance matrix associated with x b: np.ndarray filter numerator coefficients a: np.ndarray filter denominator coefficients Uab: np.ndarray uncertainty matrix :math:`U_\theta` runs: int,optional number of Monte Carlo runs return_samples: bool, optional whether samples or mean and std are returned If ``return_samples`` is ``False``, the method returns: Returns ------- y: np.ndarray filter output signal Uy: np.ndarray uncertainty associated with Otherwise the method returns: Returns ------- Y: np.ndarray array of Monte Carlo results References ---------- * Eichstädt, Link, Harris and Elster [Eichst2012]_ """ Na = len(a) runs = int(runs) Y = np.zeros((runs, len(x))) # set up matrix of MC results theta = np.hstack( (a[1:], b)) # create the parameter vector from the filter coefficients Theta = np.random.multivariate_normal(theta, Uab, runs) # Theta is small and thus we # can draw the full matrix now. if isinstance(Ux, np.ndarray): if len(Ux.shape) == 1: dist = Normal_ZeroCorr(loc=x, scale=Ux) # non-iid noise w/o correlation else: dist = stats.multivariate_normal(x, Ux) # colored noise elif isinstance(Ux, float): dist = Normal_ZeroCorr(loc=x, scale=Ux) # iid noise else: raise NotImplementedError( "The supplied type of uncertainty is not implemented") unst_count = 0 # Count how often in the MC runs the IIR filter is unstable. st_inds = list() if verbose: sys.stdout.write("MC progress: ") for k in range(runs): xn = dist.rvs() # draw filter input signal if not blow is None: if alow is None: alow = 1.0 # FIR low-pass filter xn = lfilter(blow, alow, xn) # low-pass filtered input signal bb = Theta[k, Na - 1:] aa = np.hstack((1.0, Theta[k, :Na - 1])) if isstable(bb, aa): Y[k, :] = lfilter(bb, aa, xn) st_inds.append(k) else: unst_count += 1 # don't apply the IIR filter if it's unstable if np.mod(k, 0.1 * runs) == 0 and verbose: sys.stdout.write(" %d%%" % (np.round(100.0 * k / runs))) if verbose: sys.stdout.write(" 100%\n") if unst_count > 0: print("In %d Monte Carlo %d filters have been unstable" % (runs, unst_count)) print( "These results will not be considered for calculation of mean and " "std") print("However, if return_samples is 'True' then ALL samples are " "returned.") Y = np.roll(Y, int(shift), axis=1) # correct for the (known) sample delay if return_samples: return Y else: y = np.mean(Y[st_inds, :], axis=0) uy = np.cov(Y[st_inds, :], rowvar=False) return y, uy
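# A minimal usage sketch for MC above, assuming it is available at module level together with
# its helpers (lfilter, isstable, Normal_ZeroCorr). A short FIR filter is used so that
# a = [1.0] and the parameter vector theta reduces to the numerator coefficients b; Uab is
# then just the covariance of b. All numbers are illustrative.
import numpy as np

x = np.sin(2 * np.pi * 0.05 * np.arange(200))      # test input signal
Ux = 0.01                                           # iid noise standard deviation
b = np.array([0.25, 0.5, 0.25])                     # FIR numerator coefficients
a = np.array([1.0])                                 # FIR filter: denominator is 1
Uab = 1e-6 * np.eye(len(b))                         # small coefficient covariance
y, Uy = MC(x, Ux, b, a, Uab, runs=200, verbose=False)
print(y.shape, Uy.shape)                            # (200,), (200, 200)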