def fit_gaussian(S, Cin, param_means, param_sigmas):
    '''
    Generates predictions using parameters sampled from the distribution,
    then outputs a Gaussian fitted to the predictions.
    '''
    # Generate prediction samples and scale them into the histogram range.
    output_samples = sample_output(S, Cin, param_means, param_sigmas) * 100

    # Approximate the two output densities with histograms.
    bins = np.arange(1000)
    indices = np.digitize(output_samples, bins)
    count1 = np.bincount(indices[:, 0], minlength=1001)[1:]
    count2 = np.bincount(indices[:, 1], minlength=1001)[1:]

    # Fit a Gaussian to each histogram.
    popt1, _ = curve_fit(gaussian, bins, count1)
    popt2, _ = curve_fit(gaussian, bins, count2)

    plt.plot(bins, count1)
    plt.plot(bins, count2)
    plt.show()

    return popt1, popt2
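# `fit_gaussian` above assumes a `gaussian` model function for `curve_fit`
# that the snippet never defines. A minimal sketch under that assumption; the
# amplitude/mean/sigma parameterization is a guess, not the original's:
def gaussian(x, a, mu, sigma):
    # Unnormalized Gaussian bump; `a` absorbs the histogram's total count.
    return a * np.exp(-(x - mu) ** 2 / (2 * sigma ** 2))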
def _init_common_params(self):
    """Initialize parameters common to RFLVMs.
    """
    # Initialize and then stabilize the estimated latent variable `X`.
    pca = PCA(n_components=self.D, random_state=self.rng)
    self.X = pca.fit_transform(self.Y)
    self._stabilize_x()

    # Initialize K cluster mean vectors and covariance matrices.
    self.mu = np.empty((self.K, self.D))
    self.Sigma = np.empty((self.K, self.D, self.D))
    for k in range(self.K):
        self.Sigma[k] = self.iw0.rvs()
        var0 = 1. / self.prior_obs * self.Sigma[k]
        self.mu[k] = self.rng.multivariate_normal(self.mu0, var0)

    # Initialize cluster assignments and counts.
    self.Z = self.rng.choice(self.K, size=self.M_div_2)
    self.Z_count = np.bincount(self.Z, minlength=self.K)

    # Initialize `W` to approximate the RBF kernel.
    self.W = self.rng.normal(0, 1, size=(self.M_div_2, self.D))

    # Gibbs samples for analysis and visualization after burn-in.
    self.X_samples = np.empty((self.n_samples, self.N, self.D))
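# Illustration (standalone, not part of the class above): `minlength=self.K`
# guarantees one count per cluster, so clusters with no assignments still get
# an explicit zero.
def _demo_bincount_minlength():
    Z = np.array([0, 0, 2])
    assert np.bincount(Z, minlength=5).tolist() == [2, 0, 1, 0, 0]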
def average_path_length(tree, X):
    """Compute average path length: cost of simulating the
    average example; this is used in the objective function.

    @param tree: DecisionTreeClassifier instance
    @param X:    NumPy array (N x D)
                 N := number of examples
                 D := number of dimensions
    @return path_length: float
                         average path length
    """
    leaf_indices = tree.apply(X)
    # `minlength` keeps the counts aligned with the node index array below,
    # even when the highest-numbered nodes receive no examples.
    leaf_counts = np.bincount(leaf_indices, minlength=tree.tree_.node_count)
    leaf_i = np.arange(tree.tree_.node_count)
    path_length = np.dot(leaf_i, leaf_counts) / float(X.shape[0])
    return path_length
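# A minimal usage sketch (the data and tree here are illustrative, not from
# the original): fit a small classifier and evaluate the statistic on its
# training set.
from sklearn.tree import DecisionTreeClassifier

def _demo_average_path_length():
    rng = np.random.RandomState(0)
    X = rng.randn(100, 4)
    y = (X[:, 0] > 0).astype(int)
    tree = DecisionTreeClassifier(max_depth=3, random_state=0).fit(X, y)
    return average_path_length(tree, X)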
def check(x):
    if x.ndim == 1:
        # For 1-D input, grouped_sum must agree with the NumPy reference:
        # np.bincount with a `weights` argument sums x within each group.
        assert_array_almost_equal(
            grouped_sum(x, groups),
            np.bincount(groups, x))
    check_grads(grouped_sum)(x, groups)
    check_grads(grouped_sum)(x, groups, num_groups=n_groups + 4)
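# The test above never shows `grouped_sum` itself. A plausible 1-D reference
# implementation under the semantics the test implies (sum of x within each
# group id, padded to `num_groups`); this is an assumption, not the code
# under test:
def grouped_sum_reference(x, groups, num_groups=None):
    if num_groups is None:
        num_groups = groups.max() + 1
    # bincount's `weights` argument sums x per group; `minlength` pads empty
    # trailing groups with zeros.
    return np.bincount(groups, weights=x, minlength=num_groups)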
def sufficientStats(cls, x, constParams=None):
    # Compute T( x ): for categorical data the sufficient statistic is the
    # vector of per-category counts.
    assert (isinstance(x, np.ndarray) and x.ndim == 1) or isinstance(x, list), x
    D = constParams
    assert D is not None
    t1 = np.bincount(x, minlength=D)
    return (t1, )
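# Usage sketch (the classmethod binding is elided; `None` stands in for
# `cls`): with D = 4 categories, observations [0, 2, 2] give counts
# [1, 0, 2, 0].
def _demo_sufficient_stats():
    (t1,) = sufficientStats(None, np.array([0, 2, 2]), constParams=4)
    assert t1.tolist() == [1, 0, 2, 0]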
def count_transitions(labels):
    # Row i holds counts of transitions i -> j over consecutive label pairs;
    # `N` is the (module-level) number of states.
    return np.vstack([
        np.bincount(labels[1:][labels[:-1] == i], minlength=N)
        for i in range(N)
    ])
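# Usage sketch, assuming N = 2 at module scope: the sequence [0, 0, 1, 0]
# contains transitions 0->0, 0->1, and 1->0, so the count matrix is
# [[1, 1], [1, 0]].
def _demo_count_transitions():
    labels = np.array([0, 0, 1, 0])
    counts = count_transitions(labels)
    assert counts.tolist() == [[1, 1], [1, 0]]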
def _init_params(self, data, lengths=None, params='stmpaw'):
    X = data['obs']

    if self.n_lags == 0:
        super(ARTHMM, self)._init_params(data, lengths, params)
    else:
        if 's' in params:
            super(ARTHMM, self)._init_params(data, lengths, 's')
        if 't' in params:
            super(ARTHMM, self)._init_params(data, lengths, 't')

        if 'm' in params or 'a' in params or 'p' in params:
            kmmod = cluster.KMeans(
                n_clusters=self.n_unique,
                random_state=self.random_state).fit(X)
            kmeans = kmmod.cluster_centers_
            ar_mod = []
            ar_alpha = []
            ar_resid = []

            if not self.shared_alpha:
                count = 0
                for u in range(self.n_unique):
                    for f in range(self.n_features):
                        ar_mod.append(smapi.tsa.AR(
                            X[kmmod.labels_ == u, f]).fit(self.n_lags))
                        ar_alpha.append(ar_mod[count].params[1:])
                        ar_resid.append(ar_mod[count].resid)
                        count += 1
            else:
                # Fit one AR model per feature on the cluster that received
                # the most points after k-means.
                mf = np.argmax(np.bincount(kmmod.labels_))
                for f in range(self.n_features):
                    ar_mod.append(smapi.tsa.AR(
                        X[kmmod.labels_ == mf, f]).fit(self.n_lags))
                    ar_alpha.append(ar_mod[f].params[1:])
                    ar_resid.append(ar_mod[f].resid)

            if 'm' in params:
                mu_init = np.zeros((self.n_unique, self.n_features))
                for u in range(self.n_unique):
                    for f in range(self.n_features):
                        ar_idx = u
                        if self.shared_alpha:
                            ar_idx = 0
                        mu_init[u, f] = kmeans[u, f] - np.dot(
                            np.repeat(kmeans[u, f], self.n_lags),
                            ar_alpha[ar_idx])
                self.mu_ = np.copy(mu_init)

            if 'p' in params:
                precision_init = \
                    np.zeros((self.n_unique, self.n_features, self.n_features))
                for u in range(self.n_unique):
                    if self.n_features == 1:
                        precision_init[u] = 1.0 / np.var(X[kmmod.labels_ == u])
                    else:
                        precision_init[u] = np.linalg.inv(
                            np.cov(np.transpose(X[kmmod.labels_ == u])))
                # Alternative initialization using ar_resid:
                # for f in range(self.n_features):
                #     if not self.shared_alpha:
                #         precision_init[u, f, f] = 1. / np.var(ar_resid[count])
                #         count += 1
                #     else:
                #         precision_init[u, f, f] = 1. / np.var(ar_resid[f])
                self.precision_ = np.copy(precision_init)

            if 'a' in params:
                if self.shared_alpha:
                    alpha_init = ar_alpha[0].reshape((1, self.n_lags))
                else:
                    alpha_init = np.zeros((self.n_unique, self.n_lags))
                    # `ar_idx` must be initialized outside the loop; resetting
                    # it each iteration would reuse ar_alpha[0] for every state.
                    ar_idx = 0
                    for u in range(self.n_unique):
                        alpha_init[u] = ar_alpha[ar_idx]
                        ar_idx += self.n_features
                self.alpha_ = np.copy(alpha_init)
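# Illustration (standalone, not part of the class): the `shared_alpha` branch
# above selects the best-populated cluster as the mode of the k-means labels,
# computed as np.argmax(np.bincount(...)).
def _demo_majority_cluster():
    labels = np.array([1, 0, 1, 2, 1])
    assert np.argmax(np.bincount(labels)) == 1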
def _init_params(self, data, lengths=None, params='stmpaw'):
    X = data['obs']

    if self.n_lags == 0:
        super(ARTHMM, self)._init_params(data, lengths, params)
    else:
        if 's' in params:
            super(ARTHMM, self)._init_params(data, lengths, 's')
        if 't' in params:
            super(ARTHMM, self)._init_params(data, lengths, 't')

        if 'm' in params or 'a' in params or 'p' in params:
            kmmod = cluster.KMeans(
                n_clusters=self.n_unique,
                random_state=self.random_state).fit(X)
            kmeans = kmmod.cluster_centers_
            ar_mod = []
            ar_alpha = []
            ar_resid = []

            if not self.shared_alpha:
                for u in range(self.n_unique):
                    ar_mod.append(smapi.tsa.AR(
                        X[kmmod.labels_ == u]).fit(self.n_lags))
                    ar_alpha.append(ar_mod[u].params[1:])
                    ar_resid.append(ar_mod[u].resid)
            else:
                # Fit a single AR model on the cluster that received the most
                # points after k-means.
                mf = np.argmax(np.bincount(kmmod.labels_))
                ar_mod.append(smapi.tsa.AR(
                    X[kmmod.labels_ == mf]).fit(self.n_lags))
                ar_alpha.append(ar_mod[0].params[1:])
                ar_resid.append(ar_mod[0].resid)

            if 'm' in params:
                mu_init = np.zeros((self.n_unique, self.n_features))
                for u in range(self.n_unique):
                    ar_idx = u
                    if self.shared_alpha:
                        ar_idx = 0
                    mu_init[u] = kmeans[u, 0] - np.dot(
                        np.repeat(kmeans[u, 0], self.n_lags),
                        ar_alpha[ar_idx])
                self.mu_ = np.copy(mu_init)

            if 'p' in params:
                precision_init = np.zeros((self.n_unique, self.n_features))
                for u in range(self.n_unique):
                    if not self.shared_alpha:
                        maxVar = np.max([np.var(ar_resid[i])
                                         for i in range(self.n_unique)])
                    else:
                        maxVar = np.var(ar_resid[0])
                    precision_init[u] = 1.0 / maxVar
                self.precision_ = np.copy(precision_init)

            if 'a' in params:
                alpha_init = np.zeros((self.n_unique, self.n_lags))
                for u in range(self.n_unique):
                    ar_idx = u
                    if self.shared_alpha:
                        ar_idx = 0
                    alpha_init[u, :] = ar_alpha[ar_idx]
                self.alpha_ = alpha_init
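# Why `mu_init` has that form (an interpretation, not stated in the original):
# for an AR(p) process x_t = c + sum_i a_i * x_{t-i} + e_t, the stationary
# mean m satisfies m = c + m * sum(a), so c = m * (1 - sum(a)). The expression
# kmeans[u, 0] - dot(repeat(kmeans[u, 0], p), alpha) equals exactly that
# intercept with m set to the cluster center:
def _implied_ar_intercept(mean, alpha):
    # Algebraically identical to
    # mean - np.dot(np.repeat(mean, len(alpha)), alpha).
    return mean * (1.0 - np.sum(alpha))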
def atomsDistances(positions, cell, cutoff_radius=6.0, self_interaction=False):
    """ Compute the distance of every atom to its neighbors.

    This function computes the distances of every central atom to its
    neighbors. Pairs separated by more than the cutoff radius are discarded.
    Periodic boundary conditions are assumed along every axis.

    Parameters:
    -----------
    positions: np.ndarray
        Atomic positions with shape (N_atoms, 3), where N_atoms is the
        number of atoms in the cluster.
    cell: np.ndarray
        Periodic cell with shape (3, 3).
    cutoff_radius: float
        Cutoff radius, a hyperparameter. The default is 6.0 Angstrom.
    self_interaction: boolean
        Default is False, meaning the results do not consider an atom
        as its own neighbor.

    Returns:
    ----------
    distances: np.ndarray
        Differentiable distances array.
    first_atoms: np.ndarray
        Atoms observed in the cell. np.unique of first_atoms equals
        np.arange of the number of atoms in the cell.
    second_atoms: np.ndarray
        Neighbor atoms of the first atoms; the distance between
        first_atoms[k] and second_atoms[k] is stored in distances[k].
    cell_shift_vector: np.ndarray
        The cell shift vector of every pair.
    """
    # Compute reciprocal lattice vectors.
    inverse_cell = np.linalg.pinv(cell).T

    # Compute distances of cell faces.
    face_dist_c = 1 / np.linalg.norm(inverse_cell, axis=0)

    # Use a minimum bin size of 3 Angstrom.
    bin_size = max(cutoff_radius, 3)

    # Compute the number of bins; the minimum per axis is 1. The floor is
    # written as x - x % 1 to keep the expression differentiable.
    nbins_c = np.maximum(
        face_dist_c / bin_size - (face_dist_c / bin_size) % 1,
        [1., 1., 1.])
    nbins = np.prod(nbins_c)

    # Number of neighboring bins to search along each axis.
    neighbor_search_x, neighbor_search_y, neighbor_search_z = \
        np.ceil(bin_size * nbins_c / face_dist_c).astype(int)

    # Sort atoms into bins.
    scaled_positions_ic = np.dot(positions, inverse_cell) % 1
    bin_index_ic = (scaled_positions_ic * nbins_c
                    - (scaled_positions_ic * nbins_c) % 1)

    # Convert Cartesian bin index to a unique scalar bin index.
    bin_index_i = (bin_index_ic[:, 0] +
                   nbins_c[0] * (bin_index_ic[:, 1] +
                                 nbins_c[1] * bin_index_ic[:, 2]))

    # atom_i contains atom indices in the new sort order.
    atom_i = np.argsort(bin_index_i)
    bin_index_i = bin_index_i[atom_i]

    # Compute the maximum number of atoms in a bin.
    max_natoms_per_bin = np.bincount(np.int_(bin_index_i)).max()

    # atoms_in_bin_ba records which atoms sit in which bin; -1 marks an
    # empty slot.
    atoms_in_bin_ba = -np.ones([np.int_(nbins), max_natoms_per_bin], dtype=int)
    for i in range(max_natoms_per_bin):
        # Create a mask array that identifies the first atom of each bin.
        mask = np.append([True], bin_index_i[:-1] != bin_index_i[1:])
        # Assign all first atoms.
        atoms_in_bin_ba[np.int_(bin_index_i[mask]), i] = atom_i[mask]
        # Remove the atoms just sorted into atoms_in_bin_ba. The next
        # "first" atom will be the second, and so on.
        mask = np.logical_not(mask)
        atom_i = atom_i[mask]
        bin_index_i = bin_index_i[mask]

    # Build the list of cell shifts over which neighbors are searched.
    shift = []
    for x in range(-neighbor_search_x, neighbor_search_x + 1):
        for y in range(-neighbor_search_y, neighbor_search_y + 1):
            for z in range(-neighbor_search_z, neighbor_search_z + 1):
                shift += [[x, y, z]]

    # The possible positions of neighboring bins follow from these shifts.
    neighborbin = (bin_index_ic[:, None] + np.array(shift)[None, :]) % nbins_c
    cell_shift = ((bin_index_ic[:, None] + np.array(shift)[None, :])
                  - neighborbin) / nbins_c
    neighborbin = neighborbin[:, :, 0] + nbins_c[0] * (
        neighborbin[:, :, 1] + nbins_c[1] * neighborbin[:, :, 2])

    distances = []
    first_atoms = []
    second_atoms = []
    cell_shift_vector = []
    for i in range(len(positions)):
        # Mask out empty slots (and, unless self_interaction, atom i itself).
        if self_interaction:
            mask = (atoms_in_bin_ba[np.int_(neighborbin[i])] != -1)
        else:
            mask = np.logical_and(
                atoms_in_bin_ba[np.int_(neighborbin[i])] != -1,
                atoms_in_bin_ba[np.int_(neighborbin[i])] != i)

        distances_vec = (positions[atoms_in_bin_ba[np.int_(neighborbin[i])]]
                         - positions[i])
        # The distance must account for the cell shift.
        distances_vec = distances_vec + np.dot(cell_shift[i], cell)[:, None]

        # Record the cell shift vector per atom instead of per bin.
        _cell_shift_vector = np.repeat(cell_shift[i][:, None],
                                       max_natoms_per_bin, axis=1)[mask]
        distances_vec = distances_vec[mask]
        temp_distances = np.sum(distances_vec * distances_vec, axis=1) ** 0.5
        cutoff_mask = (temp_distances < cutoff_radius)

        _second_atoms = atoms_in_bin_ba[np.int_(
            neighborbin[i])][mask][cutoff_mask]
        _first_atoms = [i] * len(_second_atoms)
        _cell_shift_vector = _cell_shift_vector[cutoff_mask]

        first_atoms.extend(_first_atoms)
        second_atoms.extend(_second_atoms)
        distances.extend(temp_distances[cutoff_mask])
        cell_shift_vector.extend(_cell_shift_vector)

    distances = np.array(distances)
    cell_shift_vector = np.array(cell_shift_vector)
    first_atoms = np.array(first_atoms)
    second_atoms = np.array(second_atoms)
    return distances, first_atoms, second_atoms, cell_shift_vector
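# A minimal usage sketch (values are illustrative): two atoms in a 10 Angstrom
# cubic cell. With a 6 Angstrom cutoff, each atom should see the other exactly
# once, and all periodic images fall outside the cutoff.
def _demo_atoms_distances():
    positions = np.array([[0.0, 0.0, 0.0],
                          [1.5, 0.0, 0.0]])
    cell = 10.0 * np.eye(3)
    d, first, second, shifts = atomsDistances(positions, cell)
    # Both orderings (0 -> 1 and 1 -> 0) appear, each at distance 1.5.
    assert len(d) == 2 and np.allclose(d, 1.5)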