def _init(self, sequences, init_params):
    """Find initial means (hot start)."""
    sequences = [ensure_type(s, dtype=np.float32, ndim=2, name='s',
                             warn_on_cast=False)
                 for s in sequences]
    self._impl._sequences = sequences

    # Use either the full dataset or only the first n_hotstart sequences
    # for the cheap initialization pass.
    if self.n_hotstart == 'all':
        small_dataset = np.vstack(sequences)
    else:
        small_dataset = np.vstack(
            sequences[0:min(len(sequences), self.n_hotstart)])

    if self.init_algo == "GMM" and ("m" in init_params or "v" in init_params):
        # sklearn.mixture.GMM is the legacy API (superseded by
        # GaussianMixture in modern scikit-learn releases).
        mixture = sklearn.mixture.GMM(self.n_states, n_init=1,
                                      random_state=self.random_state)
        mixture.fit(small_dataset)
        if "m" in init_params:
            self.means_ = mixture.means_
        if "v" in init_params:
            self.vars_ = mixture.covars_
    else:
        if 'm' in init_params:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                self.means_ = cluster.KMeans(
                    n_clusters=self.n_states, n_init=1, init='random',
                    n_jobs=self.n_jobs,
                    random_state=self.random_state).fit(
                        small_dataset).cluster_centers_
        if 'v' in init_params:
            self.vars_ = np.vstack([np.var(small_dataset, axis=0)]
                                   * self.n_states)

    if 't' in init_params:
        # Uniform transition matrix and state populations.
        transmat_ = np.empty((self.n_states, self.n_states))
        transmat_.fill(1.0 / self.n_states)
        self.transmat_ = transmat_
        self.populations_ = np.ones(self.n_states) / self.n_states
def __init__(self, observational_samples, true_observational_samples):
    # Column vectors of the observed variables.
    self.Y = np.asarray(observational_samples['Y'])[:, np.newaxis]
    self.N = np.asarray(observational_samples['N'])[:, np.newaxis]
    self.CO = np.asarray(observational_samples['CO'])[:, np.newaxis]
    self.T = np.asarray(observational_samples['T'])[:, np.newaxis]
    self.D = np.asarray(observational_samples['D'])[:, np.newaxis]
    self.P = np.asarray(observational_samples['P'])[:, np.newaxis]
    self.O = np.asarray(observational_samples['O'])[:, np.newaxis]
    self.S = np.asarray(observational_samples['S'])[:, np.newaxis]
    self.L = np.asarray(observational_samples['L'])[:, np.newaxis]
    self.TE = np.asarray(observational_samples['TE'])[:, np.newaxis]
    self.C = np.asarray(observational_samples['C'])[:, np.newaxis]

    true_Y = np.asarray(true_observational_samples['Y'])[:, np.newaxis]
    true_N = np.asarray(true_observational_samples['N'])[:, np.newaxis]
    true_CO = np.asarray(true_observational_samples['CO'])[:, np.newaxis]
    true_T = np.asarray(true_observational_samples['T'])[:, np.newaxis]
    true_D = np.asarray(true_observational_samples['D'])[:, np.newaxis]
    true_P = np.asarray(true_observational_samples['P'])[:, np.newaxis]
    true_O = np.asarray(true_observational_samples['O'])[:, np.newaxis]
    true_S = np.asarray(true_observational_samples['S'])[:, np.newaxis]
    true_L = np.asarray(true_observational_samples['L'])[:, np.newaxis]
    true_TE = np.asarray(true_observational_samples['TE'])[:, np.newaxis]
    true_C = np.asarray(true_observational_samples['C'])[:, np.newaxis]

    # Linear-regression surrogates for each structural equation,
    # fit on the "true" observational samples.
    self.reg_Y = LinearRegression().fit(
        np.hstack((true_L, true_N, true_P, true_O, true_C, true_CO, true_TE)),
        true_Y)
    self.reg_P = LinearRegression().fit(
        np.hstack((true_S, true_T, true_D, true_TE)), true_P)
    self.reg_O = LinearRegression().fit(
        np.hstack((true_S, true_T, true_D, true_TE)), true_O)
    self.reg_CO = LinearRegression().fit(
        np.hstack((true_S, true_T, true_D, true_TE)), true_CO)
    self.reg_T = LinearRegression().fit(true_S, true_T)
    self.reg_D = LinearRegression().fit(true_S, true_D)
    self.reg_C = LinearRegression().fit(
        np.hstack((true_N, true_L, true_TE)), true_C)
    self.reg_S = LinearRegression().fit(true_TE, true_S)
    self.reg_TE = LinearRegression().fit(true_L, true_TE)

    ## Define distributions for the exogenous variables
    params_list = scipy.stats.gamma.fit(true_L)
    self.dist_Light = scipy.stats.gamma(a=params_list[0], loc=params_list[1],
                                        scale=params_list[2])

    mixture = sklearn.mixture.GaussianMixture(n_components=3)
    mixture.fit(true_N)
    self.dist_Nutrients_PC1 = mixture
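# A minimal usage sketch of the pattern in the constructor above, under
# assumptions: linear-regression surrogates predict each endogenous variable
# from its parents, while a fitted gamma / Gaussian mixture act as samplers
# for the exogenous variables. The variable names and synthetic data below
# are illustrative only, not the project's actual dataset.
import numpy as np
import scipy.stats
import sklearn.mixture
from sklearn.linear_model import LinearRegression

rng = np.random.default_rng(0)
true_L = rng.gamma(shape=2.0, scale=1.0, size=(500, 1))
true_TE = 0.5 * true_L + rng.normal(scale=0.1, size=(500, 1))
true_N = rng.normal(size=(500, 1))

# Surrogate structural equation TE = f(L), analogous to reg_TE above.
reg_TE = LinearRegression().fit(true_L, true_TE)

# Exogenous light distribution fitted as a gamma, analogous to dist_Light.
a, loc, scale = scipy.stats.gamma.fit(true_L.ravel())
dist_Light = scipy.stats.gamma(a=a, loc=loc, scale=scale)

# Sample new exogenous light values and push them through the surrogate.
new_L = dist_Light.rvs(size=(10, 1))
new_TE = reg_TE.predict(new_L)

# The Gaussian mixture fitted to nutrients can be sampled the same way.
dist_Nutrients_PC1 = sklearn.mixture.GaussianMixture(n_components=3).fit(true_N)
new_N, _ = dist_Nutrients_PC1.sample(10)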
def gmm(X, k):
    """
    Function that calculates a GMM from a dataset.

    Arguments:
        - X is a numpy.ndarray of shape (n, d) containing the dataset
        - k is the number of clusters

    Returns: pi, m, S, clss, bic
        - pi is a numpy.ndarray of shape (k,) containing the cluster priors
        - m is a numpy.ndarray of shape (k, d) containing the centroid means
        - S is a numpy.ndarray of shape (k, d, d) containing the covariance
          matrices
        - clss is a numpy.ndarray of shape (n,) containing the cluster
          indices for each data point
        - bic is the BIC value of the fitted model
    """
    mixture = sklearn.mixture.GaussianMixture(n_components=k)
    g = mixture.fit(X)
    m = g.means_
    S = g.covariances_
    pi = g.weights_
    clss = mixture.predict(X)
    bic = mixture.bic(X)
    return pi, m, S, clss, bic
def get_gaussian_covariance(x, name):
    covariances = []
    ks = range(2, 20)
    for k in ks:
        # Fit a spherical Gaussian mixture for each candidate k and record
        # the mean of its (scalar) component covariances.
        mixture = sklearn.mixture.GaussianMixture(k,
                                                  covariance_type='spherical',
                                                  max_iter=200, n_init=10)
        mixture.fit(x)
        covariances.append(np.mean(mixture.covariances_))

    plt.figure()
    plt.scatter(ks, covariances)
    plt.title(f'Gaussian mixture mean covariances ({name})')
    plt.xlabel('k')
    plt.ylabel('mean covariance')
    plt.xticks(ks)

    # Overlay a logarithmic trend line fitted to the mean covariances.
    a, b = np.polyfit(np.log(ks), covariances, 1)
    plt.plot(ks, a * np.log(ks) + b, color=_colors[1])
    save_plot(f'performance/covariances-{name}')
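# Hedged usage sketch for get_gaussian_covariance. `_colors` and `save_plot`
# are module-level helpers assumed by the function above; minimal stand-ins
# are defined here only so the sketch runs on its own.
import os
import numpy as np
import matplotlib.pyplot as plt
import sklearn.mixture

_colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

def save_plot(path):
    # Stand-in for the project's save_plot helper: write the current figure
    # to disk, creating the directory if needed.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    plt.savefig(path + '.png')
    plt.close()

# Two well-separated Gaussian blobs: the mean spherical covariance should
# drop sharply once k reaches the true number of clusters.
rng = np.random.default_rng(0)
x = np.vstack([rng.normal(0.0, 1.0, size=(200, 2)),
               rng.normal(8.0, 1.0, size=(200, 2))])
get_gaussian_covariance(x, 'demo')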
def gmm(X, k):
    """
    Compute a GMM from a dataset.

    :param X: numpy.ndarray of shape (n, d) containing the dataset
    :param k: number of clusters
    :return: pi, m, S, clss, bic
    """
    mixture = sklearn.mixture.GaussianMixture(n_components=k)
    g = mixture.fit(X)
    m = g.means_
    S = g.covariances_
    pi = g.weights_
    clss = mixture.predict(X)
    bic = mixture.bic(X)
    return pi, m, S, clss, bic
def gmm(X, k):
    """
    Calculates a GMM from a dataset.

    :param X: a numpy.ndarray of shape (n, d) containing the dataset
    :param k: the number of clusters
    :return: pi, m, S, clss, bic
        pi is a numpy.ndarray of shape (k,) containing the cluster priors
        m is a numpy.ndarray of shape (k, d) containing the centroid means
        S is a numpy.ndarray of shape (k, d, d) containing the covariance
        matrices
        clss is a numpy.ndarray of shape (n,) containing the cluster indices
        for each data point
        bic is the BIC value of the fitted model
    """
    mixture = sklearn.mixture.GaussianMixture(n_components=k)
    mixture_fit = mixture.fit(X)
    m = mixture_fit.means_
    S = mixture_fit.covariances_
    pi = mixture_fit.weights_
    clss = mixture.predict(X)
    bic = mixture.bic(X)
    return pi, m, S, clss, bic
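# Quick usage sketch for gmm(): fit a 3-component model to synthetic blob
# data and inspect the returned shapes. The data and component count are
# arbitrary and purely illustrative.
import numpy as np
import sklearn.mixture  # assumed to be imported in the module defining gmm()

rng = np.random.default_rng(0)
X = np.vstack([rng.normal(loc, 1.0, size=(100, 2)) for loc in (-5.0, 0.0, 5.0)])
pi, m, S, clss, bic = gmm(X, 3)
print(pi.shape, m.shape, S.shape, clss.shape, bic)
# expected: (3,) (3, 2) (3, 2, 2) (300,) followed by a scalar BIC value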