Code Example #1
File: ghmm.py  Project: kyleabeauchamp/msmbuilder
    def _init(self, sequences, init_params):
        """Find initial means(hot start)"""
        sequences = [ensure_type(s, dtype=np.float32, ndim=2, name='s', warn_on_cast=False)
                     for s in sequences]
        self._impl._sequences = sequences

        if self.n_hotstart == 'all':
            small_dataset = np.vstack(sequences)
        else:
            small_dataset = np.vstack(sequences[0:min(len(sequences), self.n_hotstart)])

        if self.init_algo == "GMM" and ("m" in init_params or "v" in init_params):
            mixture = sklearn.mixture.GMM(self.n_states, n_init=1, random_state=self.random_state)
            mixture.fit(small_dataset)
            if "m" in init_params:
                self.means_ = mixture.means_
            if "v" in init_params:
                self.vars_ = mixture.covars_
        else:
            if 'm' in init_params:
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    self.means_ = cluster.KMeans(
                        n_clusters=self.n_states, n_init=1, init='random',
                        n_jobs=self.n_jobs, random_state=self.random_state).fit(
                        small_dataset).cluster_centers_
            if 'v' in init_params:
                self.vars_ = np.vstack([np.var(small_dataset, axis=0)] * self.n_states)
        if 't' in init_params:
            transmat_ = np.empty((self.n_states, self.n_states))
            transmat_.fill(1.0 / self.n_states)
            self.transmat_ = transmat_
            self.populations_ = np.ones(self.n_states) / self.n_states
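
Note: sklearn.mixture.GMM and its covars_ attribute were deprecated in scikit-learn 0.18 and removed in 0.20, and KMeans dropped its n_jobs argument in 1.0, so the snippet above only runs on older scikit-learn. Below is a minimal standalone sketch of the same GMM hot start against the current GaussianMixture API; the diagonal covariance type, the stand-in variable values, and the toy data are assumptions, not from the source:

import numpy as np
import sklearn.mixture

# Stand-ins for the attributes _init reads (values are illustrative).
n_states, random_state = 4, 0
small_dataset = np.random.RandomState(random_state).randn(500, 3).astype(np.float32)

mixture = sklearn.mixture.GaussianMixture(
    n_components=n_states, covariance_type='diag',
    n_init=1, random_state=random_state)
mixture.fit(small_dataset)
means_ = mixture.means_        # same attribute name as the old GMM class
vars_ = mixture.covariances_   # replaces the old covars_; shape (n_states, n_features)
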
Code Example #2
    def __init__(self, observational_samples, true_observational_samples):

        self.Y = np.asarray(observational_samples['Y'])[:, np.newaxis]
        self.N = np.asarray(observational_samples['N'])[:, np.newaxis]
        self.CO = np.asarray(observational_samples['CO'])[:, np.newaxis]
        self.T = np.asarray(observational_samples['T'])[:, np.newaxis]
        self.D = np.asarray(observational_samples['D'])[:, np.newaxis]
        self.P = np.asarray(observational_samples['P'])[:, np.newaxis]
        self.O = np.asarray(observational_samples['O'])[:, np.newaxis]
        self.S = np.asarray(observational_samples['S'])[:, np.newaxis]
        self.L = np.asarray(observational_samples['L'])[:, np.newaxis]
        self.TE = np.asarray(observational_samples['TE'])[:, np.newaxis]
        self.C = np.asarray(observational_samples['C'])[:, np.newaxis]

        true_Y = np.asarray(true_observational_samples['Y'])[:, np.newaxis]
        true_N = np.asarray(true_observational_samples['N'])[:, np.newaxis]
        true_CO = np.asarray(true_observational_samples['CO'])[:, np.newaxis]
        true_T = np.asarray(true_observational_samples['T'])[:, np.newaxis]
        true_D = np.asarray(true_observational_samples['D'])[:, np.newaxis]
        true_P = np.asarray(true_observational_samples['P'])[:, np.newaxis]
        true_O = np.asarray(true_observational_samples['O'])[:, np.newaxis]
        true_S = np.asarray(true_observational_samples['S'])[:, np.newaxis]
        true_L = np.asarray(true_observational_samples['L'])[:, np.newaxis]
        true_TE = np.asarray(true_observational_samples['TE'])[:, np.newaxis]
        true_C = np.asarray(true_observational_samples['C'])[:, np.newaxis]

        self.reg_Y = LinearRegression().fit(
            np.hstack(
                (true_L, true_N, true_P, true_O, true_C, true_CO, true_TE)),
            true_Y)
        self.reg_P = LinearRegression().fit(
            np.hstack((true_S, true_T, true_D, true_TE)), true_P)
        self.reg_O = LinearRegression().fit(
            np.hstack((true_S, true_T, true_D, true_TE)), true_O)
        self.reg_CO = LinearRegression().fit(
            np.hstack((true_S, true_T, true_D, true_TE)), true_CO)
        self.reg_T = LinearRegression().fit(true_S, true_T)
        self.reg_D = LinearRegression().fit(true_S, true_D)
        self.reg_C = LinearRegression().fit(
            np.hstack((true_N, true_L, true_TE)), true_C)
        self.reg_S = LinearRegression().fit(true_TE, true_S)
        self.reg_TE = LinearRegression().fit(true_L, true_TE)

        ## Define distributions for the exogenous variables
        params_list = scipy.stats.gamma.fit(true_L)
        self.dist_Light = scipy.stats.gamma(a=params_list[0],
                                            loc=params_list[1],
                                            scale=params_list[2])

        mixture = sklearn.mixture.GaussianMixture(n_components=3)
        mixture.fit(true_N)
        self.dist_Nutrients_PC1 = mixture
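
This constructor fits one LinearRegression per structural equation of what looks like a linear causal model, plus parametric distributions for the exogenous variables (a gamma for L, a 3-component GaussianMixture for N). A minimal usage sketch, assuming the method above belongs to a class named SCMRegressors; the class name and the synthetic data are assumptions, and any dict-like with these keys (e.g. a pandas DataFrame) should work:

import numpy as np

rng = np.random.default_rng(0)
keys = ['Y', 'N', 'CO', 'T', 'D', 'P', 'O', 'S', 'L', 'TE', 'C']
# Hypothetical positive-valued samples so the gamma fit is well-behaved.
samples = {k: rng.gamma(2.0, 1.0, size=200) for k in keys}

model = SCMRegressors(samples, samples)                 # class name is an assumption
light_draws = model.dist_Light.rvs(size=5)              # sample the fitted gamma for L
nutrient_draws, _ = model.dist_Nutrients_PC1.sample(5)  # sample the fitted GMM for N
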
Code Example #3
import sklearn.mixture


def gmm(X, k):
    """
    Function that calculates a GMM from a dataset

    Arguments:
     - X is a numpy.ndarray of shape (n, d) containing the dataset
     - k is the number of clusters

    Returns:
     pi, m, S, clss, bic
        - pi is a numpy.ndarray of shape (k,) containing the cluster priors
        - m is a numpy.ndarray of shape (k, d) containing the centroid means
        - S is a numpy.ndarray of shape (k, d, d) containing
            the covariance matrices
        - clss is a numpy.ndarray of shape (n,) containing the cluster indices
            for each data point
        - bic is the BIC value (a float) for the fitted k-component model
    """

    mixture = sklearn.mixture.GaussianMixture(n_components=k)
    g = mixture.fit(X)
    m = g.means_
    S = g.covariances_
    pi = g.weights_
    clss = mixture.predict(X)
    bic = mixture.bic(X)

    return pi, m, S, clss, bic
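
A quick usage sketch on synthetic two-cluster data; the data and seed are illustrative:

import numpy as np

np.random.seed(0)
# Two well-separated 2-D clusters, 100 points each.
X = np.vstack([np.random.randn(100, 2) - 3, np.random.randn(100, 2) + 3])
pi, m, S, clss, bic = gmm(X, 2)
print(pi.shape, m.shape, S.shape, clss.shape)  # (2,) (2, 2) (2, 2, 2) (200,)
print(bic)  # a single float: the BIC of the 2-component fit
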
Code Example #4
    def _init(self, sequences, init_params):
        """Find initial means(hot start)"""
        sequences = [
            ensure_type(s,
                        dtype=np.float32,
                        ndim=2,
                        name='s',
                        warn_on_cast=False) for s in sequences
        ]
        self._impl._sequences = sequences

        if self.n_hotstart == 'all':
            small_dataset = np.vstack(sequences)
        else:
            small_dataset = np.vstack(
                sequences[0:min(len(sequences), self.n_hotstart)])

        if self.init_algo == "GMM" and ("m" in init_params
                                        or "v" in init_params):
            mixture = sklearn.mixture.GMM(self.n_states,
                                          n_init=1,
                                          random_state=self.random_state)
            mixture.fit(small_dataset)
            if "m" in init_params:
                self.means_ = mixture.means_
            if "v" in init_params:
                self.vars_ = mixture.covars_
        else:
            if 'm' in init_params:
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    self.means_ = cluster.KMeans(
                        n_clusters=self.n_states,
                        n_init=1,
                        init='random',
                        n_jobs=self.n_jobs,
                        random_state=self.random_state).fit(
                            small_dataset).cluster_centers_
            if 'v' in init_params:
                self.vars_ = np.vstack([np.var(small_dataset, axis=0)] *
                                       self.n_states)
        if 't' in init_params:
            transmat_ = np.empty((self.n_states, self.n_states))
            transmat_.fill(1.0 / self.n_states)
            self.transmat_ = transmat_
            self.populations_ = np.ones(self.n_states) / self.n_states
Code Example #5
import numpy as np
import matplotlib.pyplot as plt
import sklearn.mixture

# Note: _colors (a color palette) and save_plot (a figure-saving helper) are
# module-level names defined elsewhere in the source project.


def get_gaussian_covariance(x, name):
    covariances = []
    ks = range(2, 20)
    for k in ks:
        mixture = sklearn.mixture.GaussianMixture(k,
                                                  covariance_type='spherical',
                                                  max_iter=200,
                                                  n_init=10)
        mixture.fit(x)
        covariances.append(np.mean(mixture.covariances_))

    plt.figure()
    plt.scatter(ks, covariances)
    plt.title(f'Gaussian mixture mean covariances ({name})')
    plt.xlabel('k')
    plt.ylabel('mean covariance')
    plt.xticks(ks)

    a, b = np.polyfit(np.log(ks), covariances, 1)
    plt.plot(ks, a * np.log(ks) + b, color=_colors[1])

    save_plot(f'performance/covariances-{name}')
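
A usage sketch that stubs the two module-level helpers the function assumes; both stubs are guesses at their behavior, not the source project's implementations:

import os
import numpy as np
import matplotlib.pyplot as plt

_colors = plt.rcParams['axes.prop_cycle'].by_key()['color']  # assumed palette

def save_plot(path):
    # Assumed helper: ensure the target directory exists, then save the figure.
    os.makedirs(os.path.dirname(path) or '.', exist_ok=True)
    plt.savefig(f'{path}.png')

X = np.random.randn(300, 2)        # illustrative data
get_gaussian_covariance(X, 'toy')  # writes performance/covariances-toy.png
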
Code Example #6
import sklearn.mixture


def gmm(X, k):
    """
    Compute a GMM from a dataset.
    :param X: numpy.ndarray of shape (n, d) containing the dataset
    :param k: number of clusters
    :return: pi, m, S, clss, bic
    """

    mixture = sklearn.mixture.GaussianMixture(n_components=k)
    g = mixture.fit(X)
    m = g.means_
    S = g.covariances_
    pi = g.weights_
    clss = mixture.predict(X)
    bic = mixture.bic(X)

    return pi, m, S, clss, bic
Code Example #7
import sklearn.mixture


def gmm(X, k):
    """
    Calculates a GMM from a dataset
    :param X: is a numpy.ndarray of shape (n, d) containing the dataset
    :param k: is the number of clusters
    :return: pi, m, S, clss, bic
        pi is a numpy.ndarray of shape (k,) containing the cluster priors
        m is a numpy.ndarray of shape (k, d) containing the centroid means
        S is a numpy.ndarray of shape (k, d, d) containing the covariance
        matrices
        clss is a numpy.ndarray of shape (n,) containing the cluster indices
        for each data point
        bic is the BIC value (a float) for the fitted k-component model
    """
    mixture = sklearn.mixture.GaussianMixture(n_components=k)
    mixture_fit = mixture.fit(X)
    m = mixture_fit.means_
    S = mixture_fit.covariances_
    pi = mixture_fit.weights_
    clss = mixture.predict(X)
    bic = mixture.bic(X)

    return pi, m, S, clss, bic