Example #1
    def _accumulate_sufficient_statistics(self, stats, obs, framelogprob,
                                          posteriors, fwdlattice, bwdlattice,
                                          params):
        super(GMMHMM,
              self)._accumulate_sufficient_statistics(stats, obs, framelogprob,
                                                      posteriors, fwdlattice,
                                                      bwdlattice, params)

        for state, g in enumerate(self.gmms_):
            _, lgmm_posteriors = g.score_samples(obs)
            lgmm_posteriors += np.log(posteriors[:, state][:, np.newaxis] +
                                      np.finfo(float).eps)
            gmm_posteriors = np.exp(lgmm_posteriors)
            tmp_gmm = GMM(g.n_components, covariance_type=g.covariance_type)
            n_features = g.means_.shape[1]
            tmp_gmm._set_covars(
                distribute_covar_matrix_to_match_covariance_type(
                    np.eye(n_features), g.covariance_type, g.n_components))
            norm = tmp_gmm._do_mstep(obs, gmm_posteriors, params)

            if np.any(np.isnan(tmp_gmm.covars_)):
                raise ValueError("NaN values in tmp_gmm.covars_")

            stats['norm'][state] += norm
            if 'm' in params:
                stats['means'][state] += tmp_gmm.means_ * norm[:, np.newaxis]
            if 'c' in params:
                if tmp_gmm.covariance_type == 'tied':
                    stats['covars'][state] += tmp_gmm.covars_ * norm.sum()
                else:
                    cvnorm = np.copy(norm)
                    shape = np.ones(tmp_gmm.covars_.ndim, dtype=int)
                    shape[0] = np.shape(tmp_gmm.covars_)[0]
                    cvnorm.shape = shape
                    stats['covars'][state] += tmp_gmm.covars_ * cvnorm
Example #2
    def _accumulate_sufficient_statistics(self, stats, obs, framelogprob,
                                          posteriors, fwdlattice, bwdlattice,
                                          params):
        super(GMMHMM, self)._accumulate_sufficient_statistics(
            stats, obs, framelogprob, posteriors, fwdlattice, bwdlattice,
            params)

        for state, g in enumerate(self.gmms_):
            _, lgmm_posteriors = g.score_samples(obs)
            lgmm_posteriors += np.log(posteriors[:, state][:, np.newaxis]
                                      + np.finfo(float).eps)
            gmm_posteriors = np.exp(lgmm_posteriors)
            tmp_gmm = GMM(g.n_components, covariance_type=g.covariance_type)
            n_features = g.means_.shape[1]
            tmp_gmm._set_covars(
                distribute_covar_matrix_to_match_covariance_type(
                    np.eye(n_features), g.covariance_type,
                    g.n_components))
            norm = tmp_gmm._do_mstep(obs, gmm_posteriors, params)

            if np.any(np.isnan(tmp_gmm.covars_)):
                raise ValueError("NaN values in tmp_gmm.covars_")

            stats['norm'][state] += norm
            if 'm' in params:
                stats['means'][state] += tmp_gmm.means_ * norm[:, np.newaxis]
            if 'c' in params:
                if tmp_gmm.covariance_type == 'tied':
                    stats['covars'][state] += tmp_gmm.covars_ * norm.sum()
                else:
                    cvnorm = np.copy(norm)
                    shape = np.ones(tmp_gmm.covars_.ndim, dtype=int)
                    shape[0] = np.shape(tmp_gmm.covars_)[0]
                    cvnorm.shape = shape
                    stats['covars'][state] += tmp_gmm.covars_ * cvnorm
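The statistics accumulated above are consumed later in the model's M-step. The following is a minimal standalone sketch (synthetic shapes and data, not the library's own _do_mstep) of how the per-state accumulators stats['norm'], stats['means'] and stats['covars'] would typically be normalized back into mixture weights, means and diagonal covariances:

import numpy as np

# Hypothetical sizes: 2 hidden states, 3 mixture components, 2 features.
n_states, n_mix, n_features = 2, 3, 2
rng = np.random.RandomState(0)

# Stand-ins for accumulators filled by _accumulate_sufficient_statistics.
stats = {
    'norm': rng.rand(n_states, n_mix) + 1e-3,             # responsibility mass per component
    'means': rng.rand(n_states, n_mix, n_features),       # weighted mean accumulators
    'covars': rng.rand(n_states, n_mix, n_features),      # weighted (diag) covariance accumulators
}

for state in range(n_states):
    norm = stats['norm'][state]
    weights = norm / norm.sum()                           # mixture weights for this state's GMM
    means = stats['means'][state] / norm[:, np.newaxis]   # divide out the accumulated norm
    covars = stats['covars'][state] / norm[:, np.newaxis]
    print(state, weights.round(3), means.shape, covars.shape)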
Example #3
    def _init(self, X, lengths=None):
        super(GaussianHMM, self)._init(X, lengths=lengths)

        _, n_features = X.shape
        if hasattr(self, 'n_features') and self.n_features != n_features:
            raise ValueError('Unexpected number of dimensions, got %s but '
                             'expected %s' % (n_features, self.n_features))

        self.n_features = n_features
        if 'm' in self.init_params or not hasattr(self, "means_"):
            # kmeans = cluster.KMeans(n_clusters=self.n_components,
            #                         random_state=self.random_state)
            # kmeans.fit(X)
            # self.means_ = kmeans.cluster_centers_

            # kmeans = cluster.KMeans(n_clusters=self.n_components, max_iter=20,n_init=10)
            # kmeans.fit(X)
            # self.means_ = kmeans.cluster_centers_

            kmeans = cluster.MiniBatchKMeans(n_clusters=self.n_components,
                                             init='k-means++', max_iter=15,
                                             n_init=3)
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                kmeans.fit(X)
            self.means_ = kmeans.cluster_centers_

            # gmm = mixture.GMM(n_components=self.n_components, covariance_type='full').fit(X)
            # self.means_ = gmm.means_

            # #Add new code Viet
            # epsilon = 0.05
            # pmax = 0
            # for i in range(0,self.n_components):
            #     if (self.means_[i][0] > self.means_[pmax][0]):
            #         pmax = i
            # pmin = 0
            # for i in range(0, self.n_components):
            #     if (self.means_[i][0] < self.means_[pmin][0]):
            #         pmin = i
            # self.means_[pmin][0] -= epsilon
            # self.means_[pmax][0] += epsilon

            # v_max = np.max([x[0] for x in X])+1
            # v_min = np.min([x[0] for x in X])-1
            # h = (v_max-v_min)/self.n_components
            # self.means_[0][0] = v_min
            # for i in range(1,self.n_components):
            #     self.means_[i][0] = self.means_[i-1][0] + h


        if 'c' in self.init_params or not hasattr(self, "covars_"):
            cv = np.cov(X.T) + self.min_covar * np.eye(X.shape[1])
            if not cv.shape:
                cv.shape = (1, 1)
            self._covars_ = distribute_covar_matrix_to_match_covariance_type(
                cv, self.covariance_type, self.n_components).copy()
Example #4
    def _init(self, sequences):
        """Initialize the state, prior to fitting (hot starting)
        """
        sequences = [
            ensure_type(s, dtype=np.float32, ndim=2, name='s')
            for s in sequences
        ]
        self._impl._sequences = sequences

        small_dataset = np.vstack(
            sequences[0:min(len(sequences), self.n_hotstart_sequences)])

        if 'm' in self.init_params:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                self.means_ = cluster.KMeans(n_clusters=self.n_states).fit(
                    small_dataset).cluster_centers_
        if 'c' in self.init_params:
            cv = np.cov(small_dataset.T)
            self.covars_ = distribute_covar_matrix_to_match_covariance_type(
                cv, 'full', self.n_states)
            self.covars_[self.covars_ == 0] = 1e-5
        if 't' in self.init_params:
            transmat_ = np.empty((self.n_states, self.n_states))
            transmat_.fill(1.0 / self.n_states)
            self.transmat_ = transmat_
            self.populations_ = np.ones(self.n_states) / self.n_states
        if 'a' in self.init_params:
            self.As_ = np.zeros(
                (self.n_states, self.n_features, self.n_features))
            for i in range(self.n_states):
                self.As_[i] = np.eye(self.n_features) - self.eps
        if 'b' in self.init_params:
            self.bs_ = np.zeros((self.n_states, self.n_features))
            for i in range(self.n_states):
                A = self.As_[i]
                mean = self.means_[i]
                self.bs_[i] = np.dot(np.eye(self.n_features) - A, mean)
        if 'q' in self.init_params:
            self.Qs_ = np.zeros(
                (self.n_states, self.n_features, self.n_features))
            for i in range(self.n_states):
                self.Qs_[i] = self.eps * self.covars_[i]
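The 'a' and 'b' branches above set each A_i close to the identity and b_i = (I - A_i) mean_i, which makes the state's k-means centre a fixed point of the noiseless dynamics x -> A_i x + b_i. A small standalone numpy check of that property (illustrative values only, not library code):

import numpy as np

n_features, eps = 3, 0.1
mean = np.array([1.0, -2.0, 0.5])              # stands in for self.means_[i]
A = np.eye(n_features) - eps                   # mirrors self.As_[i] above
b = np.dot(np.eye(n_features) - A, mean)       # mirrors self.bs_[i] above

# One noiseless step of x -> A x + b applied at the mean returns the mean.
print(np.allclose(A @ mean + b, mean))         # True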
Example #5
    def _init(self, X, lengths=None):
        super(GaussianHMM, self)._init(X, lengths=lengths)
        _, n_features = X.shape
        if hasattr(self, 'n_features') and self.n_features != n_features:
            raise ValueError('Unexpected number of dimensions, got %s but '
                             'expected %s' % (n_features, self.n_features))

        self.n_features = n_features
        if 'm' in self.init_params or not hasattr(self, "means_"):
            kmeans = cluster.KMeans(n_clusters=self.n_components,
                                    random_state=self.random_state)
            kmeans.fit(X)
            self.means_ = kmeans.cluster_centers_
        if 'c' in self.init_params or not hasattr(self, "covars_"):
            cv = np.cov(X.T) + self.min_covar * np.eye(X.shape[1])
            if not cv.shape:
                cv.shape = (1, 1)
            self._covars_ = distribute_covar_matrix_to_match_covariance_type(
                cv, self.covariance_type, self.n_components).copy()
Example #6
    def _init(self, X, lengths=None):
        super(GaussianHMM, self)._init(X, lengths=lengths)

        _, n_features = X.shape
        if hasattr(self, 'n_features') and self.n_features != n_features:
            raise ValueError('Unexpected number of dimensions, got %s but '
                             'expected %s' % (n_features, self.n_features))

        self.n_features = n_features
        if 'm' in self.init_params or not hasattr(self, "means_"):
            kmeans = cluster.KMeans(n_clusters=self.n_components)
            kmeans.fit(X)
            self.means_ = kmeans.cluster_centers_
        if 'c' in self.init_params or not hasattr(self, "covars_"):
            cv = np.cov(X.T) + self.min_covar * np.eye(X.shape[1])
            if not cv.shape:
                cv.shape = (1, 1)
            self._covars_ = distribute_covar_matrix_to_match_covariance_type(
                cv, self.covariance_type, self.n_components).copy()
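Examples #5 and #6 are variants of hmmlearn's stock initializer; _init is not called directly but runs inside fit(). A minimal usage sketch (assuming hmmlearn is installed; the data here is synthetic):

import numpy as np
from hmmlearn.hmm import GaussianHMM

rng = np.random.RandomState(42)
# One long observation sequence drawn from two well-separated blobs.
X = np.concatenate([rng.normal(0, 1, (100, 2)),
                    rng.normal(5, 1, (100, 2))])
lengths = [200]

# With 'm' and 'c' in init_params, fit() calls _init(), so EM starts from the
# k-means cluster centres and the pooled covariance computed above.
model = GaussianHMM(n_components=2, covariance_type='full', n_iter=50,
                    init_params='stmc')
model.fit(X, lengths)
print(model.means_)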
Example #7
    def _init(self, obs, params='stmc'):
        super(GaussianHMM, self)._init(obs, params=params)

        if (hasattr(self, 'n_features')
                and self.n_features != obs[0].shape[1]):
            raise ValueError('Unexpected number of dimensions, got %s but '
                             'expected %s' % (obs[0].shape[1],
                                              self.n_features))

        self.n_features = obs[0].shape[1]

        if 'm' in params:
            self._means_ = cluster.KMeans(
                n_clusters=self.n_components).fit(obs[0]).cluster_centers_
        if 'c' in params:
            cv = np.cov(obs[0].T)
            if not cv.shape:
                cv.shape = (1, 1)
            self._covars_ = distribute_covar_matrix_to_match_covariance_type(
                cv, self._covariance_type, self.n_components)
Example #8
    def _init(self, obs, params='stmc'):
        super(GaussianHMM, self)._init(obs, params=params)

        if (hasattr(self, 'n_features')
                and self.n_features != obs[0].shape[1]):
            raise ValueError('Unexpected number of dimensions, got %s but '
                             'expected %s' %
                             (obs[0].shape[1], self.n_features))
        self.n_features = obs[0].shape[1]

        if 'm' in params:
            self._means_ = cluster.KMeans(n_clusters=self.n_components).fit(
                obs[0]).cluster_centers_
        if 'c' in params:
            cv = np.cov(obs[0].T)
            if not cv.shape:
                cv.shape = (1, 1)
            self._covars_ = distribute_covar_matrix_to_match_covariance_type(
                cv, self._covariance_type, self.n_components)
            self._covars_[self._covars_ == 0] = 1e-5
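Several of the initializers above and below floor exact zeros in the starting covariances (the 1e-5 assignment). A zero variance, which arises when a feature is constant over the initialization data, makes the Gaussian log-density undefined. A standalone sketch with a diagonal covariance (illustrative values only):

import numpy as np

var = np.array([0.0, 1.0])                     # zero variance from a constant feature
x, mean = np.array([0.3, -0.2]), np.zeros(2)

with np.errstate(divide='ignore', invalid='ignore'):
    logpdf = -0.5 * (np.log(2 * np.pi * var) + (x - mean) ** 2 / var)
print(logpdf)                                  # nan in the zero-variance dimension

var[var == 0] = 1e-5                           # the floor used in the examples above
logpdf = -0.5 * (np.log(2 * np.pi * var) + (x - mean) ** 2 / var)
print(logpdf)                                  # finite again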
Example #9
    def _init(self, sequences):
        """Initialize the state, prior to fitting (hot starting)
        """
        sequences = [ensure_type(s, dtype=np.float32, ndim=2, name="s") for s in sequences]
        self.inferrer._sequences = sequences

        small_dataset = np.vstack(sequences[0 : min(len(sequences), self.n_hotstart_sequences)])

        # Initialize means
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            self.means_ = cluster.KMeans(n_clusters=self.n_states).fit(small_dataset).cluster_centers_

        # Initialize covariances
        cv = np.cov(small_dataset.T)
        self.covars_ = distribute_covar_matrix_to_match_covariance_type(cv, "full", self.n_states)
        self.covars_[self.covars_ == 0] = 1e-5
        # Stabilize eigenvalues of matrix
        for i in range(self.n_states):
            self.covars_[i] = self.covars_[i] + 1e-5 * np.eye(self.n_features)

        # Initialize transmat
        transmat_ = np.empty((self.n_states, self.n_states))
        transmat_.fill(1.0 / self.n_states)
        self.transmat_ = transmat_
        self.populations_ = np.ones(self.n_states) / self.n_states

        # Initialize As
        self.As_ = np.zeros((self.n_states, self.n_features, self.n_features))
        self.bs_ = np.zeros((self.n_states, self.n_features))
        for i in range(self.n_states):
            A = self.As_[i]
            mean = self.means_[i]
            self.bs_[i] = np.dot(np.eye(self.n_features) - A, mean)

        # Initialize local covariances
        self.Qs_ = np.zeros((self.n_states, self.n_features, self.n_features))
        for i in range(self.n_states):
            self.Qs_[i] = self.eps * self.covars_[i]
Example #10
    def _init(self, obs, params='stmc'):
        super(GaussianHMM, self)._init(obs, params=params)

        all_obs = np.concatenate(obs)
        _, n_features = all_obs.shape
        if hasattr(self, 'n_features') and self.n_features != n_features:
            raise ValueError('Unexpected number of dimensions, got %s but '
                             'expected %s' % (n_features, self.n_features))

        self.n_features = n_features
        if 'm' in params:
            kmeans = cluster.KMeans(n_clusters=self.n_components)
            kmeans.fit(all_obs)
            self._means_ = kmeans.cluster_centers_
        if 'c' in params:
            cv = np.cov(all_obs.T)
            if not cv.shape:
                cv.shape = (1, 1)
            self._covars_ = distribute_covar_matrix_to_match_covariance_type(
                cv, self._covariance_type, self.n_components)
            self._covars_[self._covars_ == 0] = 1e-5
Example #11
    def _init(self, obs, params='stmc'):
        super(GaussianHMM, self)._init(obs, params=params)

        all_obs = np.concatenate(obs)
        _, n_features = all_obs.shape
        if hasattr(self, 'n_features') and self.n_features != n_features:
            raise ValueError('Unexpected number of dimensions, got %s but '
                             'expected %s' % (n_features, self.n_features))

        self.n_features = n_features
        if 'm' in params:
            kmeans = cluster.KMeans(n_clusters=self.n_components)
            kmeans.fit(all_obs)
            self._means_ = kmeans.cluster_centers_
        if 'c' in params:
            cv = np.cov(all_obs.T)
            if not cv.shape:
                cv.shape = (1, 1)
            self._covars_ = distribute_covar_matrix_to_match_covariance_type(
                cv, self._covariance_type, self.n_components)
            self._covars_[self._covars_ == 0] = 1e-5
Example #12
    def _init(self, sequences):
        """Initialize the state, prior to fitting (hot starting)
        """
        sequences = [ensure_type(s, dtype=np.float32, ndim=2, name='s')
                     for s in sequences]
        self._impl._sequences = sequences

        small_dataset = np.vstack(
            sequences[0:min(len(sequences), self.n_hotstart_sequences)])

        if 'm' in self.init_params:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                self.means_ = cluster.KMeans(n_clusters=self.n_states).fit(
                    small_dataset).cluster_centers_
        if 'c' in self.init_params:
            cv = np.cov(small_dataset.T)
            self.covars_ = distribute_covar_matrix_to_match_covariance_type(
                cv, 'full', self.n_states)
            self.covars_[self.covars_ == 0] = 1e-5
        if 't' in self.init_params:
            transmat_ = np.empty((self.n_states, self.n_states))
            transmat_.fill(1.0 / self.n_states)
            self.transmat_ = transmat_
            self.populations_ = np.ones(self.n_states) / self.n_states
        if 'a' in self.init_params:
            self.As_ = np.zeros(
                (self.n_states, self.n_features, self.n_features))
            for i in range(self.n_states):
                self.As_[i] = np.eye(self.n_features) - self.eps
        if 'b' in self.init_params:
            self.bs_ = np.zeros((self.n_states, self.n_features))
            for i in range(self.n_states):
                A = self.As_[i]
                mean = self.means_[i]
                self.bs_[i] = np.dot(np.eye(self.n_features) - A, mean)
        if 'q' in self.init_params:
            self.Qs_ = np.zeros(
                (self.n_states, self.n_features, self.n_features))
            for i in range(self.n_states):
                self.Qs_[i] = self.eps * self.covars_[i]
Example #13
    def _init(self, X, lengths=None, params='stmc'):
        super(GaussianHMM, self)._init(X, lengths=lengths, params=params)

        _, n_features = X.shape
        if hasattr(self, 'n_features') and self.n_features != n_features:
            raise ValueError('Unexpected number of dimensions, got %s but '
                             'expected %s' % (n_features, self.n_features))

        self.n_features = n_features
        if 'm' in params or not hasattr(self, "means_"):
            kmeans = cluster.KMeans(n_clusters=self.n_components)
            kmeans.fit(X)
            self.means_ = kmeans.cluster_centers_
        if 'c' in params or not hasattr(self, "covars_"):
            cv = np.cov(X.T)
            if not cv.shape:
                cv.shape = (1, 1)
            self._covars_ = distribute_covar_matrix_to_match_covariance_type(
                cv, self.covariance_type, self.n_components)
            self._covars_ = self._covars_.copy()
            if np.any(self._covars_ == 0):
                self._covars_[self._covars_ == 0] = 1e-5
Example #14
    def _init(self, obs, params='stmc'):
        super(WeightedGaussianHMM, self)._init(obs, params=params)

        if (hasattr(self, 'n_features')
                and self.n_features != obs[0].shape[1]):
            raise ValueError('Unexpected number of dimensions, got %s but '
                             'expected %s' % (obs[0].shape[1],
                                              self.n_features))

        self.n_features = obs[0].shape[1]

        if 'm' in params:
            # Evenly spaced states
            indices = np.fix(np.linspace(
                0, obs[0].shape[0] - 1, self.n_components)).astype(int)
            self._means_ = obs[0][indices, :]
        if 'c' in params:
            cv = np.cov(obs[0].T).clip(min=1e-3)
            if not cv.shape:
                cv.shape = (1, 1)
            self._covars_ = distribute_covar_matrix_to_match_covariance_type(
                cv, self._covariance_type, self.n_components)
Example #15
    def _init(self, X, logger, kmeans_opt, lengths=None):
        super(GaussianHMM, self)._init(X, lengths=lengths)

        _, n_features = X.shape
        if hasattr(self, 'n_features') and self.n_features != n_features:
            raise ValueError('Unexpected number of dimensions, got %s but '
                             'expected %s' % (n_features, self.n_features))

        self.n_features = n_features

        if 'm' in self.init_params or not hasattr(self, "means_"):

            if kmeans_opt == 'REGULAR' or kmeans_opt == '':
                logger.getLogger('tab.regular').info('using K-means model')
                kmeans = cluster.KMeans(n_clusters=self.n_components, n_jobs=4, verbose=True)
            else:
                logger.getLogger('tab.regular').info('using Mini Batch K-Means model')
                kmeans = cluster.MiniBatchKMeans(n_clusters=self.n_components, batch_size=1000000,
                                                 compute_labels=False, verbose=True)

            logger.getLogger('tab.regular.time').info('starting training model')
            kmeans.fit(X)
            logger.getLogger('tab.regular.time').info('finished training k-means model')

            self.means_ = kmeans.cluster_centers_

        if 'c' in self.init_params or not hasattr(self, "covars_"):
            logger.getLogger('tab.regular.time').info('starting calculating covariances')
            cv = np.cov(X.T)
            logger.getLogger('tab.regular.time').info('finished calculating covariances')

            if not cv.shape:
                cv.shape = (1, 1)
            self._covars_ = distribute_covar_matrix_to_match_covariance_type(
                cv, self.covariance_type, self.n_components)
            self._covars_ = self._covars_.copy()
            if np.any(self._covars_ == 0):
                self._covars_[self._covars_ == 0] = 1e-5

for numState in range(2, maxSubstates+1):
    
    # make the output directory
    call(["mkdir", "-p", "data/substates/%s%d/%d" % (basepath, stateNum, numState)])
    
    # do the replicates
    for repInx in range(0, numReps):
        print("Doing replicate", repInx, "/", numReps, "with", numState, "states")
        sys.stdout.flush()
        
        # cluster all the available data and use that as initial point
        means = cluster.KMeans(n_clusters=numState).fit(
            indata.iloc[:, 0:num_data]).cluster_centers_
        cv = np.cov(indata.iloc[:, 0:num_data].T)
        covars = mixture.distribute_covar_matrix_to_match_covariance_type(
            cv, "tied", num_data)
        covars[covars == 0] = 1e-5
        
        # init_params contains every letter except 'c' and 'm', so fit() keeps
        # the hand-set means_ and covars_ while still initializing the rest.
        model = GaussianHMM(numState, covariance_type="tied", n_iter=1000,
                            init_params='abdefghijklnopqrstuvwxyz'
                                        'ABDEFGHIJKLNOPQRSTUVWXYZ')
        model.means_ = means
        model.covars_ = covars
        
        print("Fitting model...")
        sys.stdout.flush()
        model.fit(data)

        print("Decoding states...")
        sys.stdout.flush()
        # do a loop over everything and record in one long array
        states = np.array([])
        score = 0