Example #1
 def _do_mstep(self, stats):
     "customized for combination fixed and estimated transition probs"
     assert not 's' in self.params
     # update transition probs if specified in params
     # follows base._do_mstep()
     if 't' in self.params:
         t_ind = self.est_transition_indices
         cp = np.copy(self.transmat_[t_ind[:, None], t_ind])
         cp = np.where(cp == 0.0, cp, stats['trans'][t_ind[:, None], t_ind])
         normalize(cp, axis=1)
         self.transmat_[t_ind[:, None], t_ind] = cp
     # update emission probs if specified in params
     if 'e' in self.params:
         # the below code just normalizes the stats['obs'] matrix
         # so the rows sum to 1. The rows themselves represent the
         # probability of drawing from each of the n_features
         # features, for each component (of which there are n_components)
         sums = stats['obs'].sum(1)
         stats['obs'][sums == 0, :] = [0.25, 0.25, 0.25, 0.25]
         self.emissionprob_ = (stats['obs']
                               / stats['obs'].sum(1)[:, np.newaxis])
     if np.any(np.isnan(self.transmat_)) or \
         np.any(np.isnan(self.emissionprob_)):
         print(stats['obs'], file=sys.stderr)
         raise Exception("Found nans")
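
A minimal standalone sketch of the emission update used above, in plain NumPy with a made-up 4-feature count matrix standing in for stats['obs']: components that received no counts fall back to a uniform distribution, then every row is rescaled to sum to 1.

import numpy as np

obs_counts = np.array([[8.0, 2.0, 0.0, 0.0],   # stand-in for stats['obs']
                       [0.0, 0.0, 0.0, 0.0]])  # a component that saw no data

sums = obs_counts.sum(1)
obs_counts[sums == 0, :] = [0.25, 0.25, 0.25, 0.25]   # uniform fallback
emissionprob = obs_counts / obs_counts.sum(1)[:, np.newaxis]
assert np.allclose(emissionprob.sum(1), 1.0)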
Example #2
    def test_fit(self, params='stpmaw', n_iter=15, **kwargs):
        h = self.h
        h.params = params

        lengths = 5000

        X, true_state_sequence = h.sample(lengths, random_state=self.prng)

        # perturb parameters
        h.startprob_ = normalize(self.prng.rand(self.n_unique))
        h.transmat_ = normalize(self.prng.rand(self.n_unique,
                                               self.n_unique), axis=1)
        h.alpha_ = np.array([[0.001], [0.001]])
        h.var_ = np.array([0.5, 0.5])
        h.mu = np.array([10.0, 8.0])

        trainll = fit_hmm_and_monitor_log_likelihood(h, X, n_iter=n_iter)

        # Check that the log-likelihood is always increasing during training.
        #diff = np.diff(trainll)
        #self.assertTrue(np.all(diff >= -1e-6),
        #                "Decreasing log-likelihood: {0}" .format(diff))

        assert_array_almost_equal(h.mu_.reshape(-1), self.mu.reshape(-1),
                                  decimal=1)
        assert_array_almost_equal(h.var_.reshape(-1), self.var.reshape(-1),
                                  decimal=1)
        assert_array_almost_equal(h.transmat_.reshape(-1),
                                  self.transmat.reshape(-1), decimal=1)
        assert_array_almost_equal(h.alpha_.reshape(-1),
                                  self.alpha.reshape(-1), decimal=1)
Example #3
    def _init(self, X, lengths=None):

        super(DirMulHMM, self)._init(X, lengths=lengths)
        self.random_state_ = check_random_state(self.random_state)

        X = _to_list(X)

        if 'e' in self.init_params:
            self.emission_suffstats_ = []
            self.emission_prior_ = []

            z = coo_matrix(
                self.random_state_.multinomial(
                    1,
                    np.ones(self.n_components) / self.n_components,
                    X[0].shape[0]))
            for modi in range(len(X)):
                # random init but with read depth offset
                _, n_features = X[modi].shape

                # prior
                x = np.array(X[modi].sum(0)) + 1.
                normalize(x)
                x *= self.emission_prior

                self.emission_prior_.append(x)

                r = z.T.dot(X[modi]).toarray()

                self.emission_suffstats_.append(r)
        self.n_features = [x.shape[1] for x in X]
Example #4
    def test_fit(self, params='stmwc', n_iter=5, verbose=False, **kwargs):
        h = hmm.GMMHMM(self.n_components, covars_prior=1.0)
        h.startprob_ = self.startprob
        h.transmat_ = normalize(
            self.transmat + np.diag(self.prng.rand(self.n_components)), 1)
        h.gmms_ = self.gmms_

        # Create training data by sampling from the HMM.
        train_obs = [h.sample(n=10, random_state=self.prng)[0]
                     for x in range(10)]

        # Mess up the parameters and see if we can re-learn them.
        h.n_iter = 0
        h.fit(train_obs)
        h.transmat_ = normalize(self.prng.rand(self.n_components,
                                               self.n_components), axis=1)
        h.startprob_ = normalize(self.prng.rand(self.n_components))

        trainll = train_hmm_and_keep_track_of_log_likelihood(
            h, train_obs, n_iter=n_iter, params=params)[1:]

        if not np.all(np.diff(trainll) > 0) and verbose:
            print('Test train: (%s)\n  %s\n  %s' % (params, trainll,
                                                    np.diff(trainll)))

        # XXX: this test appears to check that training log likelihood should
        # never be decreasing (up to a tolerance of 0.5, why?) but this is not
        # the case when the seed changes.
        raise SkipTest("Unstable test: trainll is not always increasing "
                       "depending on seed")

        self.assertTrue(np.all(np.diff(trainll) > -0.5))
Example #5
    def _set_startprob(self, startprob):

        if startprob is None:
            startprob = np.tile(1.0 / self.n_components, self.n_components)
        else:
            startprob = np.asarray(startprob, dtype=float)

            if not np.all(startprob <= 1.0):
                normalize(startprob)

            if len(startprob) != self.n_components:
                if len(startprob) == self.n_unique:
                    startprob_split = np.copy(startprob) / (1.0 + self.n_tied)
                    startprob = np.zeros(self.n_components)
                    for u in range(self.n_unique):
                        for t in range(self.n_chain):
                            startprob[u*(self.n_chain)+t] = \
                                startprob_split[u].copy()
                else:
                    raise ValueError("cannot match shape of startprob")

        if not np.allclose(np.sum(startprob), 1.0):
            raise ValueError('startprob must sum to 1.0')

        self._log_startprob = np.log(np.asarray(startprob).copy())
Example #6
    def _do_mstep(self, stats):
        super(GMMHMM, self)._do_mstep(stats)

        # All that is left to do is to apply covars_prior to the
        # parameters updated in _accumulate_sufficient_statistics.
        for state, g in enumerate(self.gmms_):
            n_features = g.means_.shape[1]
            norm = stats['norm'][state]
            if 'w' in self.params:
                g.weights_ = norm.copy()
                normalize(g.weights_)
            if 'm' in self.params:
                g.means_ = stats['means'][state] / norm[:, np.newaxis]
            if 'c' in self.params:
                if g.covariance_type == 'tied':
                    g.covars_ = ((stats['covars'][state] +
                                  self.covars_prior * np.eye(n_features)) /
                                 norm.sum())
                else:
                    cvnorm = np.copy(norm)
                    shape = np.ones(g.covars_.ndim, dtype=int)
                    shape[0] = np.shape(g.covars_)[0]
                    cvnorm.shape = shape
                    if g.covariance_type in ['spherical', 'diag']:
                        g.covars_ = (stats['covars'][state] +
                                     self.covars_prior) / cvnorm - g.means_**2
                    elif g.covariance_type == 'full':
                        eye = np.eye(n_features)
                        g.covars_ = ((stats['covars'][state] +
                                      self.covars_prior * eye[np.newaxis]) /
                                     cvnorm) - g.means_**2
Example #7
    def adjust_traffic_model_(self):
        """Offset model params to avoid over-/under-flow"""
        # replace NaN and show warning.
        self.traffic_model.startprob_, flag1 = \
            self.fixnan(self.traffic_model.startprob_)
        self.traffic_model.transmat_, flag2 = \
            self.fixnan(self.traffic_model.transmat_)
        self.traffic_model.emissionrates_, flag3 = \
            self.fixnan(self.traffic_model.emissionrates_)
        if self.verbose > 2:
            print " " * 12 + "SJTUModel.adjust_traffic_model_():",
            print "model param NaN replacement {}, {}, {}".format(
                flag1, flag2, flag3)

        self.traffic_model.startprob_prior += self.ADJUST_OFFSET
        self.traffic_model.transmat_ += self.ADJUST_OFFSET
        if self.TRAFFIC_MODEL_TYPE == 'Poisson' or \
           self.TRAFFIC_MODEL_TYPE == 'MMPP':
            self.traffic_model.emissionrates_ += self.ADJUST_OFFSET  # when the model is general MMPP
        elif self.TRAFFIC_MODEL_TYPE == 'IPP':
            self.traffic_model.emissionrates_[0] += self.ADJUST_OFFSET
            self.traffic_model.emissionrates_[1] = 0.0  # when the model is IPP
        else:
            raise ValueError('Unknown traffic model type {}'.format(
                self.TRAFFIC_MODEL_TYPE))

        normalize(self.traffic_model.startprob_)
        normalize(self.traffic_model.transmat_, axis=1)

        return
Example #8
    def init_params(self, X):
        '''
        Parameters initialization.
        '''
        if type(X) == list:
            X = np.concatenate(X)

        self.mixCoefUnNorm = np.random.rand(self.n_nodes, self.mix_dim) + 1e-9
        self.mixCoef = np.reshape(np.ones(3) * (1 / 3), (1, 3))  # alternative: relu_normalization(self.mixCoefUnNorm, axis=1)

        startProb = np.exp(np.random.randn(self.mix_dim, self.n_components))
        normalize(startProb, axis=1)

        transProb = np.exp(np.random.randn(self.mix_dim, self.n_components,
                                           self.n_components))
        normalize(transProb, axis=2)

        self.time_ = 1
        self.first_moment_ = np.zeros_like(self.mixCoef)
        self.second_moment_ = np.zeros_like(self.mixCoef)

        if self.emission == 'gaussian':
            self.mixModels = [hmm.GaussianHMM(n_components=self.n_components,
                                              covariance_type='diag')
                              for i in range(self.mix_dim)]

            for m in range(self.mix_dim):
                self.mixModels[m]._init(X)

        else:
            raise NotImplementedError('{} emission is not implemented'
                                      .format(self.emission))
Example #9
    def update_belief_state(self, last_state, last_action, observation):
        """Estimate belief state using current observation.

        belief_state = (last_traffic_belief, current_q, last_sleep_flag)
        """
        if observation is None or len(
                self.traffic_window) < self.TRAFFIC_WINDOW_SIZE:
            return None

        (last_q, last_traffic_req, current_q) = observation
        (sleep_flag, control_req) = last_action

        # Get traffic belief state. Assume the traffic window already contains the latest observation.
        framelogprob = self.traffic_model._compute_log_likelihood(
            np.array(self.traffic_window)
            [:, None])  # observation log prob. for each time step
        _, fwdlattice = self.traffic_model._do_forward_pass(
            framelogprob)  # log posteriors for each time step
        posterior = np.exp(fwdlattice[-1, :])
        normalize(posterior)
        traffic_belief = self.quantize_belief_state_(posterior[0])

        current_q = self.limit_queue_length(current_q)

        # last sleeping state = last sleep flag
        pass

        return (traffic_belief, current_q, sleep_flag)
Example #10
    def test_fit(self, params='ste', n_iter=5, **kwargs):
        h = self.h
        h.params = params

        lengths = np.array([10] * 10)
        X, _state_sequence = h.sample(lengths.sum(), random_state=self.prng)

        # Mess up the parameters and see if we can re-learn them.
        h.startprob_ = normalize(self.prng.rand(self.n_components))
        h.transmat_ = normalize(self.prng.rand(self.n_components,
                                               self.n_components),
                                axis=1)
        h.emissionprob_ = normalize(self.prng.rand(self.n_components,
                                                   self.n_features),
                                    axis=1)

        trainll = fit_hmm_and_monitor_log_likelihood(h,
                                                     X,
                                                     lengths=lengths,
                                                     n_iter=n_iter)

        # Check that the log-likelihood is always increasing during training.
        diff = np.diff(trainll)
        self.assertTrue(np.all(diff >= -1e-6),
                        "Decreasing log-likelihood: {0}".format(diff))
Example #11
	def update_emissionprob(self, stats, states_indices):
		total = stats['obs'].sum()
		emissionprob_ = np.asarray(self.emissionprob_)
		emissionprob_[states_indices, :] = (stats['obs'] / stats['obs'].sum(1)[:, np.newaxis])[states_indices, :]
		normalize(emissionprob_, axis=1)
		emissionprob_ = np.where(self.emissionprob_ > EPS, emissionprob_ + 1. / total, self.emissionprob_)
		self.emissionprob_ = emissionprob_
Example #12
	def learn(self, ids, model):
		startprob_ = np.array([1, 1, 0, 1, 0.01], dtype='float32')
		print(startprob_)
		self.startprob_ = normalize(startprob_)

		date_transmat_ = np.array([[0, 1, 0], [1, 0, 1], [1, 0, 0]], dtype='float32')
		transmat_ = np.zeros((self.n_components, self.n_components))
		transmat_[0:3, 0:3] = date_transmat_
		transmat_[3, 3] = 1
		transmat_[:, -1] = 1
		transmat_[1, -1] = 0
		print(transmat_)
		self.transmat_ = normalize(transmat_, 1)

		training_data = self.preprocess(ids, model, True)[0]

		emissionprob_ = np.zeros((self.n_components, len(self.voca)))

		value = 0
		emissionprob_[0, :] = encode(
			[str(y) for y in list(range(1960, 2016)) + list(range(60, 100))]
			+ ["{0:02d}".format(i) for i in range(16)], self.voca)
		emissionprob_[1, :] = encode(["{0:02d}".format(m) for m in range(1, 13)], self.voca)
		date_prob = encode(["{0:02d}".format(d) for d in range(1, 31)], self.voca)
		date_prob[self.voca.get('31')] = 0.5
		emissionprob_[2, :] = date_prob

		emissionprob_[3, :-1] = 1

		emissionprob_[-1, -1] = 1
		self.emissionprob_ = normalize(emissionprob_, 1)

		self.fit(training_data)
Example #13
    def test_fit(self, params='stpmaw', n_iter=5, **kwargs):
        h = self.h
        h.params = params

        lengths = 1000
        X, _state_sequence = h.sample(lengths, random_state=self.prng)

        # Perturb
        pstarting = self.prng.rand(self.n_components)
        normalize(pstarting)
        h.startprob_ = pstarting
        ptransmat = self.prng.rand(self.n_components, self.n_components)
        normalize(ptransmat, axis=1)
        h.transmat_ = ptransmat

        # TODO: Test more parameters, generate test cases
        trainll = fit_hmm_and_monitor_log_likelihood(
            h, X, n_iter=n_iter)

        # Check that the log-likelihood is always increasing during training.
        #diff = np.diff(trainll)
        #self.assertTrue(np.all(diff >= -1e-6),
        #                "Decreasing log-likelihood: {0}" .format(diff))

        assert_array_almost_equal(h.mu_.reshape(-1),
                                  self.mu.reshape(-1), decimal=1)
        assert_array_almost_equal(h.var_.reshape(-1),
                                  self.var.reshape(-1), decimal=1)
        assert_array_almost_equal(h.transmat_.reshape(-1),
                                  self.transmat.reshape(-1), decimal=2)
Example #14
    def test_only_emission_train(self,
                                 n_samples=100,
                                 n_sequences=30,
                                 tr_params="e"):
        """
        Test if the emission probabilities can be re-learnt. 

        :param n_samples: number of samples to generate for each sequence, defaults to 100
        :type n_samples: int, optional
        :param n_sequences: number of sequences to generate, defaults to 30
        :type n_sequences: int, optional
        :param tr_params: which model parameters to train, defaults to "e"
        :type tr_params: str, optional
        """
        h = self.h
        h.tr_params = tr_params
        # Generate observation sequences
        X = self.h.sample(n_sequences=n_sequences, n_samples=n_samples)

        # Mess up the emission probabilities and see if we can re-learn them.
        h.B = np.asarray([
            np.random.random((self.n_states, self.n_features[i]))
            for i in range(self.n_emissions)
        ])
        for i in range(self.n_emissions):
            normalize(h.B[i], axis=1)

        h, log_likelihoods = h._train(X,
                                      n_iter=100,
                                      conv_thresh=0.01,
                                      return_log_likelihoods=True,
                                      no_init=True)

        # we consider learning if the log_likelihood increases
        assert np.all(np.round(np.diff(log_likelihoods), 10) >= 0)
Example #15
    def test_fit(self, params='stmwc', n_iter=5, verbose=False, **kwargs):
        h = hmm.GMMHMM(self.n_components, covars_prior=1.0)
        h.startprob_ = self.startprob
        h.transmat_ = normalize(
            self.transmat + np.diag(self.prng.rand(self.n_components)), 1)
        h.gmms_ = self.gmms_

        # Create training data by sampling from the HMM.
        train_obs = [h.sample(n=10, random_state=self.prng)[0]
                     for x in range(10)]

        # Mess up the parameters and see if we can re-learn them.
        h.n_iter = 0
        h.fit(train_obs)
        h.transmat_ = normalize(self.prng.rand(self.n_components,
                                                   self.n_components), axis=1)
        h.startprob_ = normalize(self.prng.rand(self.n_components))

        trainll = train_hmm_and_keep_track_of_log_likelihood(
            h, train_obs, n_iter=n_iter, params=params)[1:]

        if not np.all(np.diff(trainll) > 0) and verbose:
            print('Test train: (%s)\n  %s\n  %s' % (params, trainll,
                                                    np.diff(trainll)))

        # XXX: this test appears to check that training log likelihood should
        # never be decreasing (up to a tolerance of 0.5, why?) but this is not
        # the case when the seed changes.
        raise SkipTest("Unstable test: trainll is not always increasing "
                       "depending on seed")

        self.assertTrue(np.all(np.diff(trainll) > -0.5))
Example #16
    def test_fit(self, params='stpmaw', n_iter=15, **kwargs):
        h = self.h
        h.params = params

        lengths = 5000

        X, true_state_sequence = h.sample(lengths, random_state=self.prng)

        # perturb parameters
        h.startprob_ = normalize(self.prng.rand(self.n_unique))
        h.transmat_ = normalize(self.prng.rand(self.n_unique, self.n_unique),
                                axis=1)
        h.alpha_ = np.array([[0.001], [0.001]])
        h.var_ = np.array([0.5, 0.5])
        h.mu = np.array([10.0, 8.0])

        trainll = fit_hmm_and_monitor_log_likelihood(h, X, n_iter=n_iter)

        # Check that the log-likelihood is always increasing during training.
        #diff = np.diff(trainll)
        #self.assertTrue(np.all(diff >= -1e-6),
        #                "Decreasing log-likelihood: {0}" .format(diff))

        assert_array_almost_equal(h.mu_.reshape(-1),
                                  self.mu.reshape(-1),
                                  decimal=1)
        assert_array_almost_equal(h.var_.reshape(-1),
                                  self.var.reshape(-1),
                                  decimal=1)
        assert_array_almost_equal(h.transmat_.reshape(-1),
                                  self.transmat.reshape(-1),
                                  decimal=1)
        assert_array_almost_equal(h.alpha_.reshape(-1),
                                  self.alpha.reshape(-1),
                                  decimal=1)
Example #17
    def test_fit(self, params='stmwc', n_iter=5, verbose=False, **kwargs):
        h = hmm.GMMHMM(self.n_components, covars_prior=1.0)
        h.startprob_ = self.startprob
        h.transmat_ = normalize(
            self.transmat + np.diag(self.prng.rand(self.n_components)), 1)
        h.gmms_ = self.gmms

        lengths = [10] * 10
        X, _state_sequence = h.sample(sum(lengths), random_state=self.prng)

        # Mess up the parameters and see if we can re-learn them.
        h.n_iter = 0
        h.fit(X, lengths=lengths)
        h.transmat_ = normalize(self.prng.rand(self.n_components,
                                               self.n_components),
                                axis=1)
        h.startprob_ = normalize(self.prng.rand(self.n_components))

        trainll = fit_hmm_and_monitor_log_likelihood(h,
                                                     X,
                                                     lengths=lengths,
                                                     n_iter=n_iter)
        if not np.all(np.diff(trainll) > 0) and verbose:
            print('Test train: (%s)\n  %s\n  %s' %
                  (params, trainll, np.diff(trainll)))

        # XXX: this test appears to check that training log likelihood should
        # never be decreasing (up to a tolerance of 0.5, why?) but this is not
        # the case when the seed changes.
        raise SkipTest("Unstable test: trainll is not always increasing "
                       "depending on seed")

        self.assertTrue(np.all(np.diff(trainll) > -0.5))
Example #18
    def test_fit(self, params='stmwc', n_iter=5, verbose=False, **kwargs):
        h = hmm.GMMHMM(self.n_components, covars_prior=1.0)
        h.startprob_ = self.startprob
        h.transmat_ = normalize(
            self.transmat + np.diag(self.prng.rand(self.n_components)), 1)
        h.gmms_ = self.gmms

        lengths = [10] * 10
        X, _state_sequence = h.sample(sum(lengths), random_state=self.prng)

        # Mess up the parameters and see if we can re-learn them.
        h.n_iter = 0
        h.fit(X, lengths=lengths)
        h.transmat_ = normalize(self.prng.rand(self.n_components,
                                               self.n_components), axis=1)
        h.startprob_ = normalize(self.prng.rand(self.n_components))

        trainll = fit_hmm_and_monitor_log_likelihood(
            h, X, lengths=lengths, n_iter=n_iter)
        if not np.all(np.diff(trainll) > 0) and verbose:
            print('Test train: (%s)\n  %s\n  %s' % (params, trainll,
                                                    np.diff(trainll)))

        # XXX: this test appears to check that training log likelihood should
        # never be decreasing (up to a tolerance of 0.5, why?) but this is not
        # the case when the seed changes.

        self.assertTrue(np.all(np.diff(trainll) > -0.5))
Example #19
    def _set_startprob(self, startprob):

        if startprob is None:
            startprob = np.tile(1.0 / self.n_components, self.n_components)
        else:
            startprob = np.asarray(startprob, dtype=float)

            if not np.all(startprob <= 1.0):
                normalize(startprob)

            if len(startprob) != self.n_components:
                if len(startprob) == self.n_unique:
                    startprob_split = np.copy(startprob) / (1.0+self.n_tied)
                    startprob = np.zeros(self.n_components)
                    for u in range(self.n_unique):
                        for t in range(self.n_chain):
                            startprob[u*(self.n_chain)+t] = \
                                startprob_split[u].copy()
                else:
                    raise ValueError("cannot match shape of startprob")

        if not np.allclose(np.sum(startprob), 1.0):
            raise ValueError('startprob must sum to 1.0')

        self._log_startprob = np.log(np.asarray(startprob).copy())
Example #20
def test_normalize_along_axis():
    A = np.random.normal(42., size=(128, 4))
    for axis in range(A.ndim):
        A[np.random.choice(len(A), size=16), axis] = 0.0
        assert (A[:, axis] == 0.0).any()
        normalize(A, axis=axis)
        assert np.allclose(A.sum(axis=axis), 1.)
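
For context on what these tests exercise: hmmlearn.utils.normalize rescales an array in place so that it sums to 1, either over all entries or along a given axis. A minimal usage sketch:

import numpy as np
from hmmlearn.utils import normalize

rng = np.random.RandomState(0)

transmat = rng.rand(3, 3)
normalize(transmat, axis=1)        # each row becomes a probability distribution
assert np.allclose(transmat.sum(axis=1), 1.0)

startprob = rng.rand(3)
normalize(startprob)               # no axis: the whole array sums to 1
assert np.isclose(startprob.sum(), 1.0)

Note that this normalize modifies its argument and returns None; the examples above that assign its return value (e.g. h.startprob_ = normalize(...)) presumably use an older variant that returned the normalized array.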
Example #21
    def _init(self, X, lengths=None):
        self._check_and_set_n_features(X)
        super()._init(X, lengths=lengths)
        self.random_state = check_random_state(self.random_state)

        if 'e' in self.init_params:
            self.emissionprob_ = self.random_state \
                .rand(self.n_components, self.n_features)
            normalize(self.emissionprob_, axis=1)
Example #22
	def _do_mstep(self, stats, params):
		if 'e' in params:
			total = stats['obs'].sum()
			emissionprob_ = np.asarray(self.emissionprob_)
			emissionprob_[4, :] = (stats['obs'] / stats['obs'].sum(1)[:, np.newaxis])[4, :]
			emissionprob_ = np.where(self.emissionprob_ > EPS, emissionprob_ + 1. / total, self.emissionprob_ )
			normalize(emissionprob_, axis=1)
			# self.emissionprob_[4, :] = emissionprob_[4, :]
			self.emissionprob_ = emissionprob_
Example #23
        def _do_mstep(self, stats):
            hmm._BaseHMM._do_mstep(self, stats)

            if 'e' in self.params:
                emissionprob_ = self.emission_prior - 1.0 + stats['obs']
                self.emissionprob_ = np.where(self.emissionprob_ == 0.0,
                                              self.emissionprob_,
                                              emissionprob_)
                normalize(self.emissionprob_, axis=1)
Example #24
def fix_unused_unroll(model, signal):
    pred = model.predict(signal)
    bc = np.bincount(pred,minlength=model.n_components)
    max_id = np.argmax(bc)
    max_covar_id = np.argmax(model.covars_)
    ids = np.argwhere(bc == 0).flatten()
    used = np.argwhere(bc != 0).flatten()
    probs = bc/float(sum(bc))

    mapped = {}

    import random
    import sklearn.mixture

    ids = ids[0:len(used)]

    for id in ids:
        # replace_id = np.random.choice(used)
        # randomly select node to clone according to its "information weight"
        # replace_id = np.random.choice(model.n_components,p=probs)
        replace_id = random.choices(range(model.n_components),weights=bc)[0]

        mapped[id] = (replace_id, 2*bc[replace_id])

        # lower prob of used node
        bc[replace_id] = 0
        # this will make:
        # cloned states for the clone fail in GMixture, and make an identical copy
        # cloned states from the origin keep the same GMixture, and an identical copy as well
        # TODO: if that's okay - store the relation and avoid refitting GMixture
        bc[id] = bc[replace_id]


        in_trans = model.transmat_[:,id].copy()

        model.transmat_[id,:] = model.transmat_[replace_id,:]
        model.transmat_[replace_id,id] += model.transmat_[replace_id,replace_id]
        model.transmat_[id,id] += model.transmat_[replace_id,replace_id]
        model.transmat_[replace_id,replace_id] = 2e-290

        # staying in the giver state is forbidden;
        # instead, transition to the cloned state
        # model.transmat_[replace_id,id] += model.transmat_[replace_id,replace_id]
        # model.transmat_[replace_id,replace_id] = 0.0001
        utils.normalize(model.transmat_, 1)

        model.startprob_[replace_id] /= 2.
        model.startprob_[id] += model.startprob_[replace_id]

        model.means_[id] = model.means_[replace_id]
        # diverge them slighly to cover more ground
        # model.means_[replace_id] *= 1.001
        model._covars_[id] = model._covars_[replace_id]


    print("fixed no nodes",len(ids), mapped)
Example #25
	def learn(self, ids, model, gender=None):
		startprob_ = np.array([1, 1, 1, 1, 3, 1e-4], dtype='float32')
		print(startprob_)
		self.startprob_ = normalize(startprob_)

		namechar_transmat_ = np.array([[0, 1], [1, 1]], dtype='float32')
		transmat_ = np.zeros((self.n_components, self.n_components))
		transmat_[0:2, 0:2] = namechar_transmat_
		transmat_[2:4, 2:4] = namechar_transmat_
		transmat_[4, :] = 1
		transmat_[0:5, 4] = 1
		transmat_[:, -1] = 1
		print(transmat_)
		self.transmat_ = normalize(transmat_, 1)

		training_data = self.preprocess(ids, model, True)[0]

		emissionprob_ = np.zeros((self.n_components, len(self.voca)))

		value = 0
		kor_sur_probs, eng_sur_probs = get_surname_probs(self.voca, value)
		emissionprob_[0, :] = kor_sur_probs
		emissionprob_[2, :] = eng_sur_probs

		probs = get_namechar_probs(self.voca, value)
		if gender is None:
			kor_name_probs = np.mean(probs[0:4, :], axis=0)
			eng_name_probs = np.mean(probs[4:8, :], axis=0)
		elif gender == 'M':
			kor_name_probs = np.mean(probs[0:2, :], axis=0)
			eng_name_probs = np.mean(probs[4:6, :], axis=0)
		else:
			kor_name_probs = np.mean(probs[2:4, :], axis=0)
			eng_name_probs = np.mean(probs[6:8, :], axis=0)
		emissionprob_[1, :] = kor_name_probs
		emissionprob_[3, :] = eng_name_probs

		# word_probs = np.zeros(len(self.voca))
		# for word, freq in self.word_freq.iteritems():
		# 	idx = self.voca.get(word)
		# 	if idx is not None:
		# 		word_probs[idx] = float(freq)
		# emissionprob_[4, :] = word_probs
		emissionprob_[4, :-1] = 1

		emissionprob_[-1, -1] = 1
		print(emissionprob_)
		self.emissionprob_ = normalize(emissionprob_, 1)

		# for word, idx in self.voca.iteritems():
		# 	print word, self.emissionprob_[:, idx]

		self.fit(training_data)
Example #26
	def learn(self, ids, model, gender=None):
		startprob_ = np.array([1, 1, 1, 1, 1, 1, 1, 1e-4], dtype='float32')
		print(startprob_)
		self.startprob_ = normalize(startprob_)

		namechar_transmat_ = np.array([[0, 1, 0], [0, 0, 1], [1, 0, 0]], dtype='float32')
		transmat_ = np.zeros((self.n_components, self.n_components))
		transmat_[0:3, 0:3] = namechar_transmat_
		transmat_[3:6, 3:6] = namechar_transmat_
		transmat_[6, :] = 1
		transmat_[0:7, 6] = 1
		transmat_[:, -1] = 1
		print(transmat_)
		self.transmat_ = normalize(transmat_, 1)

		training_data = self.preprocess(ids, model, True)[0]

		emissionprob_ = np.zeros((self.n_components, len(self.voca)))

		value = 0
		kor_sur_probs, eng_sur_probs = get_surname_probs(self.voca, value)
		emissionprob_[0, :] = kor_sur_probs
		emissionprob_[3, :] = eng_sur_probs

		probs = get_namechar_probs(self.voca, value)
		if gender is None:
			kor_front_probs = (probs[0, :] + probs[2, :]) / 2
			kor_back_probs = (probs[1, :] + probs[3, :]) / 2
			eng_front_probs = (probs[4, :] + probs[6, :]) / 2
			eng_back_probs = (probs[5, :] + probs[7, :]) / 2
		elif gender == 'M':
			kor_front_probs = probs[0, :]
			kor_back_probs = probs[1, :]
			eng_front_probs = probs[4, :]
			eng_back_probs = probs[5, :]
		else:
			kor_front_probs = probs[2, :]
			kor_back_probs = probs[3, :]
			eng_front_probs = probs[6, :]
			eng_back_probs = probs[7, :]
		emissionprob_[1, :] = kor_front_probs
		emissionprob_[2, :] = kor_back_probs
		emissionprob_[4, :] = eng_front_probs
		emissionprob_[5, :] = eng_back_probs

		emissionprob_[6, :-1] = 1

		emissionprob_[-1, -1] = 1
		print(emissionprob_)
		self.emissionprob_ = normalize(emissionprob_, 1)

		self.fit(training_data)
Example #27
    def _do_mstep(self, stats):
        '''
        Performs the M step of the EM algorithm, updating all model parameters.
        Inputs:
            stats - dictionary containing sufficient statistics.
        '''
        if self.reg_:
            self._fit_coef(stats)
        else:
            self.mixCoef = stats['mix_post']
            normalize(self.mixCoef, axis=1)

        for m in range(self.mix_dim):
            self.mixModels[m]._do_mstep(stats['mix_idx' + str(m)])
Example #28
    def _do_mstep(self, stats):
        if 's' in self.params:
            startprob_ = stats['start']
            self.startprob_ = np.where(self.startprob_ == 0.0, self.startprob_,
                                       startprob_)
            normalize(self.startprob_)
        if 't' in self.params:
            transmat_ = stats['trans']
            self.transmat_ = np.where(self.transmat_ == 0.0, self.transmat_,
                                      transmat_)
            normalize(self.transmat_, axis=1)

            for i, row in enumerate(self.transmat_):
                if not np.any(row):
                    self.transmat_[i][i] = 1
Example #29
    def test_fit(self, params='stmc', n_iter=5, **kwargs):
        h = hmm.GaussianHMM(self.n_components, self.covariance_type)
        h.startprob_ = self.startprob
        h.transmat_ = normalize(
            self.transmat + np.diag(self.prng.rand(self.n_components)), 1)
        h.means_ = 20 * self.means
        h.covars_ = self.covars[self.covariance_type]

        lengths = [10] * 10
        X, _state_sequence = h.sample(sum(lengths), random_state=self.prng)

        # Mess up the parameters and see if we can re-learn them.
        h.n_iter = 0
        h.fit(X, lengths=lengths)

        trainll = fit_hmm_and_monitor_log_likelihood(h,
                                                     X,
                                                     lengths=lengths,
                                                     n_iter=n_iter)

        # Check that the log-likelihood is always increasing during training.
        diff = np.diff(trainll)
        message = ("Decreasing log-likelihood for {0} covariance: {1}".format(
            self.covariance_type, diff))
        self.assertTrue(np.all(diff >= -1e-6), message)
Example #30
    def test_fit(self, params='stmc', n_iter=5, verbose=False, **kwargs):
        h = hmm.GaussianHMM(self.n_components, self.covariance_type)
        h.startprob_ = self.startprob
        h.transmat_ = normalize(
            self.transmat + np.diag(self.prng.rand(self.n_components)), 1)
        h.means_ = 20 * self.means
        h.covars_ = self.covars[self.covariance_type]

        # Create training data by sampling from the HMM.
        train_obs = [h.sample(n=10)[0] for x in range(10)]

        # Mess up the parameters and see if we can re-learn them.
        h.n_iter = 0
        h.fit(train_obs)

        trainll = train_hmm_and_keep_track_of_log_likelihood(
            h, train_obs, n_iter=n_iter, params=params, **kwargs)[1:]

        # Check that the loglik is always increasing during training
        if not np.all(np.diff(trainll) > 0) and verbose:
            print('Test train: %s (%s)\n  %s\n  %s'
                  % (self.covariance_type, params, trainll, np.diff(trainll)))
        delta_min = np.diff(trainll).min()
        self.assertTrue(
            delta_min > -0.8,
            "The min nll increase is %f which is lower than the admissible"
            " threshold of %f, for model %s. The likelihoods are %s."
            % (delta_min, -0.8, self.covariance_type, trainll))
Example #31
    def _do_mstep(self, stats, params):
        # Based on Huang, Acero, Hon, "Spoken Language Processing",
        # p. 443 - 445
        if self.startprob_prior is None:
            self.startprob_prior = 1.0
        if self.transmat_prior is None:
            self.transmat_prior = 1.0

        if 's' in params:
            self.startprob_ = normalize(
                np.maximum(self.startprob_prior - 1.0 + stats['start'], 1e-20))
        if 't' in params:
            transmat_ = normalize(
                np.maximum(self.transmat_prior - 1.0 + stats['trans'], 1e-20),
                axis=1)
            self.transmat_ = transmat_
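
A small numeric sketch of the MAP update above, in plain NumPy with made-up prior and count values: the re-estimated start probabilities are proportional to startprob_prior - 1 plus the expected start counts, floored at a small constant before normalization.

import numpy as np

startprob_prior = np.array([2.0, 2.0, 2.0])    # Dirichlet prior (pseudo-counts + 1)
expected_starts = np.array([6.0, 4.0, 0.0])    # stand-in for stats['start']

unnorm = np.maximum(startprob_prior - 1.0 + expected_starts, 1e-20)
startprob = unnorm / unnorm.sum()              # proportional to [7, 5, 1]
assert np.isclose(startprob.sum(), 1.0)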
Example #32
    def test_fit(self, params='stmc', n_iter=5, verbose=False, **kwargs):
        h = hmm.GaussianHMM(self.n_components, self.covariance_type)
        h.startprob_ = self.startprob
        h.transmat_ = normalize(
            self.transmat + np.diag(self.prng.rand(self.n_components)), 1)
        h.means_ = 20 * self.means
        h.covars_ = self.covars[self.covariance_type]

        # Create training data by sampling from the HMM.
        train_obs = [h.sample(n=10)[0] for x in range(10)]

        # Mess up the parameters and see if we can re-learn them.
        h.n_iter = 0
        h.fit(train_obs)

        trainll = train_hmm_and_keep_track_of_log_likelihood(
            h, train_obs, n_iter=n_iter, params=params, **kwargs)[1:]

        # Check that the loglik is always increasing during training
        if not np.all(np.diff(trainll) > 0) and verbose:
            print('Test train: %s (%s)\n  %s\n  %s'
                  % (self.covariance_type, params, trainll, np.diff(trainll)))
        delta_min = np.diff(trainll).min()
        self.assertTrue(
            delta_min > -0.8,
            "The min nll increase is %f which is lower than the admissible"
            " threshold of %f, for model %s. The likelihoods are %s."
            % (delta_min, -0.8, self.covariance_type, trainll))
Example #33
    def sample_transition_(self, state, action):
        """Sample next state and reward conditioned on state and action."""
        if len(self.traffic_window) < self.TRAFFIC_WINDOW_SIZE:
            return None, None

        (last_traffic_belief, current_q, last_sleep_flag) = state
        (sleep_flag, control_req) = action

        # Traffic state
        # predict prior belief for current traffic state
        cur_traffic_pred = np.matmul(
            np.array([last_traffic_belief, 1 - last_traffic_belief]),
            self.traffic_model.transmat_)
        normalize(cur_traffic_pred)
        # sample current traffic state, 0: wake, 1: sleep
        cur_traffic_state = int(np.random.rand() < cur_traffic_pred[0])
        # sample current observation
        cur_traffic_ob = poisson.rvs(
            self.traffic_model.emissionrates_[cur_traffic_state])
        # compute posterior belief for current traffic state
        posterior = poisson.pmf(
            cur_traffic_ob, self.traffic_model.emissionrates_) * \
            cur_traffic_pred
        normalize(posterior)

        # Queue state
        total_req = current_q + cur_traffic_ob
        next_q = total_req if sleep_flag == True else 0  # queue all or serve all

        # Reward
        if self.BETA is not None:
            reward = self.BETA * (
                        self.R_SERVE * total_req * int(not sleep_flag) +
                        self.R_WAIT * total_req * int(sleep_flag)
                     ) + \
                    (1-self.BETA) * (
                        self.C_OP * int(not sleep_flag) +
                        self.C_SW * int(last_sleep_flag!=sleep_flag)
                     )
        else:
            reward = self.R_SERVE * total_req * int(not sleep_flag) + \
                     self.R_WAIT * total_req * int(sleep_flag) + \
                     self.C_OP * int(not sleep_flag) + \
                     self.C_SW * int(last_sleep_flag!=sleep_flag)

        return (posterior[0], next_q, sleep_flag), reward
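
A self-contained sketch of the two-state belief update performed above, with made-up transition probabilities and Poisson rates: propagate the belief through the transition matrix, reweight by the observation likelihood, then renormalize.

import numpy as np
from scipy.stats import poisson

transmat = np.array([[0.9, 0.1],
                     [0.2, 0.8]])
rates = np.array([5.0, 0.5])           # Poisson emission rate per traffic state
belief = np.array([0.7, 0.3])          # P(state) before the new observation

prior = belief @ transmat              # predicted state distribution
obs = 4                                # newly observed traffic count
posterior = poisson.pmf(obs, rates) * prior
posterior /= posterior.sum()           # same role as normalize(posterior)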
Example #34
    def _set_startprob(self, startprob):
        if startprob is None:
            startprob = np.tile(1.0 / self.n_components, self.n_components)
        else:
            startprob = np.asarray(startprob, dtype=float)

        # check if there exists a component whose value is exactly zero
        # if so, add a small number and re-normalize
        if not np.all(startprob):
            normalize(startprob)

        if len(startprob) != self.n_components:
            raise ValueError('startprob must have length n_components')
        if not np.allclose(np.sum(startprob), 1.0):
            raise ValueError('startprob must sum to 1.0')

        self._log_startprob = np.log(np.asarray(startprob).copy())
Example #35
    def _set_emissionmat(self, emissionprob):
        # Convert list to numpy array.
        emissionprob = np.asarray(emissionprob)
        if hasattr(self, 'n_symbols') and emissionprob.shape != (self.n_components, self.n_symbols):
            raise ValueError('emissionprob must have shape '
                             '(n_components, n_symbols)')

        # check if there exists a component whose value is exactly zero
        # if so, add a small number and re-normalize
        if not np.all(emissionprob):
            normalize(emissionprob)
        self._emissionmat_ = emissionprob
        # check if there exists any element whose value is NaN
        underflow_idx = np.isnan(self._emissionmat_)
        # set the NaN value as negative inf.
        self._emissionmat_[underflow_idx] = NEGINF
        self.n_symbols = self._emissionmat_.shape[1]
Example #36
    def _init(self, X, lengths=None):
        if not self._check_input_symbols(X):
            raise ValueError("expected a sample from "
                             "a Multinomial distribution.")

        super(MultinomialHMM, self)._init(X, lengths=lengths)
        self.random_state = check_random_state(self.random_state)

        if 'e' in self.init_params:
            if not hasattr(self, "n_features"):
                symbols = set()
                for i, j in iter_from_X_lengths(X, lengths):
                    symbols |= set(X[i:j].flatten())
                self.n_features = len(symbols)
            self.emissionprob_ = self.random_state \
                .rand(self.n_components, self.n_features)
            normalize(self.emissionprob_, axis=1)
Example #37
    def init_params(self, X):
        '''
        Parameters initialization.
        '''
        if type(X) == list:
            X = np.concatenate(X)

        self.mixCoefUnNorm = np.random.rand(self.n_nodes, self.mix_dim) + 1e-9
        self.mixCoef = np.reshape(np.array([.5, .5]), (1, 2))

        startProb = np.exp(np.random.randn(self.mix_dim, self.n_components))
        normalize(startProb, axis=1)

        transProb = np.exp(
            np.random.randn(self.mix_dim, self.n_components,
                            self.n_components))
        normalize(transProb, axis=2)

        self.time_ = 1
        self.first_moment_ = np.zeros_like(self.mixCoef)
        self.second_moment_ = np.zeros_like(self.mixCoef)

        if self.emission == 'gaussian':
            self.mixModels = [
                RegulizedGaussianHMM(n_components=self.n_components,
                                     covariance_type='diag',
                                     similarity=1,
                                     other_trans=np.zeros((self.n_components,
                                                           self.n_components)),
                                     hyperparam=self.hyperparam,
                                     epsilon=self.epsilon)
                for i in range(self.mix_dim)
            ]

            for m in range(self.mix_dim):
                self.mixModels[m]._init(X)
                seed = 1
                trans = rand_initialization.generate_transitions_random(
                    seed, 2, self.n_components, .1)
                self.mixModels[0].transmat_ = trans[0]
                self.mixModels[1].transmat_ = trans[1]

        else:
            raise NotImplementedError('{} emission is not implemented'.format(
                self.emission))
Example #38
    def _init(self, X, lengths=None):
        """estimate emission probs.
        FYI: lengths is for when you have multiple sequences. E.g.
        sequences for different chroms that need to be run through the HMM
        separately."""
        super(ContinuousMultinomialHMM, self)._init(X, lengths=lengths)
        self.random_state = check_random_state(self.random_state)

        if 'e' in self.init_params:
            if not hasattr(self, "n_features"):
                self.n_features = X.shape[1]

            # code from hmm.MultinomialHMM._init
            # random initialization of emission probs
            self.emissionprob_ = self.random_state \
                .rand(self.n_components, self.n_features)
            # from each component, the emission probs should sum to 1:
            normalize(self.emissionprob_, axis=1)
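
As the docstring above notes, lengths marks the boundaries between independently modelled sequences (e.g. different chromosomes) inside one stacked observation array. A hedged usage sketch with hmmlearn's stock GaussianHMM rather than the custom ContinuousMultinomialHMM:

import numpy as np
from hmmlearn import hmm

rng = np.random.RandomState(0)
seq_a = rng.randn(80, 3)               # observations for one sequence
seq_b = rng.randn(120, 3)              # and another, modelled separately
X = np.concatenate([seq_a, seq_b])     # stacked along the time axis
lengths = [len(seq_a), len(seq_b)]

model = hmm.GaussianHMM(n_components=4, n_iter=10, random_state=0)
model.fit(X, lengths)                  # no statistics are accumulated across the boundary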
Example #39
    def test_fit(self, params='stpmaw', n_iter=50, **kwargs):
        h = self.h
        self.transmat = np.copy(h.transmat_)
        h.params = params
        lengths = 10000
        X, _state_sequence = h.sample(lengths, random_state=self.prng)

        # Perturb
        pstarting = self.prng.rand(self.n_components)
        normalize(pstarting)
        h.startprob_ = pstarting

        template = np.zeros((6,6))
        noise = np.random.normal(3, 2, 12)
        nb = 0
        for row in range(5):
            template[row][row] = noise[nb]
            nb = nb + 1
            template[row][row+1] = noise[nb]
            nb = nb + 1
        template[5][0] = noise[nb]
        nb = nb + 1
        template[5][5] = noise[nb]
        template = abs(template)

        h.transmat_ = np.copy(template)


        # TODO: Test more parameters, generate test cases
        trainll = fit_hmm_and_monitor_log_likelihood(
            h, X, n_iter=n_iter)

        # Check that the log-likelihood is always increasing during training.
        #diff = np.diff(trainll)
        #self.assertTrue(np.all(diff >= -1e-6),
        #                "Decreasing log-likelihood: {0}" .format(diff))

        assert_array_almost_equal(h.mu_.reshape(-1),
                                  self.mu.reshape(-1), decimal=2)
        assert_array_almost_equal(h.var_.reshape(-1),
                                  self.var.reshape(-1), decimal=2)
        assert_array_almost_equal(h.transmat_.reshape(-1),
                                  self.transmat.reshape(-1), decimal=2)
Example #40
    def _set_emissionmat(self, emissionprob):
        # Convert list to numpy array.
        emissionprob = np.asarray(emissionprob)
        if hasattr(self,
                   'n_symbols') and emissionprob.shape != (self.n_components,
                                                           self.n_symbols):
            raise ValueError('emissionprob must have shape '
                             '(n_components, n_symbols)')

        # check if there exists a component whose value is exactly zero
        # if so, add a small number and re-normalize
        if not np.all(emissionprob):
            normalize(emissionprob)
        self._emissionmat_ = emissionprob
        # check if there exists any element whose value is NaN
        underflow_idx = np.isnan(self._emissionmat_)
        # set the NaN value as negative inf.
        self._emissionmat_[underflow_idx] = NEGINF
        self.n_symbols = self._emissionmat_.shape[1]
Example #41
    def test_fit(self, params='ste', n_iter=5, **kwargs):
        h = self.h
        h.params = params

        # Create training data by sampling from the HMM.
        train_obs = [h.sample(n=10)[0] for x in range(10)]

        # Mess up the parameters and see if we can re-learn them.
        h.startprob_ = normalize(self.prng.rand(self.n_components))
        h.transmat_ = normalize(self.prng.rand(self.n_components,
                                               self.n_components), axis=1)
        h.emissionprob_ = normalize(
            self.prng.rand(self.n_components, self.n_symbols), axis=1)

        trainll = fit_hmm_and_monitor_log_likelihood(h, train_obs, n_iter)

        # Check that the log-likelihood is always increasing during training.
        diff = np.diff(trainll)
        self.assertTrue(np.all(diff >= -1e-6),
                        "Decreasing log-likelihood: {0}" .format(diff))
Example #42
    def _set_transmat(self, transmat):
        if transmat is None:
            transmat = np.tile(1.0 / self.n_components,
                               (self.n_components, self.n_components))

        # check if there exists a component whose value is exactly zero
        # if so, add a small number and re-normalize
        if not np.all(transmat):
            normalize(transmat, axis=1)

        if (np.asarray(transmat).shape
                != (self.n_components, self.n_components)):
            raise ValueError('transmat must have shape '
                             '(n_components, n_components)')
        if not np.all(np.allclose(np.sum(transmat, axis=1), 1.0)):
            raise ValueError('Rows of transmat must sum to 1.0')

        self._log_transmat = np.log(np.asarray(transmat).copy())
        underflow_idx = np.isnan(self._log_transmat)
        self._log_transmat[underflow_idx] = NEGINF
Example #43
    def _do_mstep(self, stats):
        """Performs the M-step of EM algorithm.

        Parameters
        ----------
        stats : dict
            Sufficient statistics updated from all available samples.
        """
        # The ``np.where`` calls guard against updating forbidden states
        # or transitions in e.g. a left-right HMM.
        if 's' in self.params:
            startprob_ = self.startprob_prior - 1.0 + stats['start'] + EPS
            self.startprob_ = np.where(self.startprob_ == 0.0, self.startprob_,
                                       startprob_)
            normalize(self.startprob_)
        if 't' in self.params:
            transmat_ = self.transmat_prior - 1.0 + stats['trans'] + EPS
            self.transmat_ = np.where(self.transmat_ == 0.0, self.transmat_,
                                      transmat_)
            normalize(self.transmat_, axis=1)
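
A compact sketch of the np.where guard described in the docstring, with a made-up left-right transition matrix: entries that are exactly zero (forbidden transitions) stay zero after blending in the new estimates and renormalizing.

import numpy as np
from hmmlearn.utils import normalize

transmat = np.array([[0.7, 0.3, 0.0],     # left-right HMM: no backward transitions
                     [0.0, 0.6, 0.4],
                     [0.0, 0.0, 1.0]])
new_est = np.full((3, 3), 1.0 / 3.0)      # stand-in for the accumulated stats['trans']

updated = np.where(transmat == 0.0, transmat, new_est)
normalize(updated, axis=1)                # rows sum to 1, zeros are preserved
assert np.all(updated[transmat == 0.0] == 0.0)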
Example #44
    def test_fit(self, params='ste', n_iter=5, verbose=False, **kwargs):
        h = self.h

        # Create training data by sampling from the HMM.
        train_obs = [h.sample(n=10)[0] for x in range(10)]

        # Mess up the parameters and see if we can re-learn them.
        h.startprob_ = normalize(self.prng.rand(self.n_components))
        h.transmat_ = normalize(self.prng.rand(self.n_components,
                                                   self.n_components), axis=1)
        h.emissionprob_ = normalize(
            self.prng.rand(self.n_components, self.n_symbols), axis=1)

        trainll = train_hmm_and_keep_track_of_log_likelihood(
            h, train_obs, n_iter=n_iter, params=params, **kwargs)[1:]

        # Check that the loglik is always increasing during training
        if not np.all(np.diff(trainll) > 0) and verbose:
            print('Test train: (%s)\n  %s\n  %s' % (params, trainll,
                                                    np.diff(trainll)))
        self.assertTrue(np.all(np.diff(trainll) > -1.e-3))
Example #45
    def _set_transmat(self, transmat_val):
        if transmat_val is None:
            transmat = np.tile(1.0 / self.n_components,
                               (self.n_components, self.n_components))
        else:
            transmat_val[np.isnan(transmat_val)] = 0.0
            normalize(transmat_val, axis=1)

            if (np.asarray(transmat_val).shape == (self.n_components,
                                                   self.n_components)):
                transmat = np.copy(transmat_val)
            elif transmat_val.shape[0] == self.n_unique:
                transmat = self._ntied_transmat(transmat_val)
            else:
                raise ValueError("cannot match shape of transmat")

        if not np.all(np.allclose(np.sum(transmat, axis=1), 1.0)):
            raise ValueError('Rows of transmat must sum to 1.0')
        self._log_transmat = np.log(np.asarray(transmat).copy())
        underflow_idx = np.isnan(self._log_transmat)
        self._log_transmat[underflow_idx] = NEGINF
Example #46
    def test_fit(self, params='ste', n_iter=5, **kwargs):
        h = self.h
        h.params = params

        lengths = np.array([10] * 10)
        X, _state_sequence = h.sample(lengths.sum(), random_state=self.prng)

        # Mess up the parameters and see if we can re-learn them.
        h.startprob_ = normalize(self.prng.rand(self.n_components))
        h.transmat_ = normalize(self.prng.rand(self.n_components,
                                               self.n_components), axis=1)
        h.emissionprob_ = normalize(
            self.prng.rand(self.n_components, self.n_features), axis=1)

        trainll = fit_hmm_and_monitor_log_likelihood(
            h, X, lengths=lengths, n_iter=n_iter)

        # Check that the log-likelihood is always increasing during training.
        diff = np.diff(trainll)
        self.assertTrue(np.all(diff >= -1e-6),
                        "Decreasing log-likelihood: {0}" .format(diff))
Example #47
    def _set_transmat(self, transmat_val):
        if transmat_val is None:
            transmat = np.tile(1.0 / self.n_components,
                               (self.n_components, self.n_components))
        else:
            transmat_val[np.isnan(transmat_val)] = 0.0
            normalize(transmat_val, axis=1)

            if (np.asarray(transmat_val).shape == (self.n_components,
                                                   self.n_components)):
                transmat = np.copy(transmat_val)
            elif transmat_val.shape[0] == self.n_unique:
                transmat = self._ntied_transmat(transmat_val)
            else:
                raise ValueError("cannot match shape of transmat")

        if not np.all(np.allclose(np.sum(transmat, axis=1), 1.0)):
            raise ValueError('Rows of transmat must sum to 1.0')
        self._log_transmat = np.log(np.asarray(transmat).copy())
        underflow_idx = np.isnan(self._log_transmat)
        self._log_transmat[underflow_idx] = NEGINF
Example #48
def double_model(model):

    symbols = model.n_components
    n_symbols = 2 * symbols

    n_model = hmm.GaussianHMM(n_components=n_symbols, verbose=True,
                              min_covar=0.01, init_params='',
                              n_iter=model.n_iter, covariance_type="diag",
                              tol=model.tol)

    transmat_ = np.random.random((n_symbols,n_symbols))/1000
    transmat_[0:symbols,0:symbols] = model.transmat_
    transmat_[symbols:n_symbols,symbols:n_symbols] = model.transmat_
    # unbalance it slightly
    transmat_ += np.random.random((n_symbols,n_symbols))/1000
    n_model.transmat_ = transmat_
    utils.normalize(n_model.transmat_, 1)

    n_model.startprob_ = np.concatenate((model.startprob_, model.startprob_))
    utils.normalize(n_model.startprob_)
    n_model.means_ = np.concatenate((model.means_, model.means_))
    n_model._covars_ = np.concatenate((model._covars_, model._covars_))

    return n_model
Example #49
    def test_fit_with_priors(self, params='stmc', n_iter=5):
        startprob_prior = 10 * self.startprob + 2.0
        transmat_prior = 10 * self.transmat + 2.0
        means_prior = self.means
        means_weight = 2.0
        covars_weight = 2.0
        if self.covariance_type in ('full', 'tied'):
            covars_weight += self.n_features
        covars_prior = self.covars[self.covariance_type]

        h = hmm.GaussianHMM(self.n_components, self.covariance_type)
        h.startprob_ = self.startprob
        h.startprob_prior = startprob_prior
        h.transmat_ = normalize(
            self.transmat + np.diag(self.prng.rand(self.n_components)), 1)
        h.transmat_prior = transmat_prior
        h.means_ = 20 * self.means
        h.means_prior = means_prior
        h.means_weight = means_weight
        h.covars_ = self.covars[self.covariance_type]
        h.covars_prior = covars_prior
        h.covars_weight = covars_weight

        lengths = [100] * 10
        X, _state_sequence = h.sample(sum(lengths), random_state=self.prng)

        # Re-initialize the parameters and check that we can converge to the
        # original parameter values.
        h_learn = hmm.GaussianHMM(self.n_components, self.covariance_type,
                                  params=params)
        h_learn.n_iter = 0
        h_learn.fit(X, lengths=lengths)

        fit_hmm_and_monitor_log_likelihood(
            h_learn, X, lengths=lengths, n_iter=n_iter)

        # Make sure we've converged to the right parameters.
        # a) means
        self.assertTrue(np.allclose(sorted(h.means_.tolist()),
                                    sorted(h_learn.means_.tolist()),
                                    0.01))
        # b) covars are hard to estimate precisely from a relatively small
        #    sample, thus the large threshold
        self.assertTrue(np.allclose(sorted(h._covars_.tolist()),
                                    sorted(h_learn._covars_.tolist()),
                                    10))
Example #50
def create_random_gmm(n_mix, n_features, covariance_type, prng=0):
    prng = check_random_state(prng)
    g = GMM(n_mix, covariance_type=covariance_type)
    g.means_ = prng.randint(-20, 20, (n_mix, n_features))
    mincv = 0.1
    g.covars_ = {
        'spherical': (mincv + mincv * np.dot(prng.rand(n_mix, 1),
                                             np.ones((1, n_features)))) ** 2,
        'tied': (make_spd_matrix(n_features, random_state=prng)
                 + mincv * np.eye(n_features)),
        'diag': (mincv + mincv * prng.rand(n_mix, n_features)) ** 2,
        'full': np.array(
            [make_spd_matrix(n_features, random_state=prng)
             + mincv * np.eye(n_features) for x in range(n_mix)])
    }[covariance_type]
    g.weights_ = normalize(prng.rand(n_mix))
    return g
Example #51
    def test_fit_with_priors(self, params='stmc', n_iter=5):
        startprob_prior = 10 * self.startprob + 2.0
        transmat_prior = 10 * self.transmat + 2.0
        means_prior = self.means
        means_weight = 2.0
        covars_weight = 2.0
        if self.covariance_type in ('full', 'tied'):
            covars_weight += self.n_features
        covars_prior = self.covars[self.covariance_type]

        h = hmm.GaussianHMM(self.n_components, self.covariance_type)
        h.startprob_ = self.startprob
        h.startprob_prior = startprob_prior
        h.transmat_ = normalize(
            self.transmat + np.diag(self.prng.rand(self.n_components)), 1)
        h.transmat_prior = transmat_prior
        h.means_ = 20 * self.means
        h.means_prior = means_prior
        h.means_weight = means_weight
        h.covars_ = self.covars[self.covariance_type]
        h.covars_prior = covars_prior
        h.covars_weight = covars_weight

        # Create training data by sampling from the HMM.
        train_obs = [h.sample(n=100)[0] for x in range(10)]

        # Re-initialize the parameters and check that we can converge to the
        # original parameter values.
        h_learn = hmm.GaussianHMM(self.n_components, self.covariance_type,
                                  params=params)
        h_learn.n_iter = 0
        h_learn.fit(train_obs)

        trainll = fit_hmm_and_monitor_log_likelihood(h_learn, train_obs, n_iter)

        # Make sure we've converged to the right parameters.
        # a) means
        self.assertTrue(np.allclose(sorted(h.means_.tolist()),
                                    sorted(h_learn.means_.tolist()),
                                    1e-2))
        # b) covars are hard to estimate precisely from a relatively small
        #    sample, thus the large threshold
        self.assertTrue(np.allclose(sorted(h._covars_.tolist()),
                                    sorted(h_learn._covars_.tolist()),
                                    10))
Example #52
    def test_fit_with_priors(self, params='stmc', n_iter=5, verbose=False):
        startprob_prior = 10 * self.startprob + 2.0
        transmat_prior = 10 * self.transmat + 2.0
        means_prior = self.means
        means_weight = 2.0
        covars_weight = 2.0
        if self.covariance_type in ('full', 'tied'):
            covars_weight += self.n_features
        covars_prior = self.covars[self.covariance_type]

        h = hmm.GaussianHMM(self.n_components, self.covariance_type)
        h.startprob_ = self.startprob
        h.startprob_prior = startprob_prior
        h.transmat_ = normalize(
            self.transmat + np.diag(self.prng.rand(self.n_components)), 1)
        h.transmat_prior = transmat_prior
        h.means_ = 20 * self.means
        h.means_prior = means_prior
        h.means_weight = means_weight
        h.covars_ = self.covars[self.covariance_type]
        h.covars_prior = covars_prior
        h.covars_weight = covars_weight

        # Create training data by sampling from the HMM.
        train_obs = [h.sample(n=10)[0] for x in range(10)]

        # Mess up the parameters and see if we can re-learn them.
        h.n_iter = 0
        h.fit(train_obs[:1])

        trainll = train_hmm_and_keep_track_of_log_likelihood(
            h, train_obs, n_iter=n_iter, params=params)[1:]

        # Check that the loglik is always increasing during training
        if not np.all(np.diff(trainll) > 0) and verbose:
            print('Test MAP train: %s (%s)\n  %s\n  %s'
                  % (self.covariance_type, params, trainll, np.diff(trainll)))
        # XXX: Why such a large tolerance?
        self.assertTrue(np.all(np.diff(trainll) > -0.5))
Example #53
    def test_fit(self, params='stmc', n_iter=5, **kwargs):
        h = hmm.GaussianHMM(self.n_components, self.covariance_type)
        h.startprob_ = self.startprob
        h.transmat_ = normalize(
            self.transmat + np.diag(self.prng.rand(self.n_components)), 1)
        h.means_ = 20 * self.means
        h.covars_ = self.covars[self.covariance_type]

        # Create training data by sampling from the HMM.
        train_obs = [h.sample(n=10)[0] for x in range(10)]

        # Mess up the parameters and see if we can re-learn them.
        h.n_iter = 0
        h.fit(train_obs)

        trainll = fit_hmm_and_monitor_log_likelihood(h, train_obs, n_iter)

        # Check that the log-likelihood is always increasing during training.
        diff = np.diff(trainll)
        message = ("Decreasing log-likelihood for {0} covariance: {1}"
                   .format(self.covariance_type, diff))
        self.assertTrue(np.all(diff >= -1e-6), message)
Example #54
    def test_fit(self, params='stmc', n_iter=5, **kwargs):
        h = hmm.GaussianHMM(self.n_components, self.covariance_type)
        h.startprob_ = self.startprob
        h.transmat_ = normalize(
            self.transmat + np.diag(self.prng.rand(self.n_components)), 1)
        h.means_ = 20 * self.means
        h.covars_ = self.covars[self.covariance_type]

        lengths = [10] * 10
        X, _state_sequence = h.sample(sum(lengths), random_state=self.prng)

        # Mess up the parameters and see if we can re-learn them.
        h.n_iter = 0
        h.fit(X, lengths=lengths)

        trainll = fit_hmm_and_monitor_log_likelihood(
            h, X, lengths=lengths, n_iter=n_iter)

        # Check that the log-likelihood is always increasing during training.
        diff = np.diff(trainll)
        message = ("Decreasing log-likelihood for {0} covariance: {1}"
                   .format(self.covariance_type, diff))
        self.assertTrue(np.all(diff >= -1e-6), message)
Example #55
def test_normalize():
    A = np.random.normal(42., size=(128, 4))
    normalize(A)
    assert_almost_equal(A.sum(), 1.)