def test_converged_by_iterations(self):
    m = ConvergenceMonitor(tol=1e-3, n_iter=2, verbose=False)
    assert not m.converged
    m.report(-0.01)
    assert not m.converged
    m.report(-0.1)
    assert m.converged
def test_report_first_iteration(self, capsys):
    m = ConvergenceMonitor(tol=1e-3, n_iter=10, verbose=True)
    m.report(-0.01)
    out, err = capsys.readouterr()
    assert not out
    expected = m._template.format(iter=1, logprob=-0.01, delta=float("nan"))
    assert err.splitlines() == [expected]
def test_reset(self):
    m = ConvergenceMonitor(tol=1e-3, n_iter=10, verbose=False)
    m.iter = 1
    m.history.append(-0.01)
    m._reset()
    assert m.iter == 0
    assert not m.history
def fit(self, X, lengths=None):
    X = check_array(X)
    self._init(X, lengths=lengths)
    self._check()

    self.monitor_ = ConvergenceMonitor(self.tol, self.n_iter, self.verbose)
    for iter in range(self.n_iter):
        stats = self._initialize_sufficient_statistics()
        curr_logprob = 0
        framelogprob = self._compute_log_likelihood(X)
        logprob, fwdlattice = self._do_forward_pass(framelogprob)
        curr_logprob += logprob
        bwdlattice = self._do_backward_pass(framelogprob)
        posteriors = self._compute_posteriors(fwdlattice, bwdlattice)
        self._accumulate_sufficient_statistics(
            stats, X, framelogprob, posteriors, fwdlattice, bwdlattice)

        # XXX must be before convergence check, because otherwise
        #     there won't be any updates for the case ``n_iter=1``.
        self._do_mstep(stats)

        self.monitor_.report(curr_logprob)
        if self.monitor_.converged:
            self.framelogprob = framelogprob
            break

    return self
def __init__(self, config: dict):
    if "tol" in config["train"] and isinstance(config["train"]["tol"], str):
        config["train"]["tol"] = {
            "-inf": -np.inf,
            "inf": np.inf,
        }[config["train"]["tol"]]
    self.gmm_hmm = _GMMHMM(**config["parameters"])
    self.gmm_hmm.monitor_ = ConvergenceMonitor(
        *(config["train"][key] for key in ("tol", "n_iter", "verbose")))
    self.iepoch = 1
    self.rand_inits = (config["train"].get("weight_rand_init", 0),
                       config["train"].get("mean_rand_init", 0),
                       config["train"].get("covar_rand_init", 0))
    self.limit_inits = (
        config["train"].get("weight_min_init", 0),
        config["train"].get("covar_min_init", 0),
    )
    self.rescale = config["train"].get("rescale_samples", False)
    if self.rescale:
        self.means = None
        self.stddevs = None
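# A sketch of the nested config the constructor above consumes. The key
# names are taken from the code; the values and the ``GMMHMMWrapper``
# class name are illustrative assumptions, not recommended settings.
config = {
    # Forwarded verbatim to ``_GMMHMM(**config["parameters"])``.
    "parameters": {"n_components": 3, "n_mix": 2, "covariance_type": "diag"},
    "train": {
        "tol": "-inf",             # string form is mapped to -np.inf
        "n_iter": 50,
        "verbose": False,
        "weight_rand_init": 0.01,  # optional; defaults to 0
        "rescale_samples": True,   # optional; defaults to False
    },
}
model = GMMHMMWrapper(config)  # hypothetical name for the class above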
def test_converged_by_logprob(self):
    m = ConvergenceMonitor(tol=1e-3, n_iter=10, verbose=False)
    for logprob in [-0.03, -0.02, -0.01]:
        m.report(logprob)
        assert not m.converged
    m.report(-0.0101)
    assert m.converged
def fit(self, X, lengths=None): """Estimate model parameters. An initialization step is performed before entering the EM algorithm. If you want to avoid this step for a subset of the parameters, pass proper ``init_params`` keyword argument to estimator's constructor. Parameters ---------- X : array-like, shape (n_samples, n_features) Feature matrix of individual samples. lengths : array-like of integers, shape (n_sequences, ) Lengths of the individual sequences in ``X``. The sum of these should be ``n_samples``. Returns ------- self : object Returns self. """ X = check_array(X) self._init(X, lengths=lengths) self._check() self.monitor_ = ConvergenceMonitor(self.tol, self.n_iter, self.verbose) for iter in range(self.n_iter): stats = self._initialize_sufficient_statistics() curr_logprob = 0 for i, j in iter_from_X_lengths(X, lengths): framelogprob = self._compute_log_likelihood(X[i:j]) logprob, fwdlattice = self._do_forward_pass(framelogprob) curr_logprob += logprob bwdlattice = self._do_backward_pass(framelogprob) posteriors = self._compute_posteriors(fwdlattice, bwdlattice) # fix posteriors if self.states_prior is not None and self.fp_state is not None: for k in range(len(self.states_prior)): if self.states_prior[k] == 0: # non footprint states posteriors[k][self.fp_state] = 0.0 posteriors[k] = posteriors[k] / sum(posteriors[k]) elif self.states_prior[k] == 1: # footprint states posteriors[k] = 0.0 / sum(posteriors[k]) posteriors[k][self.fp_state] = 1.0 self._accumulate_sufficient_statistics(stats, X[i:j], framelogprob, posteriors, fwdlattice, bwdlattice) self._do_mstep(stats) self.monitor_.report(curr_logprob) if self.monitor_.converged: break return self
def test_report(self, capsys):
    n_iter = 10
    m = ConvergenceMonitor(tol=1e-3, n_iter=n_iter, verbose=True)
    for i in reversed(range(n_iter)):
        m.report(-0.01 * i)
    out, err = capsys.readouterr()
    assert not out
    assert len(err.splitlines()) == n_iter
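# A runnable sketch of the loop these tests exercise: feed the
# per-iteration log-likelihood to ``report`` and stop once ``converged``
# flips. The score sequence is a toy stand-in for a real E-step.
from hmmlearn.base import ConvergenceMonitor

scores = iter(-1.0 / (n + 1) for n in range(100))  # gains shrink each step

monitor = ConvergenceMonitor(tol=1e-3, n_iter=100, verbose=False)
for _ in range(100):
    monitor.report(next(scores))
    if monitor.converged:
        break
print(monitor.iter, list(monitor.history))  # stops once the gain < tol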
def fit(self, X, y, lengths=None, valid_data=None):
    '''
    Trains SpaMHMM on data X, y, using the EM algorithm.
    Inputs:
    X - np.array of size (n_samples, n_features).
    y - int array of size n_sequences, whose entries are in the range
        [0, n_nodes-1].
    lengths - list containing the lengths of each individual sequence
              in X, with size n_sequences.
    valid_data - tuple (X_valid, y_valid, lengths_valid) containing the
                 validation data; if validation data is given, the model
                 with the lowest validation loss is saved in a pickle
                 file (optional, default: None).
    '''
    if type(X) == list:
        lengths = [x.shape[0] for x in X]
        X = np.concatenate(X)
    y = np.array(y)

    self.monitor_ = ConvergenceMonitor(self.tol, self.n_iter, False)

    if valid_data is not None:
        X_valid, y_valid, lengths_valid = valid_data
        if type(X_valid) == list:
            lengths_valid = [x.shape[0] for x in X_valid]
            X_valid = np.concatenate(X_valid)
        y_valid = np.array(y_valid)
        max_validscore = float('-inf')
        validloss_hist = []

    self.reg_ = self.graph is not None

    self.init_params(X)
    self._check()

    prevscore = float('-inf')
    trainloss_hist = []
    for it in range(self.n_iter):
        t0 = time.time()
        stats = self._compute_sufficient_statistics(X, y, lengths)
        self._do_mstep(stats)
        print('trans0 {}'.format(self.mixModels[0].transmat_))
        print('trans1 {}'.format(self.mixModels[1].transmat_))
        t1 = time.time()

        currscore = self.score(X, y, lengths)
        trainloss_hist.append(-currscore)
        if valid_data is not None:
            validscore = self.score(X_valid, y_valid, lengths_valid)
            validloss_hist.append(-validscore)
            if validscore > max_validscore:
                max_validscore = validscore
                with open(self.name + '.pkl', 'wb') as f:
                    pickle.dump(self, f)

        if self.verbose:
            if (not self.reg_) and (prevscore > currscore):
                print('WARNING: loss has increased at iteration {}!'
                      .format(it))
                print('prev loss = {:.5f}, curr loss = {:.5f}'.format(
                    -prevscore, -currscore))
            elif valid_data is not None:
                print('it {}: train loss = {:.5f}, valid loss = {:.5f}, '
                      '{:.3f} sec/it'.format(it + 1, -currscore,
                                             -validscore, t1 - t0))
            else:
                print('it {}: loss = {:.5f}, {:.3f} sec/it'.format(
                    it + 1, -currscore, t1 - t0))

        ll = np.sum(self.scores_per_seq(X, y, lengths))
        print("ll: {}".format(ll))

        self.monitor_.report(currscore)
        if self.monitor_.converged:
            if self.verbose:
                print('Loss improved less than {}. Training stopped.'
                      .format(self.tol))
            break
        prevscore = currscore

    if valid_data:
        return trainloss_hist, validloss_hist
    else:
        return trainloss_hist
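# A sketch of training with a validation split; array shapes, the node
# count, and the ``model`` instance are illustrative assumptions.
import numpy as np

X_train = [np.random.randn(60, 3) for _ in range(8)]  # 8 sequences
y_train = [i % 4 for i in range(8)]                   # node ids in [0, 3]
X_valid = [np.random.randn(60, 3) for _ in range(2)]
y_valid = [0, 1]

# lengths are derived from the lists; the best model by validation loss
# is pickled to ``model.name + '.pkl'`` as a side effect.
train_hist, valid_hist = model.fit(
    X_train, y_train, valid_data=(X_valid, y_valid, None))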
def fit(self, X, y, lengths=None, valid_data=None):
    '''
    Trains SpaMHMM on data X, y, using the EM algorithm.
    Inputs:
    X - np.array of size (n_samples, n_features).
    y - int array of size n_sequences, whose entries are in the range
        [0, n_nodes-1].
    lengths - list containing the lengths of each individual sequence
              in X, with size n_sequences.
    valid_data - tuple (X_valid, y_valid, lengths_valid) containing the
                 validation data; if validation data is given, the model
                 with the lowest validation loss is saved in a pickle
                 file (optional, default: None).
    '''
    if type(X) == list:
        lengths = [x.shape[0] for x in X]
        X = np.concatenate(X)
    y = np.array(y)

    self.monitor_ = ConvergenceMonitor(self.tol, self.n_iter, False)

    if valid_data is not None:
        X_valid, y_valid, lengths_valid = valid_data
        if type(X_valid) == list:
            lengths_valid = [x.shape[0] for x in X_valid]
            X_valid = np.concatenate(X_valid)
        y_valid = np.array(y_valid)
        max_validscore = float('-inf')
        validloss_hist = []

    self.reg_ = self.graph is not None

    self.init_params(X)
    self._check()

    # Sort the component means so that states are ordered consistently
    # across mixture components.
    for m in range(self.mix_dim):
        self.mixModels[m].means_ = np.sort(
            self.mixModels[m].means_.flatten())[:, np.newaxis]

    prevscore = float('-inf')
    trainloss_hist = []
    for it in range(self.n_iter):
        t0 = time.time()
        stats = self._compute_sufficient_statistics(X, y, lengths)

        # Cosine similarity between the two transition matrices under
        # the trace inner product; passed to both models as a
        # diversity term.
        norm0 = math.sqrt(np.trace(
            self.mixModels[0].transmat_.dot(self.mixModels[0].transmat_)))
        norm1 = math.sqrt(np.trace(
            self.mixModels[1].transmat_.dot(self.mixModels[1].transmat_)))
        similarity = np.trace(self.mixModels[0].transmat_.dot(
            self.mixModels[1].transmat_)) / (norm0 * norm1)
        self.mixModels[0].similarity = similarity
        self.mixModels[1].similarity = similarity
        self.mixModels[0].iter = it

        # Each model sees the other's transition matrix.
        self.mixModels[0].other_trans = self.mixModels[1].transmat_
        self.mixModels[1].other_trans = self.mixModels[0].transmat_

        if self.reg_:
            self._fit_coef(stats)
        else:
            self.mixCoef = stats['mix_post']
            normalize(self.mixCoef, axis=1)
        self.mixModels[0].transmat_prior = stats['trans_prior0']
        self.mixModels[0]._do_mstep(stats['mix_idx0'])
        # Let model 1 see model 0's update from this iteration.
        self.mixModels[1].other_trans = self.mixModels[0].transmat_
        self.mixModels[1].transmat_prior = stats['trans_prior1']
        self.mixModels[1]._do_mstep(stats['mix_idx1'])

        print('trans0 {}'.format(self.mixModels[0].transmat_))
        print('trans1 {}'.format(self.mixModels[1].transmat_))
        t1 = time.time()

        currscore = self.score(X, y, lengths)
        trainloss_hist.append(-currscore)
        if valid_data is not None:
            validscore = self.score(X_valid, y_valid, lengths_valid)
            validloss_hist.append(-validscore)
            if validscore > max_validscore:
                max_validscore = validscore
                with open(self.name + '.pkl', 'wb') as f:
                    pickle.dump(self, f)

        if self.verbose:
            if (not self.reg_) and (prevscore > currscore):
                print('WARNING: loss has increased at iteration {}!'
                      .format(it))
                print('prev loss = {:.5f}, curr loss = {:.5f}'.format(
                    -prevscore, -currscore))
            elif valid_data is not None:
                print('it {}: train loss = {:.5f}, valid loss = {:.5f}, '
                      '{:.3f} sec/it'.format(it + 1, -currscore,
                                             -validscore, t1 - t0))
            else:
                print('it {}: loss = {:.5f}, {:.3f} sec/it'.format(
                    it + 1, -currscore, t1 - t0))

        ll = np.sum(self.scores_per_seq(X, y, lengths))
        print("ll: {}".format(ll))

        self.monitor_.report(currscore)
        if self.monitor_.converged:
            if self.verbose:
                print('Loss improved less than {}. Training stopped.'
                      .format(self.tol))
            break
        prevscore = currscore

    if valid_data:
        return trainloss_hist, validloss_hist
    else:
        return trainloss_hist
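# The diversity term above is a cosine similarity under the trace inner
# product <A, B> = trace(A @ B); a standalone sketch (the function name
# is mine, not from the code above).
import numpy as np

def transmat_similarity(A, B):
    # Close to 1 for near-proportional transition matrices,
    # smaller as the two dynamics diverge.
    norm_a = np.sqrt(np.trace(A @ A))
    norm_b = np.sqrt(np.trace(B @ B))
    return np.trace(A @ B) / (norm_a * norm_b)

A = np.array([[0.9, 0.1], [0.2, 0.8]])
B = np.array([[0.5, 0.5], [0.5, 0.5]])
print(transmat_similarity(A, B))  # ~0.82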
def fit(self, X, lengths=None, fast_update=False, update_dnn=False):
    # ``fast_update`` skips averaging the start/transition statistics
    # over sequences; ``update_dnn`` retrains the frame-level network on
    # the Viterbi alignments. Both are assumed configuration flags (free
    # variables in the original), exposed here as keyword arguments.
    X = check_array(X)
    self._init(X, lengths=lengths)
    self._check()

    self.monitor_ = ConvergenceMonitor(self.tol, self.n_iter, self.verbose)
    for iter in range(self.n_iter):
        print('iteration: {}'.format(iter))
        stats = self._initialize_sufficient_statistics()
        curr_logprob = 0
        tt = 0
        path_list = list()
        for i, j in iter_from_X_lengths(X, lengths):
            logprob, state_sequence = self.decode(X[i:j],
                                                  algorithm="viterbi")
            curr_logprob += logprob
            # Hard (Viterbi) counts: epsilon[t, p, q] marks the observed
            # transition p -> q at time t, gamma[t, s] the occupied
            # state; the final frame copies the previous occupancy.
            epsilon = np.zeros((state_sequence.shape[0] - 1,
                                self.n_components, self.n_components))
            gamma = np.zeros((state_sequence.shape[0], self.n_components))
            for t in range(state_sequence.shape[0] - 1):
                epsilon[t, state_sequence[t], state_sequence[t + 1]] = 1
            for t in range(state_sequence.shape[0]):
                for s in range(self.n_components):
                    if t != (state_sequence.shape[0] - 1):
                        gamma[t, s] = np.sum(epsilon[t, s])
                    else:
                        gamma[t, s] = gamma[t - 1, s]
            path_list.append(state_sequence)
            self._accumulate_sufficient_statistics(
                stats, X[i:j], epsilon, gamma, state_sequence, None)
            tt += 1

        print('average loss: {}'.format(curr_logprob / tt))
        if not fast_update:
            stats['start'] /= tt
            stats['trans'] /= tt
        self._do_mstep(stats)

        if update_dnn:
            # Retrain the frame classifier on the stacked Viterbi paths.
            temp_path = np.zeros((0, 1))
            for k, (i, j) in enumerate(iter_from_X_lengths(X, lengths)):
                temp_path = np.vstack(
                    [temp_path, np.array(path_list[k]).reshape(-1, 1)])
            self.mlp.train(X, temp_path, 20)

        # Empirical state occupancy from the Viterbi alignments.
        acoustic_model = np.zeros(self.n_components)
        for i, j in iter_from_X_lengths(X, lengths):
            logprob, state_sequence = self.decode(X[i:j],
                                                  algorithm="viterbi")
            for state in state_sequence:
                acoustic_model[state] += 1
        self.acoustic_model = acoustic_model / np.sum(acoustic_model)

        self.monitor_.report(curr_logprob)
        # Manual convergence check mirroring ConvergenceMonitor, but
        # with the tolerance relative to the current log-likelihood.
        if self.monitor_.iter == self.monitor_.n_iter or \
                (len(self.monitor_.history) == 2 and
                 abs(self.monitor_.history[1] - self.monitor_.history[0]) <
                 self.monitor_.tol * abs(self.monitor_.history[1])):
            break
        print('----------------------------------------------')

    return self
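# The manual stopping rule above differs from ``ConvergenceMonitor``'s
# built-in ``converged``: the tolerance is taken relative to the current
# log-likelihood. Factored out as a sketch (the helper name is mine).
def has_converged_relative(monitor):
    if monitor.iter == monitor.n_iter:
        return True
    if len(monitor.history) == 2:
        delta = abs(monitor.history[1] - monitor.history[0])
        return delta < monitor.tol * abs(monitor.history[1])
    return False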