def fit(self, train, test, n_states, train_lag_time, repetition, args, outfile):
    n_features = train[0].shape[1]
    kwargs = dict(n_states=n_states, n_features=n_features,
                  n_init=args.n_init, n_em_iter=args.n_em_iter,
                  n_lqa_iter=args.n_lqa_iter, fusion_prior=args.fusion_prior,
                  thresh=args.thresh, reversible_type=args.reversible_type,
                  platform=args.platform)
    model = GaussianFusionHMM(**kwargs)

    start = time.time()
    model.fit(train)
    end = time.time()

    result = {
        'model': 'GaussianFusionHMM',
        'timescales': (np.real(model.timescales_) * train_lag_time).tolist(),
        'transmat': np.real(model.transmat_).tolist(),
        'populations': np.real(model.populations_).tolist(),
        'n_states': model.n_states,
        'split': args.split,
        'fusion_prior': args.fusion_prior,
        'train_lag_time': train_lag_time,
        'train_time': end - start,
        'means': np.real(model.means_).tolist(),
        'vars': np.real(model.vars_).tolist(),
        'train_logprob': model.fit_logprob_[-1],
        'n_train_observations': sum(len(t) for t in train),
        'n_test_observations': sum(len(t) for t in test),
        'train_logprobs': model.fit_logprob_,
        # 'test_lag_time': args.test_lag_time,
        'cross_validation_fold': 0,
        'cross_validation_nfolds': 1,
        'repetition': repetition,
    }

    # model.transmat_ = contraction(model.transmat_,
    #                               float(train_lag_time) / float(args.test_lag_time))
    # Don't do any contraction -- train and test at the same lag time
    result['test_logprob'] = model.score(test)
    result['test_lag_time'] = train_lag_time

    if not np.all(np.isfinite(model.transmat_)):
        print('Nonfinite numbers in transmat !!')
    json.dump(result, outfile)
    outfile.write('\n')
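# A minimal driver sketch for the repetition-based fit() above -- an assumption,
# not part of the original script. The Namespace fields mirror the attributes
# that fit() reads; `runner` (an instance of the owning class), `train`, and
# `test` are hypothetical placeholders.
import argparse

args = argparse.Namespace(n_init=10, n_em_iter=100, n_lqa_iter=10,
                          fusion_prior=1e-2, thresh=1e-2,
                          reversible_type='mle', platform='cpu', split=0.5)
with open('hmm-results.jsonlines', 'a') as outfile:
    for repetition in range(3):
        runner.fit(train, test, n_states=4, train_lag_time=1,
                   repetition=repetition, args=args, outfile=outfile)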
def test_plusmin():
    # Set constants
    n_hotstart = 3
    n_em_iter = 3
    n_experiments = 1
    n_seq = 1
    T = 2000
    gamma = 512.

    # Generate data
    plusmin = PlusminModel()
    data, hidden = plusmin.generate_dataset(n_seq, T)
    n_features = plusmin.x_dim
    n_components = plusmin.K

    # Train MSLDS
    model = MetastableSwitchingLDS(n_components, n_features,
                                   n_hotstart=n_hotstart,
                                   n_em_iter=n_em_iter,
                                   n_experiments=n_experiments)
    model.fit(data, gamma=gamma)
    mslds_score = model.score(data)
    print("gamma = %f" % gamma)
    print("MSLDS Log-Likelihood = %f" % mslds_score)
    print()

    # Fit Gaussian HMM for comparison
    g = GaussianFusionHMM(plusmin.K, plusmin.x_dim)
    g.fit(data)
    hmm_score = g.score(data)
    print("HMM Log-Likelihood = %f" % hmm_score)
    print()

    # Plot sample from MSLDS
    sim_xs, sim_Ss = model.sample(T, init_state=0, init_obs=plusmin.mus[0])
    sim_xs = np.reshape(sim_xs, (n_seq, T, plusmin.x_dim))

    plt.close('all')
    plt.figure(1)
    plt.plot(range(T), data[0], label="Observations")
    plt.plot(range(T), sim_xs[0], label='Sampled Observations')
    plt.legend()
    plt.show()
def test_doublewell():
    import pdb, traceback, sys
    try:
        n_components = 2
        n_features = 1
        n_em_iter = 1
        n_experiments = 1
        tol = 1e-1

        data = load_doublewell(random_state=0)['trajectories']
        T = len(data[0])

        # Fit MSLDS model
        model = MetastableSwitchingLDS(n_components, n_features,
                                       n_experiments=n_experiments,
                                       n_em_iter=n_em_iter)
        model.fit(data, gamma=.1, tol=tol)
        mslds_score = model.score(data)
        print("MSLDS Log-Likelihood = %f" % mslds_score)

        # Fit Gaussian HMM for comparison
        g = GaussianFusionHMM(n_components, n_features)
        g.fit(data)
        hmm_score = g.score(data)
        print("HMM Log-Likelihood = %f" % hmm_score)
        print()

        # Plot sample from MSLDS
        sim_xs, sim_Ss = model.sample(T, init_state=0)
        plt.close('all')
        plt.figure(1)
        plt.plot(range(T), data[0], label="Observations")
        plt.plot(range(T), sim_xs, label='Sampled Observations')
        plt.legend()
        plt.show()
    except Exception:
        etype, value, tb = sys.exc_info()
        traceback.print_exc()
        pdb.post_mortem(tb)
def test_viterbi():
    # Noisy sinusoidal data: Gaussian noise around a slow sine wave in 3 dimensions
    data = [np.random.randn(1000, 3) +
            np.tile(np.sin(np.arange(1000) / 100.0), (3, 1)).T]

    model1 = GaussianFusionHMM(n_states=2, n_features=3, platform='sklearn').fit(data)
    model2 = GaussianFusionHMM(n_states=2, n_features=3, platform='cpu').fit(data)

    # Copy the sklearn-fit parameters into the cpu model so that both
    # backends run Viterbi decoding with identical parameters
    model2.means_ = model1.means_
    model2.vars_ = model1.vars_
    model2.transmat_ = model1.transmat_
    model2.populations_ = model1.populations_

    logprob1, seq1 = model1.predict(data)
    logprob2, seq2 = model2.predict(data)
    np.testing.assert_almost_equal(logprob1, logprob2, decimal=3)
    np.testing.assert_array_equal(seq1[0], seq2[0])

    if PLOT:
        import matplotlib.pyplot as pp
        pp.plot(data[0][:, 0], label='data')
        pp.plot(seq1[0], lw=5, label='viterbi')
        pp.legend()
        pp.show()
def fit(self, train, test, n_states, train_lag_time, fold, args, outfile):
    kwargs = dict(n_states=n_states, n_features=self.n_features,
                  n_em_iter=args.n_em_iter, n_lqa_iter=args.n_lqa_iter,
                  fusion_prior=args.fusion_prior, thresh=args.thresh,
                  reversible_type=args.reversible_type, platform=args.platform)
    print(kwargs)
    model = GaussianFusionHMM(**kwargs)

    start = time.time()
    model.fit(train)
    end = time.time()

    result = {
        'model': 'GaussianFusionHMM',
        'timescales': (np.real(model.timescales_) * train_lag_time).tolist(),
        'transmat': np.real(model.transmat_).tolist(),
        'populations': np.real(model.populations_).tolist(),
        'n_states': model.n_states,
        'split': args.split,
        'fusion_prior': args.fusion_prior,
        'train_lag_time': train_lag_time,
        'train_time': end - start,
        'means': np.real(model.means_).tolist(),
        'vars': np.real(model.vars_).tolist(),
        'train_logprob': model.fit_logprob_[-1],
        'n_train_observations': sum(len(t) for t in train),
        'n_test_observations': sum(len(t) for t in test),
        'train_logprobs': model.fit_logprob_,
        # 'test_lag_time': args.test_lag_time,
        'cross_validation_fold': fold,
        'cross_validation_nfolds': args.n_cv,
    }

    # model.transmat_ = contraction(model.transmat_,
    #                               float(train_lag_time) / float(args.test_lag_time))
    # Don't do any contraction -- train and test at the same lag time
    result['test_logprob'] = model.score(test)
    result['test_lag_time'] = train_lag_time

    if not np.all(np.isfinite(model.transmat_)):
        print('Nonfinite numbers in transmat !!')
    json.dump(result, outfile)
    outfile.write('\n')
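# A k-fold driver sketch implied by the fold-based fit() above -- an assumption,
# not part of the original script. Whole trajectories are assigned to folds so
# train and test sequences never overlap; `trajectories`, `args`, and `runner`
# are hypothetical, and KFold here is the modern sklearn.model_selection API.
from sklearn.model_selection import KFold

with open('hmm-cv-results.jsonlines', 'a') as outfile:
    kf = KFold(n_splits=args.n_cv)
    for fold, (train_idx, test_idx) in enumerate(kf.split(trajectories)):
        train = [trajectories[i] for i in train_idx]
        test = [trajectories[i] for i in test_idx]
        runner.fit(train, test, n_states=4, train_lag_time=1,
                   fold=fold, args=args, outfile=outfile)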
def test_muller_potential():
    import pdb, traceback, sys
    try:
        # Set constants
        n_hotstart = 3
        n_em_iter = 3
        n_experiments = 1
        n_seq = 1
        num_trajs = 1
        T = 2500
        sim_T = 2500
        gamma = 200.

        # Generate data
        warnings.filterwarnings("ignore", category=DeprecationWarning)
        muller = MullerModel()
        data, trajectory, start = \
            muller.generate_dataset(n_seq, num_trajs, T)
        n_features = muller.x_dim
        n_components = muller.K

        # Train MSLDS
        model = MetastableSwitchingLDS(n_components, n_features,
                                       n_hotstart=n_hotstart,
                                       n_em_iter=n_em_iter,
                                       n_experiments=n_experiments)
        model.fit(data, gamma=gamma)
        mslds_score = model.score(data)
        print("MSLDS Log-Likelihood = %f" % mslds_score)

        # Fit Gaussian HMM for comparison
        g = GaussianFusionHMM(n_components, n_features)
        g.fit(data)
        hmm_score = g.score(data)
        print("HMM Log-Likelihood = %f" % hmm_score)

        # Clear display
        plt.cla()
        plt.plot(trajectory[start:, 0], trajectory[start:, 1], color='k')
        plt.scatter(model.means_[:, 0], model.means_[:, 1],
                    color='r', zorder=10)
        plt.scatter(data[0][:, 0], data[0][:, 1],
                    edgecolor='none', facecolor='k', zorder=1)

        Delta = 0.5
        minx = min(data[0][:, 0])
        maxx = max(data[0][:, 0])
        miny = min(data[0][:, 1])
        maxy = max(data[0][:, 1])

        sim_xs, sim_Ss = model.sample(sim_T, init_state=0,
                                      init_obs=model.means_[0])
        minx = min(min(sim_xs[:, 0]), minx) - Delta
        maxx = max(max(sim_xs[:, 0]), maxx) + Delta
        miny = min(min(sim_xs[:, 1]), miny) - Delta
        maxy = max(max(sim_xs[:, 1]), maxy) + Delta
        plt.scatter(sim_xs[:, 0], sim_xs[:, 1],
                    edgecolor='none', zorder=5, facecolor='g')
        plt.plot(sim_xs[:, 0], sim_xs[:, 1], zorder=5, color='g')

        MullerForce.plot(ax=plt.gca(), minx=minx, maxx=maxx,
                         miny=miny, maxy=maxy)
        plt.show()
    except Exception:
        etype, value, tb = sys.exc_info()
        traceback.print_exc()
        pdb.post_mortem(tb)
def test_alanine_dipeptide():
    import pdb, traceback, sys
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    try:
        b = fetch_alanine_dipeptide()
        trajs = b.trajectories
        n_seq = len(trajs)
        n_frames = trajs[0].n_frames
        n_atoms = trajs[0].n_atoms
        n_features = n_atoms * 3
        sim_T = 1000
        out = 'alanine'  # output prefix for the generated trajectory (placeholder)

        data_home = get_data_home()
        data_dir = join(data_home, TARGET_DIRECTORY_ALANINE)
        top = md.load(join(data_dir, 'ala2.pdb'))
        n_components = 2

        # Superpose each trajectory on the topology and flatten to features
        data = []
        for traj in trajs:
            traj.superpose(top)
            Z = traj.xyz
            Z = np.reshape(Z, (len(Z), n_features), order='F')
            data.append(Z)

        # Fit MSLDS model
        n_experiments = 1
        n_em_iter = 1
        tol = 1e-1
        model = MetastableSwitchingLDS(n_components, n_features,
                                       n_experiments=n_experiments,
                                       n_em_iter=n_em_iter)
        model.fit(data, gamma=.1, tol=tol, verbose=True)
        mslds_score = model.score(data)
        print("MSLDS Log-Likelihood = %f" % mslds_score)

        # Fit Gaussian HMM for comparison
        g = GaussianFusionHMM(n_components, n_features)
        g.fit(data)
        hmm_score = g.score(data)
        print("HMM Log-Likelihood = %f" % hmm_score)
        print()

        # Generate a trajectory from the learned model.
        sample_traj, hidden_states = model.sample(sim_T)
        states = []
        for k in range(n_components):
            states.append([])

        # Presort the data into the metastable wells: assign each frame to the
        # state under which it has the highest log probability
        for k in range(n_components):
            for i in range(len(trajs)):
                traj = trajs[i]
                Z = traj.xyz
                Z = np.reshape(Z, (len(Z), n_features), order='F')
                logprob = log_multivariate_normal_density(
                    Z, np.array(model.means_), np.array(model.covars_),
                    covariance_type='full')
                assignments = np.argmax(logprob, axis=1)
                # probs = np.max(logprob, axis=1)
                s = traj[assignments == k]
                states[k].append(s)

        # Pick the frame from the original trajectories closest to each sample
        gen_traj = None
        for t in range(sim_T):
            h = hidden_states[t]
            best_logprob = -np.inf
            best_frame = None
            for i in range(len(trajs)):
                if t > 0:
                    states[h][i].superpose(gen_traj, t - 1)
                Z = states[h][i].xyz
                Z = np.reshape(Z, (len(Z), n_features), order='F')
                mean = sample_traj[t]
                logprobs = log_multivariate_normal_density(
                    Z, mean, model.Qs_[h], covariance_type='full')
                ind = np.argmax(logprobs, axis=0)
                logprob = logprobs[ind]
                if logprob > best_logprob:
                    best_logprob = logprob
                    best_frame = states[h][i][ind]
            if t == 0:
                gen_traj = best_frame
            else:
                gen_traj = gen_traj.join(best_frame)
        gen_traj.save('%s.xtc' % out)
        gen_traj[0].save('%s.xtc.pdb' % out)
    except Exception:
        etype, value, tb = sys.exc_info()
        traceback.print_exc()
        pdb.post_mortem(tb)