Example #1
    def fit(self, train, test, n_states, train_lag_time, repetition, args, outfile):
        n_features = train[0].shape[1]
        kwargs = dict(n_states=n_states, n_features=n_features,
                      n_init=args.n_init,
                      n_em_iter=args.n_em_iter, n_lqa_iter=args.n_lqa_iter,
                      fusion_prior=args.fusion_prior, thresh=args.thresh,
                      reversible_type=args.reversible_type, platform=args.platform)
        model = GaussianFusionHMM(**kwargs)

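        # Record wall-clock time for the EM fit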
        start = time.time()
        model.fit(train)
        end = time.time()

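        # Collect the fitted parameters and run metadata for one JSON record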
        result = {
            'model': 'GaussianFusionHMM',
            'timescales': (np.real(model.timescales_) * train_lag_time).tolist(),
            'transmat': np.real(model.transmat_).tolist(),
            'populations': np.real(model.populations_).tolist(),
            'n_states': model.n_states,
            'split': args.split,
            'fusion_prior': args.fusion_prior,
            'train_lag_time': train_lag_time,
            'train_time': end - start,
            'means': np.real(model.means_).tolist(),
            'vars': np.real(model.vars_).tolist(),
            'train_logprob': model.fit_logprob_[-1],
            'n_train_observations': sum(len(t) for t in train),
            'n_test_observations': sum(len(t) for t in test),
            'train_logprobs': model.fit_logprob_,
            #'test_lag_time': args.test_lag_time,
            'cross_validation_fold': 0,
            'cross_validation_nfolds': 1,
            'repetition': repetition,
        }

        # model.transmat_ = contraction(model.transmat_, float(train_lag_time) / float(args.test_lag_time))
        # Don't do any contraction -- train and test at the same lagtime
        result['test_logprob'] = model.score(test)
        result['test_lag_time'] = train_lag_time

        if not np.all(np.isfinite(model.transmat_)):
            print('Non-finite entries in transmat!')

        json.dump(result, outfile)
        outfile.write('\n')
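A minimal sketch of how the fit() method above might be driven; the Fitter class name, the argparse.Namespace values, and the synthetic data are illustrative assumptions, not part of the original:

import argparse
import sys

import numpy as np

# Hypothetical hyperparameters mirroring the args.* fields read by fit()
args = argparse.Namespace(
    n_init=5, n_em_iter=100, n_lqa_iter=10, fusion_prior=1e-2,
    thresh=1e-2, reversible_type='mle', platform='cpu', split='rand')

# Synthetic stand-ins for the train/test sequences (lists of 2-D arrays)
train = [np.random.randn(500, 3) for _ in range(4)]
test = [np.random.randn(500, 3) for _ in range(2)]

# Fitter is a hypothetical class holding the fit() method shown above
Fitter().fit(train, test, n_states=4, train_lag_time=1,
             repetition=0, args=args, outfile=sys.stdout)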
Example #2
def test_plusmin():
    # Set constants
    n_hotstart = 3
    n_em_iter = 3
    n_experiments = 1
    n_seq = 1
    T = 2000
    gamma = 512.

    # Generate data
    plusmin = PlusminModel()
    data, hidden = plusmin.generate_dataset(n_seq, T)
    n_features = plusmin.x_dim
    n_components = plusmin.K

    # Train MSLDS
    model = MetastableSwitchingLDS(n_components, n_features,
            n_hotstart=n_hotstart, n_em_iter=n_em_iter,
            n_experiments=n_experiments)
    model.fit(data, gamma=gamma)
    mslds_score = model.score(data)
    print("gamma = %f" % gamma)
    print("MSLDS Log-Likelihood = %f" % mslds_score)
    print()

    # Fit Gaussian HMM for comparison
    g = GaussianFusionHMM(plusmin.K, plusmin.x_dim)
    g.fit(data)
    hmm_score = g.score(data)
    print("HMM Log-Likelihood = %f" %  hmm_score)
    print()

    # Plot sample from MSLDS
    sim_xs, sim_Ss = model.sample(T, init_state=0, init_obs=plusmin.mus[0])
    sim_xs = np.reshape(sim_xs, (n_seq, T, plusmin.x_dim))
    plt.close('all')
    plt.figure(1)
    plt.plot(range(T), data[0], label="Observations")
    plt.plot(range(T), sim_xs[0], label='Sampled Observations')
    plt.legend()
    plt.show()
Example #3
def test_doublewell():
    import pdb, traceback, sys
    try:
        n_components = 2
        n_features = 1
        n_em_iter = 1
        n_experiments = 1
        tol = 1e-1

        data = load_doublewell(random_state=0)['trajectories']
        T = len(data[0])

        # Fit MSLDS model 
        model = MetastableSwitchingLDS(n_components, n_features,
            n_experiments=n_experiments, n_em_iter=n_em_iter)
        model.fit(data, gamma=.1, tol=tol)
        mslds_score = model.score(data)
        print("MSLDS Log-Likelihood = %f" %  mslds_score)

        # Fit Gaussian HMM for comparison
        g = GaussianFusionHMM(n_components, n_features)
        g.fit(data)
        hmm_score = g.score(data)
        print("HMM Log-Likelihood = %f" %  hmm_score)
        print()

        # Plot sample from MSLDS
        sim_xs, sim_Ss = model.sample(T, init_state=0)
        plt.close('all')
        plt.figure(1)
        plt.plot(range(T), data[0], label="Observations")
        plt.plot(range(T), sim_xs, label='Sampled Observations')
        plt.legend()
        plt.show()
    except Exception:
        _, _, tb = sys.exc_info()
        traceback.print_exc()
        pdb.post_mortem(tb)
Example #4
def test_viterbi():
    data = [np.random.randn(1000, 3) + np.tile(np.sin(np.arange(1000)/100.0), (3,1)).T]

    model1 = GaussianFusionHMM(n_states=2, n_features=3, platform='sklearn').fit(data)
    model2 = GaussianFusionHMM(n_states=2, n_features=3, platform='cpu').fit(data)

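    # Copy the sklearn-trained parameters into the CPU model so both
    # backends decode the identical HMM; Viterbi output should then match.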
    model2.means_ = model1.means_
    model2.vars_ = model1.vars_
    model2.transmat_ = model1.transmat_
    model2.populations_ = model1.populations_

    logprob1, seq1 = model1.predict(data)
    logprob2, seq2 = model2.predict(data)

    np.testing.assert_almost_equal(logprob1, logprob2, decimal=3)
    np.testing.assert_array_equal(seq1[0], seq2[0])


    if PLOT:
        import matplotlib.pyplot as pp
        pp.plot(data[0][:, 0], label='data')
        pp.plot(seq1[0], lw=5, label='viterbi')
        pp.legend()
        pp.show()
Example #5
    def fit(self, train, test, n_states, train_lag_time, fold, args, outfile):
        kwargs = dict(n_states=n_states, n_features=self.n_features,
                      n_em_iter=args.n_em_iter, n_lqa_iter=args.n_lqa_iter,
                      fusion_prior=args.fusion_prior, thresh=args.thresh,
                      reversible_type=args.reversible_type,
                      platform=args.platform)
        print(kwargs)
        model = GaussianFusionHMM(**kwargs)

        start = time.time()
        model.fit(train)
        end = time.time()

        result = {
            'model': 'GaussianFusionHMM',
            'timescales': (np.real(model.timescales_) * train_lag_time).tolist(),
            'transmat': np.real(model.transmat_).tolist(),
            'populations': np.real(model.populations_).tolist(),
            'n_states': model.n_states,
            'split': args.split,
            'fusion_prior': args.fusion_prior,
            'train_lag_time': train_lag_time,
            'train_time': end - start,
            'means': np.real(model.means_).tolist(),
            'vars': np.real(model.vars_).tolist(),
            'train_logprob': model.fit_logprob_[-1],
            'n_train_observations': sum(len(t) for t in train),
            'n_test_observations': sum(len(t) for t in test),
            'train_logprobs': model.fit_logprob_,
            #'test_lag_time': args.test_lag_time,
            'cross_validation_fold': fold,
            'cross_validation_nfolds': args.n_cv,
        }

        # model.transmat_ = contraction(model.transmat_, float(train_lag_time) / float(args.test_lag_time))
        # Don't do any contraction -- train and test at the same lagtime
        result['test_logprob'] = model.score(test)
        result['test_lag_time'] = train_lag_time

        if not np.all(np.isfinite(model.transmat_)):
            print('Non-finite entries in transmat!')

        json.dump(result, outfile)
        outfile.write('\n')
Example #6
def test_muller_potential():
    import pdb, traceback, sys
    try:
        # Set constants
        n_hotstart = 3
        n_em_iter = 3
        n_experiments = 1
        n_seq = 1
        num_trajs = 1
        T = 2500
        sim_T = 2500
        gamma = 200. 

        # Generate data
        warnings.filterwarnings("ignore", category=DeprecationWarning)
        muller = MullerModel()
        data, trajectory, start = \
                muller.generate_dataset(n_seq, num_trajs, T)
        n_features = muller.x_dim
        n_components = muller.K

        # Train MSLDS
        model = MetastableSwitchingLDS(n_components, n_features,
            n_hotstart=n_hotstart, n_em_iter=n_em_iter,
            n_experiments=n_experiments)
        model.fit(data, gamma=gamma)
        mslds_score = model.score(data)
        print("MSLDS Log-Likelihood = %f" %  mslds_score)

        # Fit Gaussian HMM for comparison
        g = GaussianFusionHMM(n_components, n_features)
        g.fit(data)
        hmm_score = g.score(data)
        print("HMM Log-Likelihood = %f" %  hmm_score)

        # Clear the current axes and plot the reference trajectory
        plt.cla()
        plt.plot(trajectory[start:, 0], trajectory[start:, 1], color='k')
        plt.scatter(model.means_[:, 0], model.means_[:, 1], 
                    color='r', zorder=10)
        plt.scatter(data[0][:, 0], data[0][:, 1],
                edgecolor='none', facecolor='k', zorder=1)
        Delta = 0.5
        minx = min(data[0][:, 0])
        maxx = max(data[0][:, 0])
        miny = min(data[0][:, 1])
        maxy = max(data[0][:, 1])
        sim_xs, sim_Ss = model.sample(sim_T, init_state=0,
                init_obs=model.means_[0])

        minx = min(min(sim_xs[:, 0]), minx) - Delta
        maxx = max(max(sim_xs[:, 0]), maxx) + Delta
        miny = min(min(sim_xs[:, 1]), miny) - Delta
        maxy = max(max(sim_xs[:, 1]), maxy) + Delta
        plt.scatter(sim_xs[:, 0], sim_xs[:, 1], edgecolor='none',
                    zorder=5, facecolor='g')
        plt.plot(sim_xs[:, 0], sim_xs[:, 1], zorder=5, color='g')


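        # Overlay the Muller potential beneath the data and sampled points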
        MullerForce.plot(ax=plt.gca(), minx=minx, maxx=maxx,
                         miny=miny, maxy=maxy)
        plt.show()
    except Exception:
        _, _, tb = sys.exc_info()
        traceback.print_exc()
        pdb.post_mortem(tb)
Example #7
def test_alanine_dipeptide():
    import pdb, traceback, sys
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    try:
        b = fetch_alanine_dipeptide()
        trajs = b.trajectories
        n_seq = len(trajs)
        n_frames = trajs[0].n_frames
        n_atoms = trajs[0].n_atoms
        n_features = n_atoms * 3
        sim_T = 1000
        data_home = get_data_home()
        data_dir = join(data_home, TARGET_DIRECTORY_ALANINE)
        top = md.load(join(data_dir, 'ala2.pdb'))
        n_components = 2
        # Superpose each trajectory on the reference topology and
        # flatten the coordinates to (n_frames, n_features)
        data = []
        for traj in trajs:
            traj.superpose(top)
            Z = traj.xyz
            Z = np.reshape(Z, (len(Z), n_features), order='F')
            data.append(Z)

        # Fit MSLDS model 
        n_experiments = 1
        n_em_iter = 1
        tol = 1e-1
        model = MetastableSwitchingLDS(n_components, 
            n_features, n_experiments=n_experiments, 
            n_em_iter=n_em_iter) 
        model.fit(data, gamma=.1, tol=tol, verbose=True)
        mslds_score = model.score(data)
        print("MSLDS Log-Likelihood = %f" %  mslds_score)

        # Fit Gaussian HMM for comparison
        g = GaussianFusionHMM(n_components, n_features)
        g.fit(data)
        hmm_score = g.score(data)
        print("HMM Log-Likelihood = %f" %  hmm_score)
        print()

        # Generate a trajectory from learned model.
        sample_traj, hidden_states = model.sample(sim_T)
        states = [[] for _ in range(n_components)]

        # Presort the data into the metastable wells
        for k in range(n_components):
            for i in range(len(trajs)):
                traj = trajs[i]
                Z = traj.xyz
                Z = np.reshape(Z, (len(Z), n_features), order='F')
                logprob = log_multivariate_normal_density(Z,
                    np.array(model.means_),
                    np.array(model.covars_), covariance_type='full')
                assignments = np.argmax(logprob, axis=1)
                #probs = np.max(logprob, axis=1)
                # pick structures that have highest log probability in state
                s = traj[assignments == k]
                states[k].append(s)

        # Pick frame from original trajectories closest to current sample
        gen_traj = None
        for t in range(sim_T):
            h = hidden_states[t]
            best_logprob = -np.inf
            best_frame = None
            for i in range(len(trajs)):
                if t > 0:
                    states[h][i].superpose(gen_traj, t-1)
                Z = states[h][i].xyz
                Z = np.reshape(Z, (len(Z), n_features), order='F')
                mean = sample_traj[t]
                logprobs = log_multivariate_normal_density(Z,
                    mean, model.Qs_[h], covariance_type='full')
                ind = np.argmax(logprobs, axis=0)
                logprob = logprobs[ind]
                if logprob > best_logprob:
                    best_logprob = logprob
                    best_frame = states[h][i][ind]
            if t == 0:
                gen_traj = best_frame
            else:
                gen_traj = gen_traj.join(best_frame)
        out = 'ala2_sample'  # hypothetical output basename; the original referenced an undefined self.out
        gen_traj.save('%s.xtc' % out)
        gen_traj[0].save('%s.xtc.pdb' % out)
    except Exception:
        _, _, tb = sys.exc_info()
        traceback.print_exc()
        pdb.post_mortem(tb)