def fit_bootstrap(yaml_file, pool=None):
    """Fit and save a bootstrapped MSM for every protein in the config.

    Entries of ``yaml_file["mdl_params"]`` are routed by key prefix:
    ``msm__<name>`` entries are collected and handed to
    ``BootStrapMarkovStateModel`` as ``msm_args``, while
    ``bootstrap__<name>`` entries are passed to it directly as keyword
    arguments.  Keys with neither prefix are ignored.  ``n_samples``
    defaults to 100 and ``n_procs`` to 2 when the config does not set them.

    For each protein in ``yaml_file["protein_list"]`` the function enters
    that protein's model directory, loads ``assignments.pkl``, fits the
    bootstrap model and writes ``bootstrap_msm_mdl.pkl``, ``msm_mdl.pkl``
    (the fitted model's ``mle_`` attribute) and ``fixed_assignments.pkl``.

    Parameters
    ----------
    yaml_file : dict
        Loaded project configuration; must provide ``"mdl_params"`` and
        ``"protein_list"``.
    pool : optional
        Forwarded unchanged to ``BootStrapMarkovStateModel.fit``.

    Returns
    -------
    None
    """
    msm_prefix = "msm__"
    bootstrap_prefix = "bootstrap__"
    current_mdl_params = {}
    bootstrap_mdl_params = {}

    for key, value in yaml_file["mdl_params"].items():
        # Slice off only the leading prefix.  Splitting on the prefix text
        # would truncate an option name that happens to contain it again.
        if key.startswith(msm_prefix):
            current_mdl_params[key[len(msm_prefix):]] = value
        elif key.startswith(bootstrap_prefix):
            bootstrap_mdl_params[key[len(bootstrap_prefix):]] = value

    bootstrap_mdl_params.setdefault("n_samples", 100)
    # Keep 2 processes as the default, but let ``bootstrap__n_procs`` in the
    # config override it instead of colliding with a hard-coded keyword.
    bootstrap_mdl_params.setdefault("n_procs", 2)

    for protein in yaml_file["protein_list"]:
        with enter_protein_mdl_dir(yaml_file, protein):
            print(protein)
            assignments = verboseload("assignments.pkl")
            msm_mdl = BootStrapMarkovStateModel(msm_args=current_mdl_params,
                                                **bootstrap_mdl_params)
            msm_mdl.fit(list(assignments.values()), pool=pool)
            verbosedump(msm_mdl, "bootstrap_msm_mdl.pkl")
            verbosedump(msm_mdl.mle_, "msm_mdl.pkl")
            # Re-express every trajectory through the fitted ``mle_`` model;
            # ``transform`` returns a sequence and only its first element is
            # kept for each trajectory.
            fixed_assignments = {
                key: msm_mdl.mle_.transform(traj, mode='fill')[0]
                for key, traj in assignments.items()
            }
            verbosedump(fixed_assignments, 'fixed_assignments.pkl')
    return
Example #2
0
def test_mle_eq():
    """The bootstrap model's ``mle_`` must agree with a directly fit MSM.

    Both models are fit on the same single two-state trajectory with a lag
    time of 1, then their populations and timescales are compared.
    """
    msm_kwargs = {'lag_time': 1}
    trajectories = [[0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1]]

    reference = MarkovStateModel(**msm_kwargs)
    bootstrap = BootStrapMarkovStateModel(
        n_samples=10, n_procs=2, msm_args=msm_kwargs)

    reference.fit(trajectories)
    bootstrap.fit(trajectories)

    # the model stored on the bootstrap wrapper should match the plain fit
    for attribute in ("populations_", "timescales_"):
        eq(getattr(reference, attribute), getattr(bootstrap.mle_, attribute))
Example #3
0
def test_resampler():
    """No bootstrap sample should reproduce the full original trajectory set.

    100 random trajectories are fit with 5 bootstrap samples.  Drawing 100
    indices with replacement and hitting every one of the 100 trajectories
    exactly once has probability 100!/100**100 (about 1e-42), so each
    sample is expected to contain fewer than 100 distinct trajectory ids.
    """
    n_trajectories = 100
    sequences = [
        np.random.randint(20, size=100) for _ in range(n_trajectories)
    ]

    mdl = BootStrapMarkovStateModel(
        n_samples=5, n_procs=2, msm_args={'lag_time': 10})
    mdl.fit(sequences)

    # check every bootstrap sample's set of distinct trajectory ids
    for sample_indices in mdl.resample_ind_:
        distinct_ids = np.unique(sample_indices)
        assert len(distinct_ids) != n_trajectories
Example #4
0
def test_score():
    """Every bootstrap sample must record a training and a test score."""
    n_samples = 10
    seq = [np.random.randint(20, size=100) for _ in range(3)]

    bmsm = BootStrapMarkovStateModel(
        n_samples=n_samples, n_procs=2, msm_args={'lag_time': 1})
    bmsm.fit(seq)

    # one training score per bootstrap sample ...
    training_scores = np.array(bmsm.all_training_scores_)
    assert training_scores.shape[0] == n_samples

    # ... none of which may be NaN
    nan_mask = np.isnan(bmsm.all_training_scores_)
    assert sum(nan_mask) == 0

    # a test score must have been attempted per sample (NaN is acceptable)
    test_scores = np.array(bmsm.all_test_scores_)
    assert bmsm.n_samples == test_scores.shape[0]
Example #5
0
def test_mdl():
    mdl = BootStrapMarkovStateModel(n_samples=10,
                                    n_procs=2,
                                    msm_args={'lag_time': 10})