Example #1
def test_that_all_featurizers_run():
    # TODO: include all featurizers, perhaps with generator tests

    trajectories = AlanineDipeptide().get_cached().trajectories
    trj0 = trajectories[0][0]
    atom_indices, pair_indices = get_atompair_indices(trj0)

    featurizer = AtomPairsFeaturizer(pair_indices)
    X_all = featurizer.transform(trajectories)

    featurizer = SuperposeFeaturizer(np.arange(15), trj0)
    X_all = featurizer.transform(trajectories)

    featurizer = DihedralFeaturizer(["phi", "psi"])
    X_all = featurizer.transform(trajectories)

    featurizer = VonMisesFeaturizer(["phi", "psi"])
    X_all = featurizer.transform(trajectories)

    # Below doesn't work on ALA dipeptide
    # featurizer = msmbuilder.featurizer.ContactFeaturizer()
    # X_all = featurizer.transform(trajectories)

    featurizer = RMSDFeaturizer(trj0)
    X_all = featurizer.transform(trajectories)
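A minimal, self-contained version of the pattern above for running a single featurizer end to end; the imports are an assumption about what the test module pulls in (msmbuilder 3.x names).

from msmbuilder.example_datasets import AlanineDipeptide
from msmbuilder.featurizer import DihedralFeaturizer

trajectories = AlanineDipeptide().get_cached().trajectories
featurizer = DihedralFeaturizer(["phi", "psi"])
X_all = featurizer.transform(trajectories)
# transform returns one (n_frames, n_features) array per trajectory
print(len(X_all), X_all[0].shape)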
Example #2
def read_and_featurize(filename, dihedrals=['chi2'], stride=10):
    #print("reading and featurizing %s" %(filename))
    top = md.load_frame(filename, 0).topology
    #print("got top")
    atom_indices = [a.index for a in top.atoms if a.residue.resSeq == 93 and a.residue.name != "POPC" and str(a.residue)[0] == "H"]
    print(len(atom_indices))
    #atom_indices = [a.index for a in top.atoms if a.residue.chain.index == 0 and a.residue.resSeq != 93 and a.residue.name != "POPC" and a.residue.resSeq != 130 and a.residue.resSeq != 172 and a.residue.resSeq != 79 and a.residue.resSeq != 341]
    #print("got indices")
    # note: the trajectory is loaded with a hardcoded stride of 1000, not the
    # `stride` argument that is embedded in the output file name below
    traj = md.load(filename, stride=1000, atom_indices=atom_indices)
    #print("got traj")
    featurizer = DihedralFeaturizer(types=dihedrals)
    features = featurizer.transform(traj_list=[traj])
    #print(np.shape(features))
    #print("finished featurizing")

    directory = filename.split("/")
    condition = directory[len(directory) - 2]
    dcd_file = directory[len(directory) - 1]
    new_file = "%s_features_stride%d.h5" % (dcd_file.rsplit(".", 1)[0], stride)
    new_root_dir = "/scratch/users/enf/b2ar_analysis/subsampled_features"
    new_condition_dir = "%s/%s" % (new_root_dir, condition)

    new_file_full = "%s/%s/%s" % (new_root_dir, condition, new_file)
    #print("saving features as %s" %new_file_full)

    verbosedump(features, new_file_full)
    return features
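read_and_featurize is written to be mapped over many trajectory files; a hedged caller sketch follows (the glob pattern and pool size are placeholders, not taken from the original script), mirroring the multiprocessing pattern used in a later example. Note that verbosedump only succeeds if the per-condition output directory already exists; Example #25 below creates it with os.makedirs.

import glob
import multiprocessing as mp

# placeholder glob; the real layout appears to be <root>/<condition>/<name>.dcd
filenames = glob.glob("/scratch/users/enf/b2ar_analysis/*/*.dcd")
pool = mp.Pool(mp.cpu_count())
all_features = pool.map(read_and_featurize, filenames)
pool.terminate()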
Example #3
def test_function_featurizer():
    trajectories = AlanineDipeptide().get_cached().trajectories
    trj0 = trajectories[0]

    # use the dihedral to compute phi for ala
    atom_ind = [[4, 6, 8, 14]]
    func = compute_dihedrals
    # test with args
    f = FunctionFeaturizer(func, func_args={"indices": atom_ind})
    res1 = f.transform([trj0])

    # test with function in a function without any args
    def funcception(trj):
        return compute_phi(trj)[1]

    f = FunctionFeaturizer(funcception)
    res2 = f.transform([trj0])

    # known results
    f3 = DihedralFeaturizer(['phi'], sincos=False)
    res3 = f3.transform([trj0])

    # compare all
    for r in [res2, res3]:
        np.testing.assert_array_almost_equal(res1, r)
Example #4
def test_pipeline():
    trajs = AlanineDipeptide().get_cached().trajectories
    p = Pipeline([('diheds', DihedralFeaturizer(['phi', 'psi'], sincos=False)),
                  ('hmm', VonMisesHMM(n_states=4))])

    predict = p.fit_predict(trajs)
    p.named_steps['hmm'].summarize()
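After fit_predict, the fitted HMM stays reachable through the pipeline's named_steps; a short hedged follow-up using the timescales_ attribute that other examples in this collection also rely on:

hmm = p.named_steps['hmm']
print(hmm.timescales_)  # implied relaxation timescales of the 4-state HMM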
Example #5
def _test_tic_sampling(yaml_file, protein_name, tic_list, n_frames, scheme):
    #test to make sure we are sampling right
    sample_for_all_proteins(yaml_file, [protein_name],
                            tic_list, n_frames, scheme=scheme)
    ser = ProteinSeries(yaml_file)
    prt = Protein(ser, protein_name)

    for tic_index in [0,1]:
        traj_path = os.path.join(base_dir,yaml_file["mdl_dir"],
                                 protein_name,"tic%d.xtc"%tic_index)
        traj_top = os.path.join(base_dir,yaml_file["mdl_dir"],
                                protein_name, "prot.pdb")
        tica_traj = mdt.load(traj_path,top=traj_top)
        print(tica_traj.n_frames)
        feat = DihedralFeaturizer(types=['phi', 'psi','chi1'])

        f = feat.partial_transform(tica_traj)
        t_f = np.round(prt.tica_mdl.transform([f]))

        #check that the tic goes from min to max
        print("Look here",t_f[0])
        assert t_f[0][0][tic_index] <= t_f[0][-1][tic_index]
        all_vals = []
        for traj_tica_data in prt.tica_data.values():
            all_vals.extend(traj_tica_data[:,tic_index])
            #sort it because all three sampling schemes use it

        all_vals = np.round(np.sort(all_vals))
        print(tic_index)
        print(t_f[0][:,tic_index] >= all_vals[0])
        print(t_f[0][:,tic_index] <= all_vals[-1])
        #make sure the frames are within limits
        assert (t_f[0][:,tic_index] >= all_vals[0]).all()
        assert (t_f[0][:,tic_index] <= all_vals[-1]).all()
    return True
Example #6
def read_and_featurize_divided(filename,
                               dihedrals=['phi', 'psi', 'chi2'],
                               stride=10):
    #print("reading and featurizing %s" %(filename))

    traj_top = md.load_frame(filename, 0).topology
    atom_indices = [
        a.index for a in traj_top.atoms if a.residue.name[0:2] != "HI"
    ]

    traj = md.load(filename, atom_indices=atom_indices)
    #print("got traj")
    featurizer = DihedralFeaturizer(types=dihedrals)
    features = featurizer.transform(traj_list=[traj])
    #print(np.shape(features))
    #print("finished featurizing")

    directory = filename.split("/")
    condition = directory[len(directory) - 2]
    dcd_file = directory[len(directory) - 1]
    new_file = "%s_features_stride%d.h5" % (dcd_file.rsplit(".", 1)[0], stride)
    new_root_dir = "/scratch/users/enf/b2ar_analysis/subsampled_features"
    new_condition_dir = "%s/%s" % (new_root_dir, condition)

    new_file_full = "%s/%s/%s" % (new_root_dir, condition, new_file)
    #print("saving features as %s" %new_file_full)

    verbosedump(features, new_file_full)
    return features
Example #7
def read_and_featurize(filename, dihedrals=['phi', 'psi', 'chi2'], stride=10):
    print(("reading and featurizing %s" % (filename)))

    traj = md.load(filename)
    #test_traj_init = md.load_frame(filename,5)
    #test_traj_init.save_pdb("/scratch/users/enf/b2ar_analysis/test_init.pdb")

    #traj.topology = fix_topology(traj.topology)

    #traj[-1].save_pdb("/scratch/users/enf/b2ar_analysis/test_fixed.pdb")
    #traj.save_dcd("/scratch/users/enf/b2ar_analysis/test_fixed.dcd")

    #print("got traj")
    featurizer = DihedralFeaturizer(types=dihedrals)
    features = featurizer.transform(traj_list=[traj])
    #print("finished featurizing")

    directory = filename.split("/")
    traj_file = directory[len(directory) - 1]
    condition = traj_file.split("_")[0].split(".")[0]

    print(("Condition %s has features of shape %s" %
           (condition, np.shape(features))))

    new_file = "/scratch/users/enf/b2ar_analysis/combined_features/%s_features.h5" % condition
    verbosedump(features, new_file)
Example #8
def featurize_project(proj_folder, top_folder, featurizer_object, stride,
                      view):

    # if already featurized, don't bother (should add a warning about this)
    if os.path.exists(proj_folder + "/featurized_traj.pkl"):
        return verboseload(proj_folder + "/featurized_traj.pkl")

    if featurizer_object is None:
        featurizer = DihedralFeaturizer(types=['phi', 'psi', 'chi1'])
    else:
        try:
            featurizer = verboseload(featurizer_object)
        except:
            sys.exit("Cant Load Featurizer using msmbuilder verboseload")

    feature_dict = {}

    traj_list = glob.glob(proj_folder + "/trajectories/*.dcd")

    jobs = [(proj_folder, top_folder, featurizer, traj, stride)
            for traj in traj_list]
    results = view.map_sync(featurize_traj, jobs)

    for result in results:
        feature_dict[result[0]] = result[1]

    verbosedump(feature_dict, proj_folder + "/featurized_traj.pkl")

    return feature_dict
Example #9
def featurize_file(job_tuple):

    yaml_file, protein, feat, traj_file,stride = job_tuple
    yaml_file = load_yaml_file(yaml_file)

    if feat is None:
        feat = DihedralFeaturizer(types=['phi', 'psi','chi1'])

    _check_output_folder_exists(yaml_file, protein)

    output_folder = os.path.join(yaml_file["base_dir"],
                                 protein,
                                 yaml_file["feature_dir"])

    traj_name = os.path.splitext(os.path.basename(traj_file))[0]
    output_fname = os.path.join(output_folder, traj_name+".jl")

    feat_descriptor = os.path.join(output_folder, "feature_descriptor.h5")
    try:
        trj = mdt.load(traj_file)
    except:
        warnings.warn("Removing %s because of misformed trajectory"%traj_file)
        os.remove(traj_file)
        return

    features = feat.partial_transform(trj)
    verbosedump(features, output_fname)

    if not os.path.isfile(feat_descriptor) and hasattr(feat, "describe_features"):
        dih_df = pd.DataFrame(feat.describe_features(trj[0]))
        verbosedump(dih_df, feat_descriptor)

    return
Example #10
def test_dihedral_feat():

    print(base_dir)
    pool = Pool(6)
    yaml_file = load_yaml_file(os.path.join(base_dir,"mdl_dir","project.yaml"))

    for prt in ["kinase_1", "kinase_2"]:
        print(prt)
        prj = yaml_file["project_dict"][prt][0]
        featurize_project_wrapper(yaml_file, prt, feat=None, stride=1, view=pool)

        feat = DihedralFeaturizer(types=['phi', 'psi','chi1'])
        flist = glob.glob(os.path.join(base_dir, prt , yaml_file["protein_dir"],"*.hdf5"))
        for i in np.random.choice(flist, 3):
            trj = mdt.load(i)
            my_feat = feat.partial_transform(trj)
            expected_fname = os.path.join(base_dir, prt,
                                          yaml_file["feature_dir"],
                                          os.path.splitext(os.path.basename(i))[0]+".jl")
            calc_feat = verboseload(expected_fname)

            assert np.allclose(my_feat, calc_feat)



    return True
Example #11
def individual_traj_featurize(data_to_process):
    #print('Running individual traj featurize\n')
    test = 1
    #print("Data process to do is :", data_to_process)
    featurizer_type = data_to_process[0]

    if featurizer_type == 'Dihedral':
        featurizer_data = DihedralFeaturizer(types=['phi', 'psi'])
        # print('Featurizer created:\n')

    featurized_data = featurizer_data.fit_transform(data_to_process[2])

    #print('Finished individual traj featurize\n')
    return [data_to_process[1], featurized_data]
Example #12
def featurize_trajectories(coords, featurizer):
    if featurizer == 'RMSDFeaturizer':
        from msmbuilder.featurizer import RMSDFeaturizer
        feat = RMSDFeaturizer(reference_traj=coords[0])
    elif featurizer == 'DRIDFeaturizer':
        from msmbuilder.featurizer import DRIDFeaturizer
        feat = DRIDFeaturizer()
    elif featurizer == 'ContactFeaturizer':
        from msmbuilder.featurizer import ContactFeaturizer
        feat = ContactFeaturizer(scheme='ca')
    elif featurizer == 'DihedralFeaturizer':
        from msmbuilder.featurizer import DihedralFeaturizer
        feat = DihedralFeaturizer(types=['phi', 'psi'])
    return feat.fit_transform(coords)
Example #13
def load_met():
    from msmbuilder.example_datasets import MetEnkephalin
    print(type(MetEnkephalin))
    trajs = MetEnkephalin().get().trajectories

    from msmbuilder.featurizer import AtomPairsFeaturizer
    pairs = []
    for i in range(75):
        for j in range(i):
            pairs.append((j,i))
    X = AtomPairsFeaturizer(pairs).fit_transform(trajs)

    from msmbuilder.featurizer import DihedralFeaturizer
    Y = DihedralFeaturizer().fit_transform(trajs)
    return X, Y
Example #14
def load_fs():

    from msmbuilder.example_datasets import MinimalFsPeptide
    trajs = MinimalFsPeptide().get().trajectories

    from msmbuilder.featurizer import AtomPairsFeaturizer
    pairs = []
    for i in range(264):
        for j in range(i):
            pairs.append((j, i))
    X = AtomPairsFeaturizer(pairs).fit_transform(trajs)

    from msmbuilder.featurizer import DihedralFeaturizer
    Y = DihedralFeaturizer().fit_transform(trajs)
    return X, Y
Example #15
    def build_model(self, user_defined_model):
        """
        Load or build a model (Pipeline from scikit-learn) to do all the transforming and fitting
        :param user_defined_model: Either a string (to load from disk) or a Pipeline object to use as model
        :return model: Return the model back
        """
        if user_defined_model is None:
            if os.path.exists(self.model_pkl_fname):
                logger.info('Loading model pkl file {}'.format(
                    self.model_pkl_fname))
                model = load_generic(self.model_pkl_fname)
            else:
                logger.info('Building default model based on dihedrals')

                # use a lag time of 1 ns for the tICA and MSM steps; if the
                # stride is too coarse for that, fall back to 1 frame and
                # report how long that is in ns
                if self.app.meta is not None:
                    lag_time = max(1, int(1 / self.timestep))
                    logger.info(
                        'Using a lag time of {} ns for the tICA and MSM'.
                        format(lag_time * self.timestep))
                else:
                    self.timestep = None
                    lag_time = 1
                    logger.warning(
                        'Cannot determine timestep. Defaulting to a lag time '
                        'of 1 frame.')
                model = Pipeline([('feat', DihedralFeaturizer()),
                                  ('scaler', RobustScaler()),
                                  ('tICA',
                                   tICA(lag_time=lag_time,
                                        commute_mapping=True,
                                        n_components=10)),
                                  ('clusterer',
                                   MiniBatchKMeans(n_clusters=200)),
                                  ('msm',
                                   MarkovStateModel(lag_time=lag_time,
                                                    ergodic_cutoff='off',
                                                    reversible_type=None))])
        else:
            if not isinstance(user_defined_model, Pipeline):
                raise ValueError(
                    'model is not an sklearn.pipeline.Pipeline object')
            else:
                logger.info('Using user defined model')
                model = user_defined_model
        return model
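A tiny worked illustration of the lag-time rule in the default branch above; the timestep value is hypothetical.

timestep = 0.2                        # ns per frame after striding (hypothetical)
lag_time = max(1, int(1 / timestep))  # -> 5 frames, i.e. 5 * 0.2 = 1.0 ns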
Example #16
def test_code_works():
    # creates a 4-state HMM on the ALA2 data. Nothing fancy, just makes
    # sure the code runs without erroring out
    trajectories = AlanineDipeptide().get_cached().trajectories
    topology = trajectories[0].topology

    indices = topology.select('symbol C or symbol O or symbol N')
    featurizer = DihedralFeaturizer(['phi', 'psi'])

    sequences = featurizer.transform(trajectories)

    hmm = VonMisesHMM(n_states=4, n_init=1)
    hmm.fit(sequences)

    assert len(hmm.timescales_) == 3
    assert np.any(hmm.timescales_ > 50)
Example #17
    def setUp(self):
        numpy.random.seed(12)
        self.top = 'data_app/runs/structure.prmtop'
        self.traj_1 = 'data_app/runs/run-000.nc'
        self.traj_2 = 'data_app/runs/run-001.nc'
        self.feat = DihedralFeaturizer()
        self.traj_dict = {
            0: load(self.traj_1, top=self.top),
            1: load(self.traj_2, top=self.top)
        }
        self.scaler = RobustScaler()
        self.tica = tICA(n_components=2)
        self.ftrajs = {
            0: numpy.random.rand(100, 50),
            1: numpy.random.rand(100, 50),
        }
Example #18
def Get_dihedral_features_villin():
    import os
    import shutil
    import mdtraj as md
    os.chdir('/homes/anuginueni/traj_villin')
    if os.path.isdir('./diheds'):
        shutil.rmtree('./diheds')
    from msmbuilder.dataset import dataset
    t = md.load("/homes/anuginueni/traj_villin/trajectory-331.xtc",
                top='/homes/anuginueni/traj_villin/filtered.pdb', stride=5)
    xyz = dataset("/homes/anuginueni/traj_villin/*.xtc",
                  topology='/homes/anuginueni/traj_villin/filtered.pdb', stride=5)
    from msmbuilder.featurizer import DihedralFeaturizer                   # for dihedrals
    featurizer = DihedralFeaturizer(types=['phi', 'psi'])                  # for dihedrals
    diheds = xyz.fit_transform_with(featurizer, 'diheds/', fmt='dir-npy')  # for dihedrals
    des_feat = featurizer.describe_features(t)
    res = [sub['resids'] for sub in des_feat]
    print(str(res))
    return diheds
Example #19
def test_DihedralFeaturizer_describe_features_nosincos():
    feat = DihedralFeaturizer(sincos=False)
    rnd_traj = np.random.randint(len(trajectories))
    features = feat.transform([trajectories[rnd_traj]])
    df = pd.DataFrame(feat.describe_features(trajectories[rnd_traj]))

    for f in range(25):
        f_index = np.random.choice(len(df))

        atom_inds = df.iloc[f_index].atominds
        feature_value = md.compute_dihedrals(trajectories[rnd_traj],
                                             [atom_inds])
        if feat.sincos:
            func = getattr(np, '%s' % df.iloc[f_index].otherinfo)
            feature_value = func(feature_value)

        assert (features[0][:, f_index] == feature_value.flatten()).all()
Example #20
def fit_and_transform(directory, stride=5):

    projected_data_filename = "/scratch/users/enf/b2ar_analysis/phi_psi_chi_stride%d_projected.h5" % stride
    fit_model_filename = "/scratch/users/enf/b2ar_analysis/phi_psi_chi2_stride%s_tica_coords.h5" % stride
    #active_pdb_file = "/scratch/users/enf/b2ar_analysis/3P0G_pymol_prepped.pdb"
    active_pdb_file = "/scratch/users/enf/b2ar_analysis/system_B.pdb"

    tica_model = tICA(n_components=4)

    if not os.path.exists(projected_data_filename):
        print("loading feature files")
        feature_files = get_trajectory_files(directory)
        pool = mp.Pool(mp.cpu_count())
        features = pool.map(load_features, feature_files)
        pool.terminate()
        if not os.path.exists(fit_model_filename):
            print("fitting data to tICA model")
            fit_model = tica_model.fit(features)
            verbosedump(fit_model, fit_model_filename)
            transformed_data = fit_model.transform(features)
            verbosedump(transformed_data, projected_data_filename)
        else:
            print("loading tICA model")
            fit_model = verboseload(fit_model_filename)
            transformed_data = fit_model.transform(features)
            verbosedump(transformed_data, projected_data_filename)
    else:
        fit_model = verboseload(fit_model_filename)
        transformed_data = verboseload(projected_data_filename)

    active_pdb = md.load(active_pdb_file)
    top = active_pdb.topology
    atom_indices = [
        a.index for a in top.atoms
        if a.residue.is_protein and a.residue.resSeq != 341
        and a.residue.name[0:2] != "HI" and a.residue.resSeq != 79
        and a.residue.resSeq != 296 and a.residue.resSeq != 269 and a.residue.
        resSeq != 178 and a.residue.resSeq != 93 and a.residue.name != "NMA"
        and a.residue.name != "NME" and a.residue.name != "ACE"
    ]
    active_pdb = md.load(active_pdb_file, atom_indices=atom_indices)
    featurizer = DihedralFeaturizer(types=['phi', 'psi', 'chi2'])
    active_pdb_features = featurizer.transform([active_pdb])
    active_pdb_projected = fit_model.transform(active_pdb_features)
    print((active_pdb_projected[0:4]))
Example #21
def Get_combined_features_villin():
    from msmbuilder.featurizer import DihedralFeaturizer
    from msmbuilder.featurizer import ContactFeaturizer
    diheds = DihedralFeaturizer()
    contacts = ContactFeaturizer()
    features = [("di_villin", diheds), ("con_villin", contacts)]
    import os
    import shutil
    os.chdir('/homes/anuginueni/traj_villin')
    if os.path.isdir('/homes/anuginueni/traj_villin/combined'):
        shutil.rmtree('/homes/anuginueni/traj_villin/combined')
    from msmbuilder.dataset import dataset
    xyz = dataset("/homes/anuginueni/traj_villin/*.xtc",
                  topology='/homes/anuginueni/traj_villin/filtered.pdb', stride=5)
    from msmbuilder.feature_selection import FeatureSelector

    comb_features = FeatureSelector(features)
    co = xyz.fit_transform_with(comb_features, '/homes/anuginueni/traj_villin/combined/', fmt='dir-npy')
    return co
Example #22
def test_pickle():
    """Test pickling an HMM"""
    trajectories = AlanineDipeptide().get_cached().trajectories
    topology = trajectories[0].topology
    indices = topology.select('symbol C or symbol O or symbol N')
    featurizer = DihedralFeaturizer(['phi', 'psi'])
    sequences = featurizer.transform(trajectories)
    hmm = VonMisesHMM(n_states=4, n_init=1)
    hmm.fit(sequences)
    logprob, hidden = hmm.predict(sequences)

    with tempfile.TemporaryFile() as savefile:
        pickle.dump(hmm, savefile)
        savefile.seek(0, 0)
        hmm2 = pickle.load(savefile)

    logprob2, hidden2 = hmm2.predict(sequences)
    assert (logprob == logprob2)
Example #23
def test_feature_slicer():
    trajectories = AlanineDipeptide().get_cached().trajectories
    f = DihedralFeaturizer()
    fs = FeatureSlicer(f, indices=[0, 1])
    y1 = fs.transform(trajectories)
    assert y1[0].shape[1] == 2

    df = pd.DataFrame(fs.describe_features(trajectories[0]))
    assert len(df) == 2
    assert 'psi' not in df.featuregroup[0]
    assert 'psi' not in df.featuregroup[1]

    fs = FeatureSlicer(f, indices=[2, 3])
    y1 = fs.transform(trajectories)
    assert y1[0].shape[1] == 2

    df = pd.DataFrame(fs.describe_features(trajectories[0]))
    assert len(df) == 2
    assert 'phi' not in df.featuregroup[0]
    assert 'phi' not in df.featuregroup[1]
Example #24
def featurize_trajectories(coords, featurizer):
    '''
    Input
    coords : list of 'MDTrajDataset' object

    Output 
    features : list of arrays, length n_trajs, each of shape (n_samples, n_features)
    '''
    if featurizer == 'RMSDFeaturizer':
        from msmbuilder.featurizer import RMSDFeaturizer
        feat = RMSDFeaturizer(reference_traj=coords[0])
    elif featurizer == 'DRIDFeaturizer':
        from msmbuilder.featurizer import DRIDFeaturizer
        feat = DRIDFeaturizer()
    elif featurizer == 'ContactFeaturizer':
        from msmbuilder.featurizer import ContactFeaturizer
        feat = ContactFeaturizer(scheme='ca')
    elif featurizer == 'DihedralFeaturizer':
        from msmbuilder.featurizer import DihedralFeaturizer
        feat = DihedralFeaturizer(types=['phi', 'psi'])
    return feat.fit_transform(coords)
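A hedged usage sketch for the dispatcher above; the trajectory and topology paths are placeholders.

import mdtraj as md

coords = [md.load("traj-000.xtc", top="top.pdb"),
          md.load("traj-001.xtc", top="top.pdb")]
features = featurize_trajectories(coords, 'DihedralFeaturizer')
# one (n_samples, n_features) array per trajectory
print(len(features), features[0].shape)
# note: an unrecognized featurizer string leaves `feat` unbound and raises NameError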
Example #25
def read_and_featurize(filename, dihedrals=['phi', 'psi', 'chi2'], stride=10):
    print(("reading and featurizing %s" % (filename)))
    traj = md.load(filename)
    traj = traj.atom_slice(traj.topology.select('chain A and protein'))
    featurizer = DihedralFeaturizer(types=dihedrals)
    features = featurizer.transform(traj_list=[traj])
    print("finished featurizing")

    directory = filename.split("/")
    condition = directory[len(directory) - 2]
    dcd_file = directory[len(directory) - 1]
    new_file = "%s_features_stride%d.h5" % (dcd_file.rsplit(".", 1)[0], stride)
    new_root_dir = "/home/enf/b2ar_analysis/subsampled_features/"
    new_condition_dir = "%s/%s" % (new_root_dir, condition)

    if not os.path.exists(new_condition_dir):
        os.makedirs(new_condition_dir)

    new_file_full = "%s/%s/%s" % (new_root_dir, condition, new_file)
    print(("saving features as %s" % new_file_full))

    verbosedump(features, new_file_full)
    return features
Example #26
def test_FeatureSelector_describe_features():
    rnd_traj = np.random.randint(len(trajectories))
    f_ca = ContactFeaturizer(scheme='CA', ignore_nonprotein=True)
    f1 = f_ca.transform([trajectories[rnd_traj]])
    df1 = pd.DataFrame(f_ca.describe_features(trajectories[rnd_traj]))

    f_dih = DihedralFeaturizer()
    f2 = f_dih.transform([trajectories[rnd_traj]])
    df2 = pd.DataFrame(f_dih.describe_features(trajectories[rnd_traj]))

    df_dict = {}
    df_dict["ca"] = df1
    df_dict["dih"] = df2

    f_comb = FeatureSelector([('ca', f_ca), ('dih', f_dih)])
    f3 = f_comb.transform([trajectories[rnd_traj]])
    df3 = pd.DataFrame(f_comb.describe_features(trajectories[rnd_traj]))
    assert len(df3) == len(df1) + len(df2)
    df4 = pd.concat([df_dict[i] for i in f_comb.feat_list])
    # lets randomly compare 40 features
    for i in np.random.choice(range(len(df3)), 40):
        for j in df3.columns:
            assert eq(df3.iloc[i][j], df4.iloc[i][j])
Example #27
# (the snippet begins mid-call; the opening lines below are reconstructed so it
#  runs, and are an assumption based on the parallel apo_calmodulin branch)
from msmbuilder.example_datasets import FsPeptide
from msmbuilder.dataset import dataset

if which_dataset == 'fspeptide':
    fs_peptide = FsPeptide()
    fs_peptide.cache()
    xyz = dataset(fs_peptide.data_dir + '/*.xtc',
                  topology=fs_peptide.data_dir + '/fs-peptide.pdb',
                  stride=10)
    print("{} trajectories".format(len(xyz)))
    # msmbuilder does not keep track of units! You must keep track of your
    # data's timestep
    to_ns = 0.5
    print("with length {} ns".format(set(len(x) * to_ns for x in xyz)))

if which_dataset == 'apo_calmodulin':
    print('correct')
    xyz = dataset('/scratch/users/mincheol/apo_trajectories' + '/*.lh5',
                  stride=10)

#featurization
from msmbuilder.featurizer import DihedralFeaturizer
featurizer = DihedralFeaturizer(types=['phi', 'psi'], sincos=False)
print(xyz)
diheds = xyz.fit_transform_with(featurizer, 'diheds/', fmt='dir-npy')

#tICA
from msmbuilder.decomposition import tICA

if which_dataset == 'fspeptide':
    tica_model = tICA(lag_time=2, n_components=4)
if which_dataset == 'apo_calmodulin':
    tica_model = tICA(lag_time=40, n_components=20)

# fit and transform can be done in separate steps:
tica_model = diheds.fit_with(tica_model)
tica_trajs = diheds.transform_with(tica_model, 'ticas/', fmt='dir-npy')
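The two-step fit_with / transform_with above can also be collapsed into one call, using the same dataset API that earlier examples in this collection use:

# equivalent single-step form, writing to the same on-disk dataset
tica_trajs = diheds.fit_transform_with(tica_model, 'ticas/', fmt='dir-npy')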
Example #28
from msmbuilder.featurizer import DihedralFeaturizer
import os, glob
from msmbuilder.decomposition import tICA
import mdtraj as md
import pandas as pd
from msmbuilder.msm import MarkovStateModel
from msmbuilder.cluster import KMeans
from msmbuilder.utils import dump, load

flist = glob.glob("../trajectory.xtc")

top = md.load("../top.pdb")

trj_list = [md.load(i, top=top) for i in flist]
print("Found %d trajs" % len(trj_list))

f = DihedralFeaturizer(sincos=False)
dump(f, "raw_featurizer.pkl")

feat = f.transform(trj_list)

dump(feat, "raw_features.pkl")

f = load("./featurizer.pkl")
dump(f, "featurizer.pkl")
df1 = pd.DataFrame(f.describe_features(trj_list[0]))
dump(df1, "feature_descriptor.pkl")
feat = f.transform(trj_list)

dump(feat, "features.pkl")

t = tICA(lag_time=100, n_components=2, kinetic_mapping=False)
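The script stops after constructing the tICA object; a minimal hedged continuation that fits it on the features computed above (output file name is hypothetical):

tica_trajs = t.fit_transform(feat)  # one (n_frames, 2) array per trajectory
dump(tica_trajs, "tica_trajs.pkl")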
Example #29
from msmbuilder.example_datasets import FsPeptide
from msmbuilder.featurizer import DihedralFeaturizer
from msmbuilder.decomposition import tICA
from msmbuilder.cluster import MiniBatchKMeans
from msmbuilder.msm import MarkovStateModel

import numpy as np

import msmexplorer as msme

rs = np.random.RandomState(42)

# Load Fs Peptide Data
trajs = FsPeptide().get().trajectories

# Extract Side-Chain (chi1) Dihedrals
featurizer = DihedralFeaturizer(types=['chi1'])
diheds = featurizer.fit_transform(trajs)

# Perform Dimensionality Reduction
tica_model = tICA(lag_time=2, n_components=2)
tica_trajs = tica_model.fit_transform(diheds)

# Perform Clustering
clusterer = MiniBatchKMeans(n_clusters=12, random_state=rs)
clustered_trajs = clusterer.fit_transform(tica_trajs)

# Construct MSM
msm = MarkovStateModel(lag_time=2)
assignments = msm.fit_transform(clustered_trajs)

# Plot Stacked Distributions
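The snippet breaks off at the plotting step; as a stand-in (the original presumably called a stacked-distribution plot, which is not shown here), the histogram plot used in the next example works on the same tICA output:

# stand-in plot (assumption): visualize the two tICs with plot_histogram
data = np.concatenate(tica_trajs, axis=0)
msme.plot_histogram(data, labels=['$tIC1$', '$tIC2$'], show_titles=True)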
Example #30
"""
Histogram Plot
==============
"""
from msmbuilder.example_datasets import FsPeptide
from msmbuilder.featurizer import DihedralFeaturizer
from msmbuilder.decomposition import tICA

import numpy as np

import msmexplorer as msme

# Load Fs Peptide Data
trajs = FsPeptide().get().trajectories

# Extract Backbone Dihedrals
featurizer = DihedralFeaturizer(types=['phi', 'psi'])
diheds = featurizer.fit_transform(trajs)

# Perform Dimensionality Reduction
tica_model = tICA(lag_time=2, n_components=4)
tica_trajs = tica_model.fit_transform(diheds)

# Plot Histogram
data = np.concatenate(tica_trajs, axis=0)
msme.plot_histogram(data,
                    color='oxblood',
                    quantiles=(0.5, ),
                    labels=['$tIC1$', '$tIC2$', '$tIC3$', '$tIC4$'],
                    show_titles=True)