def test_featureselector():
    trajectories = AlanineDipeptide().get_cached().trajectories
    fs = FeatureSelector(FEATS, which_feat="phi")

    assert fs.which_feat == ["phi"]

    y1 = fs.partial_transform(trajectories[0])
    y_ref1 = FEATS[0][1].partial_transform(trajectories[0])

    np.testing.assert_array_almost_equal(y_ref1, y1)
def test_featureselector():
    trajectories = AlanineDipeptide().get_cached().trajectories
    fs = FeatureSelector(FEATS, which_feat='phi')

    assert fs.which_feat == ['phi']

    y1 = fs.partial_transform(trajectories[0])
    y_ref1 = FEATS[0][1].partial_transform(trajectories[0])

    np.testing.assert_array_almost_equal(y_ref1, y1)
def test_variancethreshold_vs_sklearn():
    trajectories = AlanineDipeptide().get_cached().trajectories
    fs = FeatureSelector(FEATS)

    vt = VarianceThreshold(0.1)
    vtr = VarianceThresholdR(0.1)

    y = fs.partial_transform(trajectories[0])

    z1 = vt.fit_transform([y])[0]
    z_ref1 = vtr.fit_transform(y)

    np.testing.assert_array_almost_equal(z_ref1, z1)
def test_variancethreshold_vs_sklearn():
    trajectories = AlanineDipeptide().get_cached().trajectories
    fs = FeatureSelector(FEATS)

    vt = VarianceThreshold(0.1)
    vtr = VarianceThresholdR(0.1)

    y = fs.partial_transform(trajectories[0])

    z1 = vt.fit_transform([y])[0]
    z_ref1 = vtr.fit_transform(y)

    np.testing.assert_array_almost_equal(z_ref1, z1)
Esempio n. 5
0
def test_which_feat_types():
    # trajectories = AlanineDipeptide().get_cached().trajectories
    fs = FeatureSelector(FEATS, which_feat=('phi', 'psi'))
    assert fs.which_feat == ['phi', 'psi']

    fs = FeatureSelector(FEATS, which_feat=set(('phi', 'psi')))
    assert fs.which_feat == ['phi', 'psi'] or fs.which_feat == ['psi', 'phi']

    try:
        fs = FeatureSelector(FEATS, which_feat={'phi': 'psi'})
        assert False
    except TypeError:
        pass

    try:
        fs = FeatureSelector(FEATS, which_feat=['phiii'])
        assert False
    except ValueError:
        pass
def test_FeatureSelector_describe_features():
    rnd_traj = np.random.randint(len(trajectories))
    f_ca = ContactFeaturizer(scheme='CA', ignore_nonprotein=True)
    f1 = f_ca.transform([trajectories[rnd_traj]])
    df1 = pd.DataFrame(f_ca.describe_features(trajectories[rnd_traj]))

    f_dih = DihedralFeaturizer()
    f2 = f_dih.transform([trajectories[rnd_traj]])
    df2 = pd.DataFrame(f_dih.describe_features(trajectories[rnd_traj]))

    df_dict = {}
    df_dict["ca"] = df1
    df_dict["dih"] = df2

    f_comb = FeatureSelector([('ca', f_ca), ('dih', f_dih)])
    f3 = f_comb.transform([trajectories[rnd_traj]])
    df3 = pd.DataFrame(f_comb.describe_features(trajectories[rnd_traj]))
    assert len(df3) == len(df1) + len(df2)
    df4 = pd.concat([df_dict[i] for i in f_comb.feat_list])
    # lets randomly compare 40 features
    for i in np.random.choice(range(len(df3)), 40):
        for j in df3.columns:
            assert eq(df3.iloc[i][j], df4.iloc[i][j])
def test_FeatureSelector_describe_features():
    rnd_traj = np.random.randint(len(trajectories))
    f_ca = ContactFeaturizer(scheme='CA', ignore_nonprotein=True)
    f1 = f_ca.transform([trajectories[rnd_traj]])
    df1 = pd.DataFrame(f_ca.describe_features(trajectories[rnd_traj]))

    f_dih = DihedralFeaturizer()
    f2 = f_dih.transform([trajectories[rnd_traj]])
    df2 = pd.DataFrame(f_dih.describe_features(trajectories[rnd_traj]))

    df_dict = {}
    df_dict["ca"] = df1
    df_dict["dih"] = df2

    f_comb = FeatureSelector([('ca', f_ca), ('dih', f_dih)])
    f3 = f_comb.transform([trajectories[rnd_traj]])
    df3 = pd.DataFrame(f_comb.describe_features(trajectories[rnd_traj]))
    assert len(df3) == len(df1) + len(df2)
    df4 = pd.concat([df_dict[i] for i in f_comb.feat_list])
    # lets randomly compare 40 features
    for i in np.random.choice(range(len(df3)), 40):
        for j in df3.columns:
            assert eq(df3.iloc[i][j], df4.iloc[i][j])
Esempio n. 8
0
def Get_combined_features_villin():                                         
  from msmbuilder.featurizer import DihedralFeaturizer
  from msmbuilder.featurizer import ContactFeaturizer                            
  diheds= DihedralFeaturizer()
  contacts=ContactFeaturizer()
  features=[("di_villin",diheds),("con_villin",contacts)]
  import os
  import shutil
  os.chdir('/homes/anuginueni/traj_villin')
  if(os.path.isdir('/homes/anuginueni/traj_villin/combined')):
   shutil.rmtree('/homes/anuginueni/traj_villin/combined')
  from msmbuilder.dataset import dataset
  xyz = dataset( "/homes/anuginueni/traj_villin/*.xtc",topology='/homes/anuginueni/traj_villin/filtered.pdb',stride=5)
  from msmbuilder.feature_selection import FeatureSelector

  comb_features=FeatureSelector(features)
  co=xyz.fit_transform_with(comb_features, '/homes/anuginueni/traj_villin/combined/', fmt='dir-npy')
  return co
#
# TIMESCALES
#
# The data will be loaded with a stride of 10 frames.  Each fame is 50ps, so the time per frame will be
# 500ps/frame or 0.5ns/frame.
# Each trajectory is 1000 frames long
# Lag time will be 40 frames (20 ns)  based on a visual inspection of /Misc/MSM_lag_time.ipynb
to_ns = 0.5
msm_lag = int(40 / to_ns)

#
# FEATURE INDICES
#
all_idx = np.load('indices_all.npy')

#
# OTHER PARAMETERS
#
ref_traj = md.load('../Data/data/trajectory-1.xtc',
                   top='../Data/data/fs-peptide.pdb')

featurizer = FeatureSelector(features=feats)

pipe = Pipeline([('features', featurizer),
                 ('variance_cut', VarianceThreshold()),
                 ('scaling', RobustScaler()), ('cluster', MiniBatchKMeans()),
                 ('msm', MarkovStateModel(lag_time=msm_lag, verbose=False))])

save_generic(pipe, 'model.pickl')
Esempio n. 10
0
#
# TIMESCALES
#
# The data will be loaded with a stride of 10 frames.  Each fame is 50ps, so the time per frame will be
# 500ps/frame or 0.5ns/frame.
# Each trajectory is 1000 frames long
# Lag time will be 40 frames (20 ns)  based on a visual inspection of /Misc/MSM_lag_time.ipynb

features = tica_unstructured_features
to_ns = 0.5
msm_lag = int(40/to_ns)

#
# MODEL
#
pipe = Pipeline([('features', FeatureSelector(features=tica_unstructured_features)),
                 ('variance_cut', VarianceThreshold()),
                 ('scaling', RobustScaler()),
                 ('tica', tICA(kinetic_mapping=True)),
                 ('cluster', MiniBatchKMeans()),
                 ('msm', MarkovStateModel(lag_time=msm_lag, verbose=False, n_timescales=2))])
#
# SAVE MODEL
#
savedir = 'rand-tica-all'
save_generic(pipe, '{}/model.pickl'.format(savedir))
print_feature_names(features, join(savedir, 'feature_list.txt'))



Esempio n. 11
0
def test_featureselector_transform():
    trajectories = AlanineDipeptide().get_cached().trajectories
    fs = FeatureSelector(FEATS, which_feat='psi')
    y1 = fs.transform(trajectories)
    assert len(y1) == len(trajectories)
def test_featureselector_transform():
    trajectories = AlanineDipeptide().get_cached().trajectories
    fs = FeatureSelector(FEATS, which_feat="psi")
    y1 = fs.transform(trajectories)
    assert len(y1) == len(trajectories)
Esempio n. 13
0
def test_featureselector_order():
    fs1 = FeatureSelector(FEATS)
    fs2 = FeatureSelector(FEATS[::-1])

    assert fs1.which_feat == ['phi', 'psi']
    assert fs2.which_feat == ['psi', 'phi']