def test_ContactFeaturizer_describe_features():
    """Check that described contact features match ``md.compute_contacts``.

    A contact scheme and a trajectory are picked at random. For 25 randomly
    chosen feature columns, the residue pair listed under ``resids`` in the
    description is fed back to ``md.compute_contacts`` and the result must
    equal the corresponding column of the featurizer output exactly.
    """
    scheme = np.random.choice(['ca', 'closest', 'closest-heavy'])
    featurizer = ContactFeaturizer(scheme=scheme, ignore_nonprotein=True)
    traj = trajectories[np.random.randint(len(trajectories))]

    # transform() takes a list of trajectories; only the single result is used.
    transformed = featurizer.transform([traj])[0]
    description = pd.DataFrame(featurizer.describe_features(traj))

    for _ in range(25):
        column = np.random.choice(len(description))
        residue_pair = description.iloc[column].resids

        # Recompute the distance for just this residue pair with mdtraj.
        expected, _pairs = md.compute_contacts(traj,
                                               contacts=[residue_pair],
                                               scheme=scheme)
        assert (transformed[:, column] == expected.flatten()).all()
Esempio n. 2
0
    def _extract_data(self, traj):
        """Return contact distances for ``traj`` regrouped per residue.

        A ``ContactFeaturizer`` is built from ``self.contacts``,
        ``self.scheme`` and ``self.ignore_nonprotein``. For every residue id
        found in the described pairs, the distance columns of the pairs that
        contain it are collected into a DataFrame whose columns are given as
        two parallel label lists: the partner residue of each pair, and the
        residue itself repeated. The per-residue frames are concatenated
        side by side.
        """
        featurizer = ContactFeaturizer(
            contacts=self.contacts,
            scheme=self.scheme,
            ignore_nonprotein=self.ignore_nonprotein,
        )
        distances = featurizer.partial_transform(traj)
        pairs = [entry["resids"] for entry in featurizer.describe_features(traj)]

        frames = []
        for resid in np.unique(pairs):
            # One membership flag per pair; reused for the mask and the labels.
            involved = [resid in pair for pair in pairs]
            partners = [
                list(set(pair) - {resid})[0]
                for pair, hit in zip(pairs, involved)
                if hit
            ]
            column_labels = [partners, [resid] * len(partners)]
            selected = distances[:, np.array(involved)]
            frames.append(pd.DataFrame(selected, columns=column_labels))

        return pd.concat(frames, axis=1)
Esempio n. 3
0
def Get_contacts_features_villin(data_dir='/homes/anuginueni/traj_villin',
                                 topology='filtered.pdb',
                                 reference_traj='trajectory-331.xtc',
                                 stride=5):
    """Featurize the villin trajectories with C-alpha contacts.

    With the default arguments this uses exactly the paths that were
    previously hard-coded, so existing zero-argument callers are unaffected.

    Parameters
    ----------
    data_dir : str
        Directory holding the ``*.xtc`` trajectories and the topology file.
        It also becomes the working directory, and the ``contacts/`` output
        directory is created inside it.
    topology : str
        File name of the topology, relative to ``data_dir``.
    reference_traj : str
        File name (relative to ``data_dir``) of the trajectory loaded only
        to describe the features.
    stride : int
        Stride passed both to the dataset and to the reference trajectory.

    Returns
    -------
    The value returned by ``fit_transform_with`` for the ``'contacts/'``
    output written in ``'dir-npy'`` format.

    Side effects
    ------------
    Changes the process working directory to ``data_dir``, deletes any
    existing ``./contacts`` directory there, and prints the residue pairs
    reported by ``describe_features``.
    """
    import os
    import shutil

    # Imported before anything is deleted so that a missing dependency
    # fails early instead of after the previous output has been removed.
    import mdtraj as md
    from msmbuilder.dataset import dataset
    from msmbuilder.featurizer import ContactFeaturizer

    # Resolve to an absolute path first: the joins below must stay valid
    # after the working directory changes.
    data_dir = os.path.abspath(data_dir)
    topology_path = os.path.join(data_dir, topology)

    # The output location is relative, so work from inside data_dir.
    os.chdir(data_dir)
    if os.path.isdir('./contacts'):
        shutil.rmtree('./contacts')

    xyz = dataset(os.path.join(data_dir, '*.xtc'),
                  topology=topology_path,
                  stride=stride)
    t = md.load(os.path.join(data_dir, reference_traj),
                top=topology_path,
                stride=stride)

    featurizer = ContactFeaturizer(scheme='ca')

    # Print the residue pair behind every feature column.
    des_feat = featurizer.describe_features(t)
    res = [sub['resids'] for sub in des_feat]
    print(str(res))

    contacts = xyz.fit_transform_with(featurizer, 'contacts/', fmt='dir-npy')
    return contacts
Esempio n. 4
0
    def _extract_data(self, traj):
        """Collect contact distances of ``traj`` into per-residue column groups.

        Distances and their residue pairs come from a ``ContactFeaturizer``
        configured with ``self.contacts``, ``self.scheme`` and
        ``self.ignore_nonprotein``. For each distinct residue id, the
        columns of all pairs containing it are placed in a DataFrame whose
        columns are given as two parallel label lists (partner residue,
        then the residue itself); the frames are concatenated along
        ``axis=1``.
        """
        featurizer = ContactFeaturizer(contacts=self.contacts,
                                       scheme=self.scheme,
                                       ignore_nonprotein=self.ignore_nonprotein)
        distances = featurizer.partial_transform(traj)
        description = featurizer.describe_features(traj)
        pairs = [entry['resids'] for entry in description]

        per_residue = []
        for resid in np.unique(pairs):
            mask = []
            partners = []
            for pair in pairs:
                contains = resid in pair
                mask.append(contains)
                if contains:
                    # The other member of the pair labels this column.
                    partners.append(list(set(pair) - {resid})[0])

            labels = [partners, len(partners) * [resid]]
            block = pd.DataFrame(distances[:, np.array(mask)], columns=labels)
            per_residue.append(block)

        return pd.concat(per_residue, axis=1)
def test_FeatureSelector_describe_features():
    """Check ``FeatureSelector.describe_features`` against its parts.

    The description produced by a selector combining a contact and a
    dihedral featurizer must have as many rows as the two individual
    descriptions together. On 40 randomly drawn rows it must also agree,
    column by column, with the individual descriptions concatenated in
    ``feat_list`` order.
    """
    traj = trajectories[np.random.randint(len(trajectories))]

    contact_feat = ContactFeaturizer(scheme='CA', ignore_nonprotein=True)
    contact_feat.transform([traj])
    contact_desc = pd.DataFrame(contact_feat.describe_features(traj))

    dihedral_feat = DihedralFeaturizer()
    dihedral_feat.transform([traj])
    dihedral_desc = pd.DataFrame(dihedral_feat.describe_features(traj))

    described = {"ca": contact_desc, "dih": dihedral_desc}

    selector = FeatureSelector([('ca', contact_feat), ('dih', dihedral_feat)])
    selector.transform([traj])
    combined = pd.DataFrame(selector.describe_features(traj))
    assert len(combined) == len(contact_desc) + len(dihedral_desc)

    expected = pd.concat([described[name] for name in selector.feat_list])

    # Spot-check 40 randomly chosen rows (drawn with replacement).
    for row in np.random.choice(range(len(combined)), 40):
        actual_row = combined.iloc[row]
        expected_row = expected.iloc[row]
        for column in combined.columns:
            assert eq(actual_row[column], expected_row[column])
def test_FeatureSelector_describe_features():
    """Compare a combined feature description with its per-featurizer parts.

    A random trajectory is described by a contact featurizer, a dihedral
    featurizer, and a ``FeatureSelector`` wrapping both. The combined
    description must contain the sum of the individual row counts and, on
    40 random rows, match the individual descriptions stacked in the
    selector's ``feat_list`` order.
    """
    picked = trajectories[np.random.randint(len(trajectories))]

    def describe(featurizer):
        # Transform first, as a caller would, then tabulate the description.
        featurizer.transform([picked])
        return pd.DataFrame(featurizer.describe_features(picked))

    ca_featurizer = ContactFeaturizer(scheme='CA', ignore_nonprotein=True)
    ca_frame = describe(ca_featurizer)

    dih_featurizer = DihedralFeaturizer()
    dih_frame = describe(dih_featurizer)

    frames_by_name = {"ca": ca_frame, "dih": dih_frame}

    combined_featurizer = FeatureSelector([('ca', ca_featurizer),
                                           ('dih', dih_featurizer)])
    combined_frame = describe(combined_featurizer)
    assert len(combined_frame) == len(ca_frame) + len(dih_frame)

    stacked = pd.concat(
        [frames_by_name[key] for key in combined_featurizer.feat_list])

    # Randomly compare 40 features.
    sampled_rows = np.random.choice(range(len(combined_frame)), 40)
    for position in sampled_rows:
        for label in combined_frame.columns:
            assert eq(combined_frame.iloc[position][label],
                      stacked.iloc[position][label])