def test_ContactFeaturizer_describe_features():
    """Check that described contact features match the transformed columns.

    A contact scheme and a trajectory are picked at random. For 25 randomly
    chosen rows of the ``describe_features`` table, the residue pair stored
    in ``resids`` is passed to ``md.compute_contacts`` and the result must
    equal the corresponding column of the featurizer output.
    """
    scheme = np.random.choice(['ca', 'closest', 'closest-heavy'])
    feat = ContactFeaturizer(scheme=scheme, ignore_nonprotein=True)

    rnd_traj = np.random.randint(len(trajectories))
    traj = trajectories[rnd_traj]

    features = feat.transform([traj])
    df = pd.DataFrame(feat.describe_features(traj))
    n_described = len(df)

    for _ in range(25):
        column = np.random.choice(n_described)
        residue_pair = df.iloc[column].resids

        # Recompute this single contact directly for comparison.
        expected, _pairs = md.compute_contacts(
            traj, contacts=[residue_pair], scheme=scheme)

        observed = features[0][:, column]
        assert (observed == expected.flatten()).all()
def test_distance_to_logistic():
    """Check that logistic contacts fall on the correct side of 0.5.

    With a random non-negative ``center`` and ``steepness``, entries of the
    plain contact features that are larger than ``center`` must map to
    logistic values below 0.5, and entries smaller than ``center`` must map
    to values above 0.5. Ten random entries of the first trajectory's
    feature array are spot-checked.
    """
    trajectories = MinimalFsPeptide().get_cached().trajectories
    steepness = np.absolute(10 * np.random.randn())
    center = np.absolute(np.random.randn())

    contactfeaturizer = ContactFeaturizer()
    contacts = contactfeaturizer.transform(trajectories)

    logisticcontactfeaturizer = LogisticContactFeaturizer(
        center=center, steepness=steepness)
    logistics = logisticcontactfeaturizer.transform(trajectories)

    n_rows, n_cols = contacts[0].shape[0], contacts[0].shape[1]
    for _ in range(10):
        # randint's upper bound is exclusive, so pass the axis length itself;
        # subtracting 1 would never sample the last row/column and would
        # raise ValueError for an axis of length 1.
        i = np.random.randint(0, n_rows)
        j = np.random.randint(0, n_cols)

        x = contacts[0][i][j]
        y = logistics[0][i][j]

        if x > center:
            assert y < 0.5
        if x < center:
            assert y > 0.5
def test_soft_min_contact_featurizer():
    """Check the soft-min 'closest' contact against a direct computation.

    For one frame and one randomly chosen pair of distinct residues, the
    featurizer output (``soft_min=True``) is compared with
    ``beta / log(sum(exp(beta / d)))`` evaluated over all atom-pair
    distances ``d`` between the two residues.
    """
    # just get one frame for now
    traj = MinimalFsPeptide().get_cached().trajectories[0][0]
    soft_min_beta = 20

    ri, rj = np.random.choice(
        np.arange(traj.top.n_residues), size=2, replace=False)
    aind_i = [atom.index for atom in traj.top.residue(ri).atoms]
    aind_j = [atom.index for atom in traj.top.residue(rj).atoms]
    atom_pairs = list(itertools.product(aind_i, aind_j))

    featurizer = ContactFeaturizer(contacts=[[ri, rj]], scheme='closest',
                                   soft_min=True,
                                   soft_min_beta=soft_min_beta)
    features = featurizer.transform([traj])[0]

    distances = md.compute_distances(traj, atom_pairs)
    expected = soft_min_beta / \
        np.log(np.sum(np.exp(soft_min_beta / distances), axis=1))

    # The comparison result must be asserted; a bare np.allclose(...) call
    # discards its return value and the test could never fail.
    assert np.allclose(features, expected)
def test_distance_to_logistic():
    """Verify the logistic transform of contacts is on the right side of 0.5.

    ``center`` and ``steepness`` are drawn at random (absolute values of
    normal draws). For ten random entries of the first trajectory's contact
    features: an entry above ``center`` must give a logistic value below
    0.5, and an entry below ``center`` must give a value above 0.5.
    """
    trajectories = MinimalFsPeptide().get_cached().trajectories
    steepness = np.absolute(10 * np.random.randn())
    center = np.absolute(np.random.randn())

    contacts = ContactFeaturizer().transform(trajectories)
    logistics = LogisticContactFeaturizer(
        center=center, steepness=steepness).transform(trajectories)

    first_contacts = contacts[0]
    first_logistics = logistics[0]

    for _ in range(10):
        # np.random.randint excludes its upper bound, so use the full axis
        # length; ``shape - 1`` skipped the final index and failed outright
        # on an axis of length 1.
        i = np.random.randint(0, first_contacts.shape[0])
        j = np.random.randint(0, first_contacts.shape[1])

        x = first_contacts[i][j]
        y = first_logistics[i][j]

        if x > center:
            assert y < 0.5
        if x < center:
            assert y > 0.5
def test_FeatureSelector_describe_features():
    """Check FeatureSelector descriptions against its member featurizers.

    The combined description table must have as many rows as the contact
    and dihedral tables together, and 40 randomly chosen rows must match,
    column by column, the per-featurizer tables concatenated in the order
    given by ``feat_list``.
    """
    rnd_traj = np.random.randint(len(trajectories))
    traj = trajectories[rnd_traj]

    f_ca = ContactFeaturizer(scheme='CA', ignore_nonprotein=True)
    f_ca.transform([traj])  # output unused; the call itself is exercised
    df1 = pd.DataFrame(f_ca.describe_features(traj))

    f_dih = DihedralFeaturizer()
    f_dih.transform([traj])  # output unused; the call itself is exercised
    df2 = pd.DataFrame(f_dih.describe_features(traj))

    df_dict = {"ca": df1, "dih": df2}

    f_comb = FeatureSelector([('ca', f_ca), ('dih', f_dih)])
    f_comb.transform([traj])  # output unused; the call itself is exercised
    df3 = pd.DataFrame(f_comb.describe_features(traj))

    assert len(df3) == len(df1) + len(df2)

    df4 = pd.concat([df_dict[name] for name in f_comb.feat_list])

    # lets randomly compare 40 features
    for row in np.random.choice(range(len(df3)), 40):
        for col in df3.columns:
            assert eq(df3.iloc[row][col], df4.iloc[row][col])
def test_FeatureSelector_describe_features():
    """Compare FeatureSelector's feature descriptions with its parts.

    Builds description tables for a contact featurizer and a dihedral
    featurizer on one random trajectory, then checks that the selector's
    table has the summed length and that 40 random rows agree with the
    individual tables stacked in ``feat_list`` order.
    """
    rnd_traj = np.random.randint(len(trajectories))
    chosen = trajectories[rnd_traj]
    as_list = [chosen]

    f_ca = ContactFeaturizer(scheme='CA', ignore_nonprotein=True)
    _ = f_ca.transform(as_list)
    df1 = pd.DataFrame(f_ca.describe_features(chosen))

    f_dih = DihedralFeaturizer()
    _ = f_dih.transform(as_list)
    df2 = pd.DataFrame(f_dih.describe_features(chosen))

    df_dict = dict(ca=df1, dih=df2)

    f_comb = FeatureSelector([('ca', f_ca), ('dih', f_dih)])
    _ = f_comb.transform(as_list)
    df3 = pd.DataFrame(f_comb.describe_features(chosen))

    n_combined = len(df3)
    assert n_combined == len(df1) + len(df2)

    parts = [df_dict[key] for key in f_comb.feat_list]
    df4 = pd.concat(parts)

    # lets randomly compare 40 features
    sampled_rows = np.random.choice(range(n_combined), 40)
    for i in sampled_rows:
        for j in df3.columns:
            assert eq(df3.iloc[i][j], df4.iloc[i][j])
def test_soft_min_contact_featurizer():
    """Compare the soft-min contact feature with a hand-computed value.

    Uses a single frame and a random pair of distinct residues. The
    reference value is ``beta / log(sum(exp(beta / d)))`` taken over every
    atom-pair distance ``d`` between the two residues, and must be close to
    what ``ContactFeaturizer`` returns with ``soft_min=True``.
    """
    # just get one frame for now
    traj = MinimalFsPeptide().get_cached().trajectories[0][0]
    soft_min_beta = 20

    ri, rj = np.random.choice(np.arange(traj.top.n_residues),
                              size=2, replace=False)

    aind_i = [atom.index for atom in traj.top.residue(ri).atoms]
    aind_j = [atom.index for atom in traj.top.residue(rj).atoms]
    atom_pairs = list(itertools.product(aind_i, aind_j))

    featurizer = ContactFeaturizer(
        contacts=[[ri, rj]],
        scheme='closest',
        soft_min=True,
        soft_min_beta=soft_min_beta,
    )
    features = featurizer.transform([traj])[0]

    pair_distances = md.compute_distances(traj, atom_pairs)
    soft_min_reference = soft_min_beta / \
        np.log(np.sum(np.exp(soft_min_beta / pair_distances), axis=1))

    # Assert the comparison: without ``assert`` the boolean from
    # np.allclose is thrown away and the test passes unconditionally.
    assert np.allclose(features, soft_min_reference)
def test_contacts():
    """Check the number of contact features produced with default settings.

    A default ``ContactFeaturizer`` applied to the cached MinimalFsPeptide
    trajectories must yield 171 feature columns for the first trajectory.
    """
    dataset = MinimalFsPeptide().get_cached()
    trajectories = dataset.trajectories

    contactfeaturizer = ContactFeaturizer()
    contacts = contactfeaturizer.transform(trajectories)

    n_columns = contacts[0].shape[1]
    assert n_columns == 171
def test_contacts():
    """Verify the default contact featurization has 171 columns.

    Transforms the cached MinimalFsPeptide trajectories with a default
    ``ContactFeaturizer`` and inspects the second dimension of the first
    result.
    """
    trajectories = MinimalFsPeptide().get_cached().trajectories
    contacts = ContactFeaturizer().transform(trajectories)

    first_result = contacts[0]
    assert first_result.shape[1] == 171