예제 #1
0
def test_kappa_angle_featurizer_2():
    """Check the residues described by ``KappaAngleFeaturizer(offset=2)``.

    The first feature described for the first cached MetEnkephalin
    trajectory must reference residues 0, 2 and 4, not the consecutive
    residues 0, 1 and 2.
    """
    trajectories = MetEnkephalin().get_cached().trajectories
    feat = KappaAngleFeaturizer(offset=2)
    df = pd.DataFrame(feat.describe_features(trajectories[0]))
    first_resids = sorted(df.resids[0])
    # Guard against consecutive residues being reported ...
    assert first_resids != [0, 1, 2]
    # ... and pin the exact residues expected for this offset.
    assert first_resids == [0, 2, 4]
예제 #2
0
def test_kappa_angle_featurizer_1():
    """Check the first feature described by ``KappaAngleFeaturizer(offset=1)``.

    For the first cached MetEnkephalin trajectory, the first described
    feature must reference residues 0, 1 and 2, and its atom indices must
    be the first three atoms named ``CA`` in the topology.
    """
    first_traj = MetEnkephalin().get_cached().trajectories[0]
    topology = first_traj.topology
    featurizer = KappaAngleFeaturizer(offset=1)
    description = pd.DataFrame(featurizer.describe_features(first_traj))

    assert sorted(description.resids[0]) == [0, 1, 2]

    ca_indices = [atom.index for atom in topology.atoms if atom.name == 'CA']
    assert sorted(description.atominds[0]) == ca_indices[:3]
예제 #3
0
def test_angle_featurizer():
    """Check that ``AngleFeaturizer`` reproduces ``KappaAngleFeaturizer``.

    The atom indices reported by ``KappaAngleFeaturizer(offset=2)`` for the
    first cached MetEnkephalin trajectory are passed to an
    ``AngleFeaturizer``; both featurizers must then produce element-wise
    equal output for that trajectory.
    """
    first_traj = MetEnkephalin().get_cached().trajectories[0]

    kappa_feat = KappaAngleFeaturizer(offset=2)
    kappa_features = kappa_feat.transform([first_traj])
    df = pd.DataFrame(kappa_feat.describe_features(first_traj))

    # Stack the per-feature atom index entries into one 2-D index array.
    atom_inds = np.vstack(df.atominds)
    angle_feat = AngleFeaturizer(angle_indices=atom_inds)
    angle_features = angle_feat.transform([first_traj])

    assert np.all(kappa_features[0] == angle_features[0])
예제 #4
0
def build_dataset():
    """Featurize the MetEnkephalin trajectories with 200 shuffled atom pairs.

    Every pair ``(i, j)`` with ``j < i`` over the atoms of the first
    trajectory is generated, the list is shuffled using NumPy's global RNG
    seeded with 0, and the first 200 pairs are kept. Each trajectory is
    reduced to every 10th frame before being transformed.

    Returns the result of ``AtomPairsFeaturizer.transform``.
    """
    trajs = MetEnkephalin().get().trajectories
    n_atoms = trajs[0].n_atoms
    pairs = [(i, j) for i in range(n_atoms) for j in range(i)]

    # Fixed seed on the global RNG so the same pairs are picked on each call.
    np.random.seed(0)
    np.random.shuffle(pairs)
    selected_pairs = pairs[:200]

    strided_trajs = [traj[::10] for traj in trajs]
    return AtomPairsFeaturizer(selected_pairs).transform(strided_trajs)
예제 #5
0
def load_met():
    """Load MetEnkephalin and compute atom-pair and dihedral features.

    Returns
    -------
    X
        Result of ``AtomPairsFeaturizer.fit_transform`` using every index
        pair ``(j, i)`` with ``j < i < 75``.
    Y
        Result of ``DihedralFeaturizer().fit_transform`` on the same
        trajectories.
    """
    from msmbuilder.example_datasets import MetEnkephalin
    from msmbuilder.featurizer import AtomPairsFeaturizer, DihedralFeaturizer

    print(type(MetEnkephalin))
    trajs = MetEnkephalin().get().trajectories

    # NOTE(review): 75 is presumably the atom count of the dataset - confirm.
    index_pairs = [(j, i) for i in range(75) for j in range(i)]
    pair_features = AtomPairsFeaturizer(index_pairs).fit_transform(trajs)

    dihedral_features = DihedralFeaturizer().fit_transform(trajs)
    return pair_features, dihedral_features
예제 #6
0
def test_common_contacts_featurizer_1():
    """Smoke-test ``CommonContactFeaturizer`` with ``contacts='all'``.

    The real MetEnkephalin sequence, padded with a trailing gap, is aligned
    against the fake sequence ``'YGGFMF'``. There are no assertions: the
    test passes when describing and transforming a randomly chosen
    trajectory completes without raising.
    """
    trajectories = MetEnkephalin().get_cached().trajectories
    actual_seq = trajectories[0].topology.to_fasta(0)

    # The fake sequence has an insertion (per the original note), so the
    # real sequence gets a gap character appended in the "alignment".
    alignment = {
        "actual": actual_seq + "-",
        "fake": 'YGGFMF',
    }

    featurizer = CommonContactFeaturizer(
        alignment=alignment, contacts='all', same_residue=True)

    chosen = trajectories[np.random.randint(len(trajectories))]
    pd.DataFrame(featurizer.describe_features(chosen))
    featurizer.transform([chosen])
예제 #7
0
def test_common_contacts_featurizer_1():
    """Smoke-test ``CommonContactFeaturizer`` with an explicit contact list.

    The real MetEnkephalin sequence, padded with a trailing gap, is aligned
    against the fake sequence ``'YGGFMF'``. The contacts are all index pairs
    over the longest aligned sequence. There are no assertions: the test
    passes when describing and transforming a randomly chosen trajectory
    completes without raising.
    """
    trajectories = MetEnkephalin().get_cached().trajectories
    actual_seq = trajectories[0].topology.to_fasta(0)

    # The fake sequence has an insertion (per the original note), so the
    # real sequence gets a gap character appended in the "alignment".
    alignment = {
        "actual": actual_seq + "-",
        "fake": 'YGGFMF',
    }

    longest = max(len(seq) for seq in alignment.values())
    contacts = list(itertools.combinations(np.arange(longest), 2))
    featurizer = CommonContactFeaturizer(
        alignment=alignment, contacts=contacts, same_residue=True)

    chosen = trajectories[np.random.randint(len(trajectories))]
    pd.DataFrame(featurizer.describe_features(chosen))
    featurizer.transform([chosen])
예제 #8
0
def test_common_contacts_featurizer_2():
    """Check that residue 0 is absent from the described common contacts.

    The real MetEnkephalin sequence is aligned against the fake sequence
    ``'FGGFM'``. For a randomly chosen trajectory, no described feature may
    list residue index 0.
    """
    trajectories = MetEnkephalin().get_cached().trajectories
    actual_seq = trajectories[0].topology.to_fasta(0)

    alignment = {"actual": actual_seq, "fake": 'FGGFM'}
    featurizer = CommonContactFeaturizer(
        alignment=alignment, contacts='all', same_residue=True)

    chosen = trajectories[np.random.randint(len(trajectories))]
    description = pd.DataFrame(featurizer.describe_features(chosen))

    not_first_residue = [
        resid != 0 for resids in description.resids for resid in resids
    ]
    assert np.all(not_first_residue)
예제 #9
0
def test_common_contacts_featurizer_2():
    """Check that a mutated residue is absent from the common contacts.

    One randomly chosen position of the real MetEnkephalin sequence is
    replaced by ``"A"`` (alanine) to build the fake sequence. For a randomly
    chosen trajectory, no described feature may list that residue index.
    """
    trajectories = MetEnkephalin().get_cached().trajectories
    actual_seq = trajectories[0].topology.to_fasta(0)

    # Substitute a single, randomly picked position with alanine.
    mutated_pos = np.random.randint(len(actual_seq))
    prefix = actual_seq[:mutated_pos]
    suffix = actual_seq[mutated_pos + 1:]
    alignment = {"actual": actual_seq, "fake": prefix + "A" + suffix}

    featurizer = CommonContactFeaturizer(
        alignment=alignment, contacts='all', same_residue=True)

    chosen = trajectories[np.random.randint(len(trajectories))]
    description = pd.DataFrame(featurizer.describe_features(chosen))

    avoids_mutation = [
        resid != mutated_pos
        for resids in description.resids
        for resid in resids
    ]
    assert np.all(avoids_mutation)