def test_that_all_featurizers_run():
    """Smoke-test the plain and subset featurizers on alanine dipeptide.

    Each featurizer is constructed and applied to all trajectories. Only the
    final subset union has its output checked: with every subset set to
    ``np.arange(1)``, the feature matrix is expected to have one column per
    member featurizer.
    """
    trajectories = fetch_alanine_dipeptide()["trajectories"]
    first_frame = trajectories[0][0]
    _, pair_indices = subset_featurizer.get_atompair_indices(first_frame)

    # Plain featurizers: each one only has to transform without raising.
    msmbuilder.featurizer.AtomPairsFeaturizer(pair_indices).transform(trajectories)
    msmbuilder.featurizer.SuperposeFeaturizer(
        np.arange(15), first_frame).transform(trajectories)
    msmbuilder.featurizer.DihedralFeaturizer(["phi", "psi"]).transform(trajectories)

    # ContactFeaturizer is deliberately left out: it doesn't work on
    # ALA dipeptide.

    msmbuilder.featurizer.RMSDFeaturizer(first_frame).transform(trajectories)

    # Subset featurizers, combined into a single union.
    members = [
        ("pairs", subset_featurizer.SubsetAtomPairs(
            pair_indices, first_frame, exponent=-1.0)),
        ("cosphi", subset_featurizer.SubsetCosPhiFeaturizer(first_frame)),
        ("sinphi", subset_featurizer.SubsetSinPhiFeaturizer(first_frame)),
        ("cospsi", subset_featurizer.SubsetCosPsiFeaturizer(first_frame)),
        ("sinpsi", subset_featurizer.SubsetSinPsiFeaturizer(first_frame)),
    ]
    union = subset_featurizer.SubsetFeatureUnion(members)
    union.subsets = [np.arange(1) for _ in range(union.n_featurizers)]

    features = union.transform(trajectories)
    eq(features[0].shape[1], 1 * union.n_featurizers)
def test_that_all_featurizers_run():
    """Smoke-test: each listed featurizer transforms the alanine dipeptide data.

    No output is inspected; the test passes as long as nothing raises.
    """
    # TODO: include all featurizers, perhaps with generator tests

    trajectories = fetch_alanine_dipeptide()["trajectories"]
    first_frame = trajectories[0][0]
    _, pair_indices = get_atompair_indices(first_frame)

    AtomPairsFeaturizer(pair_indices).transform(trajectories)
    SuperposeFeaturizer(np.arange(15), first_frame).transform(trajectories)
    DihedralFeaturizer(["phi", "psi"]).transform(trajectories)
    VonMisesFeaturizer(["phi", "psi"]).transform(trajectories)

    # ContactFeaturizer is deliberately left out: it doesn't work on
    # ALA dipeptide.

    RMSDFeaturizer(first_frame).transform(trajectories)
def test_function_featurizer():
    """Check that FunctionFeaturizer reproduces DihedralFeaturizer's phi output.

    The phi angle is computed three ways (a function with keyword arguments,
    a wrapper function without arguments, and DihedralFeaturizer itself) and
    the results are compared against the first one.
    """
    trajectories = fetch_alanine_dipeptide()["trajectories"]
    first_traj = trajectories[0]

    # Atom indices used to compute phi for alanine dipeptide.
    phi_atoms = [[4, 6, 8, 14]]

    # 1) Wrapped function that receives extra keyword arguments.
    with_args = FunctionFeaturizer(compute_dihedrals,
                                   func_args={"indices": phi_atoms})
    baseline = with_args.transform([first_traj])

    # 2) Function wrapping another function, needing no extra arguments.
    def phi_values(trj):
        return compute_phi(trj)[1]

    from_wrapper = FunctionFeaturizer(phi_values).transform([first_traj])

    # 3) Known result from the dedicated dihedral featurizer.
    from_dihedral = DihedralFeaturizer(['phi'], sincos=False).transform([first_traj])

    # Both alternatives must agree with the baseline.
    np.testing.assert_array_almost_equal(baseline, from_wrapper)
    np.testing.assert_array_almost_equal(baseline, from_dihedral)
def test_alanine_dipeptide():
    """AlphaAngleFeaturizer should produce zero feature columns here.

    Alanine dipeptide does not have enough peptides for this featurizer,
    so the first trajectory's feature matrix is expected to have no columns.
    """
    trajectories = fetch_alanine_dipeptide()["trajectories"]
    features = msmbuilder.featurizer.AlphaAngleFeaturizer().transform(trajectories)

    n_columns = features[0].shape[1]
    assert n_columns == 0
def test_SubsetAtomPairs2():
    """Compare SubsetAtomPairs (subset ``[0, 1]``) with AtomPairsFeaturizer.

    Both featurizers are built from the same pair indices and applied to all
    trajectories; their per-trajectory outputs are then compared with ``eq``.
    """
    dataset = fetch_alanine_dipeptide()
    trajectories = dataset["trajectories"]
    trj0 = trajectories[0][0]
    _, pair_indices = subset_featurizer.get_atompair_indices(trj0)

    featurizer = msmbuilder.featurizer.AtomPairsFeaturizer(pair_indices)
    X_all0 = featurizer.transform(trajectories)

    featurizer = subset_featurizer.SubsetAtomPairs(
        pair_indices, trj0, subset=np.array([0, 1]))
    X_all = featurizer.transform(trajectories)

    # NOTE(review): the subset selects two pairs while the reference uses all
    # of them -- confirm that this comparison is really expected to succeed.
    # eq is called for its effect only (presumably it raises on a mismatch --
    # confirm against its definition); its return value is not used.
    for x, x0 in zip(X_all, X_all0):
        eq(x, x0)
def test_alanine_dipeptide():
    """The first RMSD computed against the dataset's 0th frame should be zero.

    The RMSD of the 0th set of alanine dipeptide trajectories is taken
    relative to the 0th frame of the dataset, and the first value is
    required to be below 1e-3.
    """
    trajectories = fetch_alanine_dipeptide()["trajectories"]
    first_traj = trajectories[0]

    featurizer = msmbuilder.featurizer.StrucRMSDFeaturizer(
        first_traj, first_traj[0], range(first_traj.n_atoms))
    rmsds = featurizer.transform(first_traj)

    assert rmsds[0] < 1e-3
def test_SubsetAtomPairs_2():
    """SubsetAtomPairs with the full subset should match AtomPairsFeaturizer.

    The subset passed to the constructor covers every pair index, so the
    per-trajectory outputs of both featurizers are compared with ``eq``.
    """
    dataset = fetch_alanine_dipeptide()
    trajectories = dataset["trajectories"]
    trj0 = trajectories[0][0]
    _, pair_indices = get_atompair_indices(trj0)

    featurizer = AtomPairsFeaturizer(pair_indices)
    X_all0 = featurizer.transform(trajectories)

    featurizer = SubsetAtomPairs(pair_indices, trj0,
                                 subset=np.arange(len(pair_indices)))
    X_all = featurizer.transform(trajectories)

    # eq is called for its effect only (presumably it raises on a mismatch --
    # confirm against its definition); its return value is not used.
    for x, x0 in zip(X_all, X_all0):
        eq(x, x0)
def test_von_mises_featurizer():
    """Check the output shape of VonMisesFeaturizer for two bin counts.

    For the dihedral types ``["phi", "psi"]`` the first trajectory's feature
    matrix is expected to have ``n_frames`` rows and 36 columns with
    ``n_bins=18``, and 20 columns with ``n_bins=10``.
    """
    dataset = fetch_alanine_dipeptide()
    trajectories = dataset["trajectories"]
    n_frames = trajectories[0].n_frames

    for n_bins, n_columns in ((18, 36), (10, 20)):
        featurizer = VonMisesFeaturizer(["phi", "psi"], n_bins=n_bins)
        X_all = featurizer.transform(trajectories)
        shape = X_all[0].shape
        # Format the whole tuple so the message also works when the shape is
        # not 2-D; a "(%s, %s)" template would raise TypeError in that case
        # and hide the real failure.
        assert shape == (n_frames, n_columns), (
            "unexpected shape returned: %s" % (shape,))
def test_alanine_dipeptide_basic():
    """StrucRMSDFeaturizer should agree with mdtraj's own RMSD.

    The RMSD of the 0th set of alanine dipeptide trajectories is taken
    relative to the 0th frame of the dataset, and every value must match
    (to 4 decimals) the one computed directly with ``md.rmsd``.
    """
    trajectories = fetch_alanine_dipeptide()["trajectories"]
    first_traj = trajectories[0]

    computed = StrucRMSDFeaturizer(first_traj[0]).transform(trajectories[0:1])
    expected = md.rmsd(first_traj, first_traj[0])

    np.testing.assert_array_almost_equal(computed[0][:, 0], expected, decimal=4)
def test_two_refs_omitting_indices():
    """Omitting atom_indices should equal passing all atom indices.

    Two featurizers are built from the same two reference frames, one with
    an explicit ``np.arange(n_atoms)`` and one without atom indices; their
    outputs must match to 4 decimals.
    """
    trajectories = fetch_alanine_dipeptide()["trajectories"]
    first_traj = trajectories[0]

    every_atom = np.arange(first_traj.n_atoms)
    explicit = StrucRMSDFeaturizer(first_traj[0:2], every_atom)
    rmsd_explicit = explicit.transform(trajectories[0:1])

    implicit = StrucRMSDFeaturizer(first_traj[0:2])
    rmsd_implicit = implicit.transform(trajectories[0:1])

    np.testing.assert_array_almost_equal(rmsd_implicit[0], rmsd_explicit[0],
                                         decimal=4)
def test_two_refs():
    """Check RMSDs against two reference frames for mutual consistency.

    The 0th and 1st frames of the 0th set of adp trajectories serve as the
    two references. The RMSD of the 0th frame to the 0th reference should be
    identical to that of the 1st frame to the 1st reference, and the two
    cross terms should be identical to each other as well.
    """
    dataset = fetch_alanine_dipeptide()
    trajectories = dataset["trajectories"]
    featurizer = msmbuilder.featurizer.StrucRMSDFeaturizer(
        trajectories[0], trajectories[0][0:2], range(trajectories[0].n_atoms))
    data = featurizer.transform(trajectories[0])

    # TODO: Figure out why arrays are 3D
    # Compare absolute differences: a signed difference would let any large
    # negative discrepancy slip through the "< 1e-3" check.
    assert abs(data[0][0][0] - data[1][0][1]) < 1e-3
    assert abs(data[1][0][0] - data[0][0][1]) < 1e-3
def test_SubsetAtomPairs_3():
    """A two-pair subset must NOT match the full AtomPairsFeaturizer output.

    The comparison with ``eq`` is expected to raise ``AssertionError``; the
    test fails if every comparison goes through without one.
    """
    dataset = fetch_alanine_dipeptide()
    trajectories = dataset["trajectories"]
    trj0 = trajectories[0][0]
    _, pair_indices = get_atompair_indices(trj0)

    featurizer = AtomPairsFeaturizer(pair_indices)
    X_all0 = featurizer.transform(trajectories)

    featurizer = SubsetAtomPairs(pair_indices, trj0, subset=np.array([0, 1]))
    X_all = featurizer.transform(trajectories)

    try:
        # eq is called for its effect only; its return value is not used.
        for x, x0 in zip(X_all, X_all0):
            eq(x, x0)
    except AssertionError:
        # Expected: the subset output differs from the full output.
        pass
    else:
        raise AssertionError("Did not raise an assertion!")
def test_two_refs_basic():
    """Check two-reference RMSD features against mdtraj and each other.

    The 0th and 1st frames of the 0th set of adp trajectories are the two
    references. The RMSD of frame 0 to reference 0 must match that of
    frame 1 to reference 1, the cross terms must match each other, and the
    whole feature matrix must agree with RMSDs computed through ``md.rmsd``.
    """
    trajectories = fetch_alanine_dipeptide()["trajectories"]
    first_traj = trajectories[0]

    features = StrucRMSDFeaturizer(first_traj[0:2]).transform(trajectories[0:1])
    computed = features[0]

    # One column of reference RMSDs per reference frame.
    expected = np.zeros((first_traj.n_frames, 2))
    for ref_idx in (0, 1):
        expected[:, ref_idx] = md.rmsd(first_traj, first_traj[ref_idx])

    np.testing.assert_almost_equal(computed[0, 0], computed[1, 1], decimal=3)
    np.testing.assert_almost_equal(computed[1, 0], computed[0, 1], decimal=3)

    np.testing.assert_array_almost_equal(computed, expected, decimal=4)
def test_different_indices():
    """RMSDs from different atom subsets differ but share a shape.

    One featurizer uses the first half of the atom indices and the other the
    second half; both use the 0th frame as reference.
    """
    trajectories = fetch_alanine_dipeptide()["trajectories"]
    first_traj = trajectories[0]
    n_atoms = first_traj.n_atoms
    midpoint = n_atoms // 2

    lower_half = StrucRMSDFeaturizer(first_traj[0], np.arange(midpoint))
    rmsd_lower = lower_half.transform(trajectories[0:1])[0]

    upper_half = StrucRMSDFeaturizer(first_traj[0], np.arange(midpoint, n_atoms))
    rmsd_upper = upper_half.transform(trajectories[0:1])[0]

    assert rmsd_lower.shape == rmsd_upper.shape
    # Crude inequality check: the column sums are compared, not the elements.
    assert sum(rmsd_lower[:, 0]) != sum(rmsd_upper[:, 0])
def test_that_all_featurizers_run():
    """Run a union of the subset featurizers on alanine dipeptide.

    Every member's subset is set to ``np.arange(1)``, so the first
    trajectory's feature matrix is expected to have one column per member.
    """
    trajectories = fetch_alanine_dipeptide()["trajectories"]
    first_frame = trajectories[0][0]
    _, pair_indices = get_atompair_indices(first_frame)

    members = [
        ("pairs", SubsetAtomPairs(pair_indices, first_frame, exponent=-1.0)),
        ("cosphi", SubsetCosPhiFeaturizer(first_frame)),
        ("sinphi", SubsetSinPhiFeaturizer(first_frame)),
        ("cospsi", SubsetCosPsiFeaturizer(first_frame)),
        ("sinpsi", SubsetSinPsiFeaturizer(first_frame)),
    ]
    union = SubsetFeatureUnion(members)
    union.subsets = [np.arange(1) for _ in range(union.n_featurizers)]

    features = union.transform(trajectories)
    eq(features[0].shape[1], 1 * union.n_featurizers)