Example #1
0
def test_order_1():
    """Check that ``keys()`` lists 0..19 in ascending order after writing.

    The same check is run against an 'hdf5' dataset and a 'dir-npy' dataset,
    both created inside a temporary working directory.
    """
    expected_keys = list(range(20))
    with tempdir():
        for path, fmt in (('ds1.h5', 'hdf5'), ('ds1/', 'dir-npy')):
            with dataset(path, 'w', fmt) as ds:
                for key in expected_keys:
                    ds[key] = np.random.randn(10)
                assert list(ds.keys()) == expected_keys
Example #2
0
def test_order_1():
    """Write items 0..19 and verify ``keys()`` returns them in that order.

    Exercised once for the 'hdf5' format and once for 'dir-npy'.
    """

    def _fill_and_check(path, fmt):
        # Populate a fresh dataset and compare its key listing to 0..19.
        with dataset(path, 'w', fmt) as ds:
            for idx in range(20):
                ds[idx] = np.random.randn(10)
            assert list(ds.keys()) == list(range(20))

    with tempdir():
        _fill_and_check('ds1.h5', 'hdf5')
        _fill_and_check('ds1/', 'dir-npy')
Example #3
0
def test_mdtraj_1():
    """Smoke test for the 'mdtraj' format.

    Opens the files matching ``get_fn('') + '*.pdb'`` twice -- first with the
    default arguments, then restricted to ``atom_indices=[1, 2]`` -- and
    prints the keys, the first item and the provenance each time. Nothing is
    asserted; the test passes as long as no call raises.
    """
    for extra_kwargs in ({}, {'atom_indices': [1, 2]}):
        pattern = get_fn('') + '*.pdb'
        ds = dataset(pattern, fmt='mdtraj', verbose=True, **extra_kwargs)
        print(ds.keys())
        print(ds.get(0))
        print(ds.provenance)
Example #4
0
def test_union_3():
    """Opening a list of datasets with unequal item counts raises ValueError."""
    with tempdir():
        # Two context managers in a single `with` statement do not work on
        # Python 2.6.
        with dataset('ds1/', 'w', 'dir-npy') as first, \
                dataset('ds2/', 'w', 'dir-npy') as second:
            first[0] = np.random.randn(10, 2)
            first[1] = np.random.randn(10)
            # `second` gets only one item, so the two lengths disagree.
            second[0] = np.random.randn(10, 4)

        with assert_raises(ValueError):
            dataset(['ds1', 'ds2'])
Example #5
0
def test_hdf5_3():
    """Compare ``_sum_helper`` results obtained through ``Parallel``.

    A two-item 'hdf5' dataset is written and summarised once while still
    open. The file is then re-opened five times, each handle is passed to
    ``_sum_helper`` via ``Parallel(n_jobs=2)``, and every result must equal
    the reference value.
    """
    with tempdir():
        with dataset('ds.h5', 'w', 'hdf5') as writer:
            writer[0] = np.random.randn(10)
            writer[1] = np.random.randn(10)
            expected = _sum_helper(writer)

        # Generator: each handle is only opened when Parallel consumes it.
        handles = (dataset('ds.h5') for _ in range(5))
        runner = Parallel(n_jobs=2)
        totals = runner(delayed(_sum_helper)(handle) for handle in handles)

        assert all(total == expected for total in totals)
Example #6
0
def test_union_3():
    """A union of datasets holding 2 and 1 items must raise ValueError."""
    with tempdir():
        # Nested `with` blocks are used here; the single-statement form does
        # not work with py2.6.
        with dataset('ds1/', 'w', 'dir-npy') as ds_a:
            with dataset('ds2/', 'w', 'dir-npy') as ds_b:
                ds_a[0] = np.random.randn(10, 2)
                ds_a[1] = np.random.randn(10)
                ds_b[0] = np.random.randn(10, 4)
                # Uneven length: ds_a has two items, ds_b only one.

        with assert_raises(ValueError):
            dataset(['ds1', 'ds2'])
Example #7
0
def test_union_2():
    """Derive a 'dir-npy' dataset from a union and look at its provenance.

    NOTE(review): ``str.split`` always returns at least one element, so the
    final assertion cannot fail; it only proves that ``provenance`` is a
    string-like object with a ``split`` method.
    """
    with tempdir():
        # This doesn't work with py2.6
        with dataset('ds1/', 'w', 'dir-npy') as left, \
                dataset('ds2/', 'w', 'dir-npy') as right:
            left[0] = np.random.randn(10, 2)
            left[1] = np.random.randn(10)
            right[0] = np.random.randn(10, 4)
            right[1] = np.random.randn(10, 4)

        union = dataset(['ds1', 'ds2'])
        derived = union.create_derived('derived', fmt='dir-npy')
        provenance_lines = derived.provenance.split('\n')
        assert len(provenance_lines) > 0
Example #8
0
def test_mdtraj_1():
    """Open '*.pdb' files with fmt='mdtraj' and print what was loaded.

    Runs once with default options and once with ``atom_indices=[1, 2]``.
    The test makes no assertions; it only requires that nothing raises.
    """

    def _report(ds):
        # Print the keys, the first item and the provenance, in that order.
        print(ds.keys())
        print(ds.get(0))
        print(ds.provenance)

    _report(dataset(get_fn('') + '*.pdb', fmt='mdtraj', verbose=True))
    _report(dataset(get_fn('') + '*.pdb', fmt='mdtraj',
                    atom_indices=[1, 2], verbose=True))
Example #9
0
def test_uneven_n():
    """``FeatureUnion.fit`` raises ValueError for datasets of unequal length."""
    with tempdir():
        # This doesn't work with py2.6
        with dataset('ds1/', 'w', 'dir-npy') as two_items, \
                dataset('ds2/', 'w', 'dir-npy') as one_item:
            two_items[0] = np.random.randn(10, 2)
            two_items[1] = np.random.randn(5, 2)
            # Only one entry here: uneven number of trajs!
            one_item[0] = np.random.randn(10, 4)

            union = FeatureUnion(normalize=False)
            with assert_raises(ValueError):
                union.fit((two_items, one_item))
Example #10
0
def test_hdf5_3():
    """``_sum_helper`` must agree when run on re-opened handles in parallel.

    The reference value is computed on the open writer; five fresh handles
    on 'ds.h5' are then processed through ``Parallel(n_jobs=2)``.
    """
    with tempdir():
        with dataset('ds.h5', 'w', 'hdf5') as ds:
            for key in (0, 1):
                ds[key] = np.random.randn(10)
            reference = _sum_helper(ds)

        # Handles are created lazily, as Parallel pulls tasks.
        jobs = (delayed(_sum_helper)(dataset('ds.h5')) for _ in range(5))
        results = Parallel(n_jobs=2)(jobs)

        assert all(value == reference for value in results)
def test_uneven_n():
    """Fitting a FeatureUnion on a 2-item and a 1-item dataset must fail."""
    with tempdir():
        # Nested `with` blocks; the single-statement form does not work with
        # py2.6.
        with dataset('ds1/', 'w', 'dir-npy') as ds_a:
            with dataset('ds2/', 'w', 'dir-npy') as ds_b:
                ds_a[0] = np.random.randn(10, 2)
                ds_a[1] = np.random.randn(5, 2)
                ds_b[0] = np.random.randn(10, 4)
                # Uneven number of trajs: ds_b has no second item.

                feature_union = FeatureUnion(normalize=False)
                with assert_raises(ValueError):
                    feature_union.fit((ds_a, ds_b))
Example #12
0
def test_union_2():
    """Create a derived dataset from a two-dataset union.

    NOTE(review): the closing assertion is always true because
    ``str.split`` never returns an empty list.
    """
    with tempdir():
        # This doesn't work with py2.6
        with dataset('ds1/', 'w', 'dir-npy') as ds_a, \
                dataset('ds2/', 'w', 'dir-npy') as ds_b:
            ds_a[0] = np.random.randn(10, 2)
            ds_a[1] = np.random.randn(10)
            for key in (0, 1):
                ds_b[key] = np.random.randn(10, 4)

        combined = dataset(['ds1', 'ds2'])
        out = combined.create_derived('derived', fmt='dir-npy')
        n_lines = len(out.provenance.split('\n'))
        assert n_lines > 0
Example #13
0
    def test_assign(self):
        """Run ``msmb SolventShellsAssigner`` and check the data it writes.

        The command is invoked with the file names stored on ``self``; the
        first item of the dataset written to ``self.outfn`` must equal a
        hard-coded table.
        """
        cmd = [
            'msmb', 'SolventShellsAssigner',
            '--trjs', self.traj_fn,
            '--solute_indices', self.ute_fn,
            '--solvent_indices', self.vent_fn,
            '--n_shells', '3',
            '--shell_width', '1',
            '--out', self.outfn,
            '--chunk', '2',
        ]
        # The null device is handed to the child as stdout/stderr, so it must
        # be opened for writing; a read-only descriptor makes the child's
        # writes fail with EBADF.
        with open(os.devnull, 'w') as dn:
            subprocess.call(cmd, stdout=dn, stderr=dn)

        data = dataset(self.outfn)[0]

        should_be = np.array([
            [0, 0, 0, 0],
            [0, 1, 0, 0],
            [1, 0, 0, 1],
            [1, 1, 0, 1],
            [2, 0, 0, 2],
            [2, 1, 0, 2],
            # no rows expected with first column 3
            # no rows expected with first column 4
            [5, 1, 0, 0],
            [6, 1, 0, 1],
            [7, 1, 0, 2],
            # no rows expected with first column 8
        ])

        np.testing.assert_array_equal(data, should_be)
Example #14
0
    def test_assign(self):
        """Check the output of the ``msmb SolventShellsAssigner`` command.

        Runs the command-line tool on the fixture files referenced by
        ``self`` and compares the first item of the resulting dataset with
        the expected table.
        """
        # Open the null device in write mode: it becomes the child's
        # stdout/stderr, and writes to a read-only descriptor would fail.
        with open(os.devnull, 'w') as dn:
            subprocess.call(
                [
                    'msmb', 'SolventShellsAssigner', '--trjs', self.traj_fn,
                    '--solute_indices', self.ute_fn, '--solvent_indices',
                    self.vent_fn, '--n_shells', '3', '--shell_width', '1',
                    '--out', self.outfn, '--chunk', '2'
                ],
                stdout=dn,
                stderr=dn,
            )

        data = dataset(self.outfn)[0]

        should_be = np.array([
            [0, 0, 0, 0],
            [0, 1, 0, 0],
            [1, 0, 0, 1],
            [1, 1, 0, 1],
            [2, 0, 0, 2],
            [2, 1, 0, 2],
            # 3
            # 4
            [5, 1, 0, 0],
            [6, 1, 0, 1],
            [7, 1, 0, 2],
            # 8
        ])

        np.testing.assert_array_equal(data, should_be)
Example #15
0
def test_1():
    """Basic read/write round trip for a 'dir-npy' dataset.

    Covers the files created on disk after the first write, item retrieval,
    ``IndexError`` for a missing key, ``len`` and iteration order.
    """
    # mkdtemp is only used to obtain a unique name; the directory itself is
    # removed so the dataset can create it.
    path = tempfile.mkdtemp()
    shutil.rmtree(path)
    # The original `except: raise` clause was a no-op and has been dropped;
    # `finally` alone guarantees the cleanup.
    try:
        X = np.random.randn(10, 2)
        ds = dataset(path, 'w', 'dir-npy')
        ds[0] = X
        assert set(os.listdir(path)) == {'PROVENANCE.txt', '00000000.npy'}
        np.testing.assert_array_equal(ds[0], X)

        assert_raises(IndexError, lambda: ds[1])
        assert len(ds) == 1

        Y = np.zeros((10, 1))
        Z = np.ones((2, 2))
        ds[1] = Y
        ds[2] = Z
        np.testing.assert_array_equal(ds[1], Y)
        np.testing.assert_array_equal(ds[2], Z)
        assert len(ds) == 3

        expected = [X, Y, Z]
        for i, item in enumerate(ds):
            np.testing.assert_array_equal(item, expected[i])
    finally:
        shutil.rmtree(path)
Example #16
0
def test_2():
    """A derived dataset is independent of its parent and extends provenance.

    After ``create_derived`` both datasets hold their own single item, and
    the derived dataset's provenance contains exactly two lines starting
    with ``'  Command'``.
    """
    # Reserve two unique names, then delete the directories so the datasets
    # can create them.
    path1 = tempfile.mkdtemp()
    path2 = tempfile.mkdtemp()
    shutil.rmtree(path1)
    shutil.rmtree(path2)
    # The redundant `except: raise` clause was removed; it re-raised
    # unchanged and only obscured the control flow.
    try:
        X = np.random.randn(10, 2)
        Y = np.random.randn(10, 2)
        ds1 = dataset(path1, 'w', 'dir-npy')
        ds1[0] = X

        ds2 = ds1.create_derived(path2)
        ds2[0] = Y

        np.testing.assert_array_equal(ds1[0], X)
        np.testing.assert_array_equal(ds2[0], Y)
        assert len(ds1) == 1
        assert len(ds2) == 1

        prov2 = ds2.provenance
        print(prov2)
        n_commands = sum(
            line.startswith('  Command') for line in prov2.splitlines())
        assert 2 == n_commands
    finally:
        shutil.rmtree(path1)
        shutil.rmtree(path2)
Example #17
0
def test_2():
    """Check ``create_derived``: separate contents, accumulated provenance.

    The parent keeps ``X``, the derived dataset holds ``Y``, and the derived
    provenance must contain two ``'  Command'`` lines.
    """
    path1 = tempfile.mkdtemp()
    path2 = tempfile.mkdtemp()
    # Only the unique names are needed; the datasets create the directories.
    shutil.rmtree(path1)
    shutil.rmtree(path2)
    # No `except` clause: the former bare `except: raise` did nothing beyond
    # what `try`/`finally` already does.
    try:
        X = np.random.randn(10, 2)
        Y = np.random.randn(10, 2)
        parent = dataset(path1, 'w', 'dir-npy')
        parent[0] = X

        derived = parent.create_derived(path2)
        derived[0] = Y

        np.testing.assert_array_equal(parent[0], X)
        np.testing.assert_array_equal(derived[0], Y)
        assert len(parent) == 1
        assert len(derived) == 1

        prov2 = derived.provenance
        print(prov2)
        command_lines = [
            s for s in prov2.splitlines() if s.startswith('  Command')]
        assert 2 == len(command_lines)
    finally:
        shutil.rmtree(path1)
        shutil.rmtree(path2)
Example #18
0
def test_1():
    """Write three arrays to a 'dir-npy' dataset and read them back.

    Also checks the on-disk file names after the first write, that a missing
    key raises ``IndexError``, and that iteration yields the items in key
    order.
    """
    path = tempfile.mkdtemp()
    shutil.rmtree(path)  # keep the unique name, let the dataset create it
    # `try`/`finally` is sufficient; the previous bare `except: raise` was a
    # no-op and has been removed.
    try:
        X = np.random.randn(10, 2)
        ds = dataset(path, 'w', 'dir-npy')
        ds[0] = X
        on_disk = set(os.listdir(path))
        assert on_disk == set(('PROVENANCE.txt', '00000000.npy'))
        np.testing.assert_array_equal(ds[0], X)

        assert_raises(IndexError, lambda: ds[1])
        assert len(ds) == 1

        Y = np.zeros((10, 1))
        Z = np.ones((2, 2))
        ds[1] = Y
        ds[2] = Z
        np.testing.assert_array_equal(ds[1], Y)
        np.testing.assert_array_equal(ds[2], Z)
        assert len(ds) == 3

        for i, item in enumerate(ds):
            np.testing.assert_array_equal(item, [X, Y, Z][i])
    finally:
        shutil.rmtree(path)
Example #19
0
    def test_partial_transform(self):
        """Run ``msmb SolventShellsFeaturizer`` and check the written data.

        The first item of the dataset written to ``self.outfn`` must equal
        the hard-coded counts divided by ``4 * pi * r ** 2`` for
        r = 0.5, 1.5, 2.5.
        """
        cmd = [
            'msmb', 'SolventShellsFeaturizer',
            '--trjs', self.traj_fn,
            '--solute_indices', self.ute_fn,
            '--solvent_indices', self.vent_fn,
            '--n_shells', '3',
            '--shell_width', '1',
            '--out', self.outfn,
        ]
        # The null device is used as the child's stdout/stderr and therefore
        # has to be opened for writing, not with the default read mode.
        with open(os.devnull, 'w') as dn:
            subprocess.call(cmd, stdout=dn, stderr=dn)
        data = dataset(self.outfn)[0]

        norm = np.asarray([4 * np.pi * r ** 2 for r in [0.5, 1.5, 2.5]])
        should_be = np.array([
            [2, 0, 0],
            [0, 2, 0],
            [0, 0, 2],
            [0, 0, 0],
            [0, 0, 0],
            [1, 0, 0],
            [0, 1, 0],
            [0, 0, 1],
            [0, 0, 0]
        ]) / norm

        np.testing.assert_array_equal(data, should_be)
Example #20
0
    def start(self):
        """Transform paired trajectories and assignments into a new dataset.

        Reports an error through ``self.error`` when ``self.out`` already
        exists. Each trajectory key is paired with the assignment key at the
        same position; ``self.instance.partial_transform`` is applied to the
        pair and the result is stored under the assignment key in a
        'dir-npy' dataset derived from the assignments.
        """
        if os.path.exists(self.out):
            # presumably self.error aborts the command -- confirm
            self.error('File exists: %s' % self.out)

        print(self.instance)
        # Use the topology only when the expanded path exists.
        expanded_top = os.path.expanduser(self.top)
        top = expanded_top if os.path.exists(expanded_top) else None

        traj_dataset = MDTrajDataset(
            self.trjs,
            topology=top,
            stride=self.instance.stride,
            verbose=False,
        )

        with dataset(self.assignments, mode='r') as assn_dataset:
            out_dataset = assn_dataset.create_derived(self.out, fmt='dir-npy')
            progress = ProgressBar(
                widgets=[Percentage(), Bar(), ETA()],
                maxval=len(assn_dataset),
            ).start()
            key_pairs = zip(traj_dataset.keys(), assn_dataset.keys())
            for traj_key, assn_key in progress(key_pairs):
                pair = (traj_dataset[traj_key], assn_dataset[assn_key])
                out_dataset[assn_key] = self.instance.partial_transform(pair)
            out_dataset.close()

        closing_message = (
            "\nSaving transformed dataset to '%s'" % self.out,
            "To load this dataset interactive inside an IPython",
            "shell or notebook, run\n",
            "  $ ipython",
            "  >>> from msmbuilder.dataset import dataset",
            "  >>> ds = dataset('%s')\n" % self.out,
        )
        for line in closing_message:
            print(line)
Example #21
0
def save_dataset(data, path):
    """Write the items of *data* to a fresh 'dir-npy' dataset at *path*.

    Anything already present at *path* is deleted first. Items are copied by
    position: ``data[i]`` is stored under key ``i`` for
    ``i in range(len(data))``.

    Parameters
    ----------
    data : indexable with ``len``
        Items to store.
    path : str
        Location of the dataset directory.

    Raises
    ------
    OSError
        If an existing *path* cannot be removed. The previous ``rm -rf``
        call ignored such failures.
    """
    # Local import: shutil is not otherwise required by this block.
    import shutil

    if os.path.exists(path):
        # Remove with the standard library instead of interpolating `path`
        # into a shell command, which broke on spaces and shell
        # metacharacters.
        if os.path.isdir(path) and not os.path.islink(path):
            shutil.rmtree(path)
        else:
            os.remove(path)

    # The context manager closes the dataset even if a write fails.
    with dataset(path, 'w', 'dir-npy') as ds:
        for i in range(len(data)):
            ds[i] = data[i]
def featurizing_the_conformations(featurizer, xtc_traj_folder, traj_list_array, pdb_name):
    """Featurize each listed trajectory and collect the first result of each.

    Every file name in *traj_list_array* is opened as a dataset with
    *pdb_name* as topology, passed through ``featurizer.fit_transform``, and
    element 0 of the result is appended to the returned list.

    NOTE(review): *xtc_traj_folder* is never used. Paths are built from
    ``trajectory_dir``, which is not defined inside this function --
    confirm it is a module-level global, or whether the parameter was meant
    to be used instead.
    """
    # Dihedral, RMSD or pairwise-distance featurizers are the usual choice
    # for MSMs.
    collected = []
    for traj_name in traj_list_array:
        # Coordinates for the frames of this trajectory.
        coords = dataset(trajectory_dir + traj_name,
                         topology=trajectory_dir + pdb_name)
        # Turn the coordinates into features (e.g. pairwise distances).
        transformed = featurizer.fit_transform(coords)
        first = transformed[0]
        print("loaded %s into (%d,%d) dimensional file"
              % (traj_name, len(first), len(first[0])))
        collected.append(first)
    return collected
def test_transform_command_1():
    """``msmb TransformDataset`` output must match the fitted model's labels.

    A KCenters model is fitted from the command line and then applied with
    TransformDataset; the first transformed item is compared with
    ``labels_[0]`` of the pickled model.
    """
    with tempdir():
        fit_cmd = (
            "msmb KCenters -i {data_home}/alanine_dipeptide/*.dcd "
            "-o model.pkl --top {data_home}/alanine_dipeptide/ala2.pdb "
            "--metric rmsd"
        ).format(data_home=get_data_home())
        shell(fit_cmd)

        transform_cmd = (
            "msmb TransformDataset -i {data_home}/alanine_dipeptide/*.dcd "
            "-m model.pkl -t transformed.h5 --top "
            "{data_home}/alanine_dipeptide/ala2.pdb"
        ).format(data_home=get_data_home())
        shell(transform_cmd)

        eq(dataset('transformed.h5')[0], load('model.pkl').labels_[0])
Example #24
0
def test_append_dirnpy():
    """Append to, and overwrite in, a 'dir-npy' dataset across re-opens.

    The dataset is created in 'w' mode and then re-opened three times in 'a'
    mode; the last re-open overwrites key 2. Afterwards ``ds[:][k]`` must
    equal ``ds[k]`` for k = 0, 1, 2.
    """
    path = tempfile.mkdtemp()
    shutil.rmtree(path)
    try:
        # (mode, key) per open; the final entry overwrites key 2.
        steps = (('w', 0), ('a', 1), ('a', 2), ('a', 2))
        for mode, key in steps:
            with dataset(path, mode, 'dir-npy') as ds:
                ds[key] = np.random.randn(10, 2)

        # `ds` is the handle from the last `with` block, used after exit as
        # in the original test.
        for key in range(3):
            np.testing.assert_array_equal(ds[:][key], ds[key])

    finally:
        shutil.rmtree(path)
Example #25
0
def test_hdf5_1():
    """Basic 'hdf5' dataset behaviour: write, overwrite, slice, re-open.

    Key 0 is written, read back through ``get`` and then overwritten;
    ``ds[:][k]`` must match ``ds[k]``; after closing and re-opening, item 0
    keeps the shape of the overwriting array.
    """
    with tempdir():
        ds = dataset('ds.h5', 'w', 'hdf5')
        print(ds.provenance)

        zeros = np.zeros(10)
        ds[0] = zeros
        np.testing.assert_array_equal(ds.get(0), np.zeros(10))
        assert list(ds.keys()) == [0]
        assert len(ds) == 1

        # Key 0 is overwritten here with a (10, 1) array.
        for key, n_cols in ((0, 1), (1, 2), (2, 3)):
            ds[key] = np.random.randn(10, n_cols)

        for key in (0, 1, 2):
            np.testing.assert_array_equal(ds[:][key], ds[key])

        ds.close()
        with dataset('ds.h5') as reopened:
            assert reopened[0].shape == (10, 1)
Example #26
0
def test_atom_pairs_featurizer():
    """Run ``msmb AtomPairsFeaturizer`` on pair indices from ``AtomIndices``.

    The resulting 'pairs' dataset must contain 10 items, and the number of
    feature columns must equal the number of rows in 'all.txt'.
    """
    with tempdir():
        # The stray trailing comma after this call (which built a throwaway
        # tuple) has been removed.
        shell('msmb AtomIndices -o all.txt --all -d -p %s/alanine_dipeptide/ala2.pdb' % get_data_home())
        shell("msmb AtomPairsFeaturizer --trjs '{data_home}/alanine_dipeptide/*.dcd'"
              " --out pairs --pair_indices all.txt"
              " --top {data_home}/alanine_dipeptide/ala2.pdb".format(
                  data_home=get_data_home()))
        ds = dataset('pairs')
        assert len(ds) == 10
        # The original squared the array inside len(); element-wise squaring
        # does not change the length, so the misleading `**2` is dropped.
        assert ds[0].shape[1] == len(np.loadtxt('all.txt'))
        print(ds.provenance)
Example #27
0
def test_hdf5_1():
    """Exercise an 'hdf5' dataset: set/get, keys, len, slicing, re-open."""
    with tempdir():
        handle = dataset('ds.h5', 'w', 'hdf5')
        print(handle.provenance)

        handle[0] = np.zeros(10)
        stored = handle.get(0)
        np.testing.assert_array_equal(stored, np.zeros(10))
        assert list(handle.keys()) == [0]
        assert len(handle) == 1

        # Replace item 0 and add two more items with growing column counts.
        handle[0] = np.random.randn(10, 1)
        handle[1] = np.random.randn(10, 2)
        handle[2] = np.random.randn(10, 3)

        # Slicing the whole dataset must give the same items as key access.
        for position in range(3):
            np.testing.assert_array_equal(handle[:][position],
                                          handle[position])

        handle.close()
        with dataset('ds.h5') as ds:
            first_shape = ds[0].shape
            assert first_shape == (10, 1)
Example #28
0
def test_union():
    """Opening a list of HDF5 files concatenates items column-wise.

    Row sums of the union's items must match the sums of the row sums of
    the corresponding source items, and the column counts must add up
    (2 + 4 and 1 + 4).
    """
    with tempdir():
        # This doesn't work with py2.6
        with dataset('ds1.h5', 'w', 'hdf5') as left, \
                dataset('ds2.h5', 'w', 'hdf5') as right:
            left[0] = np.random.randn(10, 2)
            left[1] = np.random.randn(10)
            right[0] = np.random.randn(10, 4)
            right[1] = np.random.randn(10, 4)

            # Reference row sums; left[1] is one-dimensional and is added
            # as-is.
            expected_rows_0 = left[0].sum(axis=1) + right[0].sum(axis=1)
            expected_rows_1 = left[1] + right[1].sum(axis=1)

        union = dataset(['ds1.h5', 'ds2.h5'])

        assert len(union) == 2
        assert union[0].shape == (10, 6)
        assert union[1].shape == (10, 5)
        np.testing.assert_array_almost_equal(
            np.sum(union[0], axis=1), expected_rows_0)
        np.testing.assert_array_almost_equal(
            np.sum(union[1], axis=1), expected_rows_1)
Example #29
0
def test_4():
    """``get(key, mmap=True)`` returns an ``np.memmap`` equal to the item."""
    path = tempfile.mkdtemp()
    shutil.rmtree(path)
    try:
        ds = dataset(path, 'w', 'dir-npy')
        ds[0] = np.random.randn(10, 2)

        mapped = ds.get(0, mmap=True)
        assert isinstance(mapped, np.memmap)
        np.testing.assert_array_equal(ds[0], mapped)

        # Drop the memmap before cleanup so the underlying file is closed.
        del mapped
    finally:
        shutil.rmtree(path)
Example #30
0
def test_union():
    """A union of two HDF5 datasets stacks their columns item by item."""
    with tempdir():
        # Nested `with` blocks; the single-statement form does not work with
        # py2.6.
        with dataset('ds1.h5', 'w', 'hdf5') as ds_a:
            with dataset('ds2.h5', 'w', 'hdf5') as ds_b:
                ds_a[0] = np.random.randn(10, 2)
                ds_a[1] = np.random.randn(10)
                for key in (0, 1):
                    ds_b[key] = np.random.randn(10, 4)

                # Compare row sums
                rs1 = np.sum(ds_a[0], axis=1) + np.sum(ds_b[0], axis=1)
                rs2 = ds_a[1] + np.sum(ds_b[1], axis=1)

        mds = dataset(['ds1.h5', 'ds2.h5'])

        assert len(mds) == 2
        # 2 + 4 columns for item 0, 1 + 4 columns for item 1.
        assert mds[0].shape == (10, 6)
        assert mds[1].shape == (10, 5)
        for key, reference in ((0, rs1), (1, rs2)):
            np.testing.assert_array_almost_equal(
                np.sum(mds[key], axis=1), reference)
Example #31
0
def Get_rawposition_features_villin():
    """Compute raw-position features for the villin trajectories.

    Changes the working directory to the trajectory folder, removes any
    existing ``./rawpositions`` directory, loads every ``*.xtc`` file with
    stride 5 and writes the ``RawPositionsFeaturizer`` output to
    ``rawpositions/`` in 'dir-npy' format. Returns what
    ``fit_transform_with`` returns.
    """
    import os
    import shutil

    traj_dir = '/homes/anuginueni/traj_villin'
    os.chdir(traj_dir)
    # Start from a clean output directory.
    if os.path.isdir('./rawpositions'):
        shutil.rmtree('./rawpositions')

    from msmbuilder.dataset import dataset
    trajectories = dataset(
        traj_dir + '/*.xtc',
        topology=traj_dir + '/filtered.pdb',
        stride=5,
    )

    from msmbuilder.featurizer import RawPositionsFeaturizer
    raw_featurizer = RawPositionsFeaturizer()
    return trajectories.fit_transform_with(
        raw_featurizer, 'rawpositions/', fmt='dir-npy')
Example #32
0
def test_4():
    """Memory-mapped access to a 'dir-npy' item yields an ``np.memmap``.

    The mapped array must compare equal to the normally loaded item.
    """
    target_dir = tempfile.mkdtemp()
    shutil.rmtree(target_dir)
    try:
        store = dataset(target_dir, 'w', 'dir-npy')
        store[0] = np.random.randn(10, 2)
        view = store.get(0, mmap=True)
        assert isinstance(view, np.memmap)
        np.testing.assert_array_equal(store[0], view)
        del view  # close the underlying file before removing the directory
    finally:
        shutil.rmtree(target_dir)
Example #33
0
def test_superpose_featurizer():
    """Run ``msmb SuperposeFeaturizer`` on indices from ``AtomIndices``.

    The 'distances' dataset must hold 10 items whose column count equals
    the number of entries in 'all.txt'.
    """
    with tempdir():
        # The trailing comma that followed this call turned the statement
        # into a one-element tuple expression; it has been removed.
        shell('msmb AtomIndices -o all.txt --all -a -p %s/alanine_dipeptide/ala2.pdb' % get_data_home())
        shell("msmb SuperposeFeaturizer --trjs '{data_home}/alanine_dipeptide/*.dcd'"
              " --out distances --atom_indices all.txt"
              " --reference_traj {data_home}/alanine_dipeptide/ala2.pdb"
              " --top {data_home}/alanine_dipeptide/ala2.pdb".format(
                  data_home=get_data_home()))
        ds = dataset('distances')
        assert len(ds) == 10
        assert ds[0].shape[1] == len(np.loadtxt('all.txt'))
        print(ds.provenance)
Example #34
0
def test_append_dirnpy():
    """Re-open a 'dir-npy' dataset in append mode and keep it consistent."""
    path = tempfile.mkdtemp()
    shutil.rmtree(path)

    def _write(mode, key):
        # Open the dataset in `mode`, store one random array under `key`
        # and hand back the handle, which has left its `with` block.
        with dataset(path, mode, 'dir-npy') as handle:
            handle[key] = np.random.randn(10, 2)
        return handle

    try:
        _write('w', 0)
        _write('a', 1)
        _write('a', 2)
        # Overwrite
        ds = _write('a', 2)

        np.testing.assert_array_equal(ds[:][0], ds[0])
        np.testing.assert_array_equal(ds[:][1], ds[1])
        np.testing.assert_array_equal(ds[:][2], ds[2])
    finally:
        shutil.rmtree(path)
Example #35
0
def test_atom_pairs_featurizer():
    """Check the output of the ``msmb AtomPairsFeaturizer`` command.

    Pair indices are generated with ``msmb AtomIndices``; the 'pairs'
    dataset must contain 10 items with one column per row of 'all.txt'.
    """
    with tempdir():
        # A stray trailing comma after this call has been removed.
        shell('msmb AtomIndices -o all.txt --all -d -p '
              '%s/alanine_dipeptide/ala2.pdb' % get_data_home())
        shell("msmb AtomPairsFeaturizer "
              "--trjs '{data_home}/alanine_dipeptide/*.dcd'"
              " --transformed pairs --pair_indices all.txt"
              " --top {data_home}/alanine_dipeptide/ala2.pdb"
              .format(data_home=get_data_home()))
        ds = dataset('pairs')
        assert len(ds) == 10
        # `len(arr ** 2)` equals `len(arr)`; the no-op squaring is removed
        # so the assertion says what it checks.
        assert ds[0].shape[1] == len(np.loadtxt('all.txt'))
        print(ds.provenance)
def test_dataset():
    """``FeatureUnion.fit_transform`` on two datasets concatenates columns.

    With 2-column and 4-column inputs each output item must have 6 columns,
    and its row sums must equal the sum of the inputs' row sums.
    """
    with tempdir():
        # This doesn't work with py2.6
        with dataset('ds1.h5', 'w', 'hdf5') as narrow, \
                dataset('ds2.h5', 'w', 'hdf5') as wide:
            narrow[0] = np.random.randn(10, 2)
            narrow[1] = np.random.randn(5, 2)
            wide[0] = np.random.randn(10, 4)
            wide[1] = np.random.randn(5, 4)

            # Reference row sums for both items.
            rows_0 = np.sum(narrow[0], axis=1) + np.sum(wide[0], axis=1)
            rows_1 = np.sum(narrow[1], axis=1) + np.sum(wide[1], axis=1)

            union = FeatureUnion(normalize=False)
            merged = union.fit_transform((narrow, wide))

        assert len(merged) == 2
        assert merged[0].shape == (10, 6)
        assert merged[1].shape == (5, 6)
        np.testing.assert_array_almost_equal(np.sum(merged[0], axis=1), rows_0)
        np.testing.assert_array_almost_equal(np.sum(merged[1], axis=1), rows_1)
Example #37
0
    def start(self):
        """Export a single-item dataset as text plus a VMD Tcl script.

        The only item of ``self.dataset`` is written with ``np.savetxt`` to
        ``<out_prefix>.txt``; ``VMDSCRIPT`` is filled with the trajectory,
        stride, topology and data file names and written to
        ``<out_prefix>.tcl``.

        Raises
        ------
        AssertionError
            If the dataset does not contain exactly one item.
        """
        dat_fn = "{}.txt".format(self.out_prefix)
        tcl_fn = "{}.tcl".format(self.out_prefix)

        # Open through the context manager so the handle is released; the
        # previous code rebound `ds` to the first item and never closed the
        # dataset.
        with dataset(self.dataset, mode='r') as ds:
            assert len(ds) == 1, "Only support one at a time for now"
            # Written while the dataset is still open, so the item is valid
            # regardless of how the backend loads it.
            np.savetxt(dat_fn, ds[0], fmt="%.5f")

        with open(tcl_fn, 'w') as f:
            f.write(VMDSCRIPT.format(
                traj_fn=self.traj, step=self.stride, top_fn=self.top,
                dat_fn=dat_fn
            ))
Example #38
0
def test_dataset():
    """FeatureUnion over two HDF5 datasets yields 6-column items."""
    with tempdir():
        # Nested `with` blocks; the single-statement form does not work with
        # py2.6.
        with dataset('ds1.h5', 'w', 'hdf5') as ds_a:
            with dataset('ds2.h5', 'w', 'hdf5') as ds_b:
                ds_a[0] = np.random.randn(10, 2)
                ds_a[1] = np.random.randn(5, 2)
                ds_b[0] = np.random.randn(10, 4)
                ds_b[1] = np.random.randn(5, 4)

                # Compare row sums
                rs1 = ds_a[0].sum(axis=1) + ds_b[0].sum(axis=1)
                rs2 = ds_a[1].sum(axis=1) + ds_b[1].sum(axis=1)

                transformer = FeatureUnion(normalize=False)
                mds = transformer.fit_transform((ds_a, ds_b))

        assert len(mds) == 2
        assert mds[0].shape == (10, 6)
        assert mds[1].shape == (5, 6)
        for key, reference in ((0, rs1), (1, rs2)):
            np.testing.assert_array_almost_equal(
                np.sum(mds[key], axis=1), reference)
Example #39
0
def test_superpose_featurizer():
    """Check the output of the ``msmb SuperposeFeaturizer`` command.

    Atom indices come from ``msmb AtomIndices``; the 'distances' dataset
    must contain 10 items with one column per entry of 'all.txt'.
    """
    with tempdir():
        # A stray trailing comma after this call has been removed.
        shell('msmb AtomIndices -o all.txt --all -a -p '
              '%s/alanine_dipeptide/ala2.pdb' % get_data_home())
        shell("msmb SuperposeFeaturizer "
              "--trjs '{data_home}/alanine_dipeptide/*.dcd'"
              " --transformed distances --atom_indices all.txt"
              " --reference_traj {data_home}/alanine_dipeptide/ala2.pdb"
              " --top {data_home}/alanine_dipeptide/ala2.pdb"
              .format(data_home=get_data_home()))
        ds = dataset('distances')
        assert len(ds) == 10
        assert ds[0].shape[1] == len(np.loadtxt('all.txt'))
        print(ds.provenance)
def main():
    """Count native CA-CA contacts of a target in every trajectory frame.

    Command-line arguments: ``pdbpath`` (path to the pdb trajectories) and
    ``target`` (path to the target pdb). Contacts of the target closer than
    0.75 define the native contact pairs. For each frame of the input
    trajectories the number of those pairs within 0.75 is written to
    ``<target>.<pdbpath>.number_native_contact.dat`` as
    ``<n_native> <n_present> <fraction>``.

    The former Python-2-only ``print`` statements are replaced with forms
    that run unchanged on Python 2 and 3.
    """
    import argparse
    import textwrap

    parser = argparse.ArgumentParser(
        usage=textwrap.dedent(
            '''Use "python %(prog)s -h" for more information.'''),
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument(
        'pdbpath',
        help=textwrap.dedent('''[required] Path to pdb trajectories.'''))
    parser.add_argument(
        'target',
        help=textwrap.dedent('''[required] Path to target pdb.
        Note: The target pdb should have the same number of atoms in structure with that in pdb trajectories. '''))
    args = parser.parse_args()

    from msmbuilder.dataset import dataset
    coords = dataset(args.pdbpath)
    print('%i trajectories found. ' % len(coords))

    # Featurize. The result is not used below; the call is kept in case
    # featurize_trajectories has side effects -- TODO confirm.
    features = featurize_trajectories(coords, 'ContactFeaturizer')

    import mdtraj as md
    target = md.load(args.target)

    # Native contacts: CA-CA pairs of the target within the 0.75 cutoff.
    native_contact_dists, native_contact_pairs = md.compute_contacts(
        target, scheme='ca')
    native_contact_pairs = native_contact_pairs[np.where(
        native_contact_dists[0] <= 0.75)]
    n_native_contact = len(native_contact_pairs)
    print("Target structure has %i pairs of CA-CA contact in total. \n"
          % n_native_contact)

    from msmbuilder.featurizer import ContactFeaturizer
    native_contact_to_target = np.concatenate(
        ContactFeaturizer(
            contacts=native_contact_pairs,
            scheme='ca').fit_transform(coords))  # (n_samples, n_pairs)
    # 1 where a native pair is within the cutoff in a frame, else 0.
    native_contact_to_target = np.select(
        [native_contact_to_target <= 0.75, native_contact_to_target > 0.75],
        [1, 0])
    native_contact_to_target = np.sum(native_contact_to_target, axis=1)

    out_fn = '%s.%s.number_native_contact.dat' % (
        get_basename_no_ext(args.target), get_basename_no_ext(args.pdbpath))
    with open(out_fn, 'w') as f:
        for e in native_contact_to_target:
            # One line per frame, as `print >> f` produced.
            f.write('%i %i %.3f\n' % (n_native_contact, e,
                                      e * 1. / n_native_contact))
Example #41
0
def test_3():
    """``ds[:]`` on a 'dir-npy' dataset returns items matching ``ds[k]``."""
    path = tempfile.mkdtemp()
    shutil.rmtree(path)
    try:
        ds = dataset(path, 'w', 'dir-npy')
        for key in range(3):
            ds[key] = np.random.randn(10, 2)

        for key in range(3):
            np.testing.assert_array_equal(ds[:][key], ds[key])

    finally:
        shutil.rmtree(path)
Example #42
0
def test_transform_command_1():
    """Fit KCenters and apply it with TransformDataset from the command line.

    The first transformed item must equal ``labels_[0]`` of the pickled
    model. A second temporary directory then only fits KCenters on a single
    trajectory; nothing is asserted there.
    """
    with tempdir():
        fit_all = ("msmb KCenters -i {data_home}/alanine_dipeptide/*.dcd "
                   "-o model.pkl --top {data_home}/alanine_dipeptide/ala2.pdb "
                   "--metric rmsd")
        shell(fit_all.format(data_home=get_data_home()))

        transform = ("msmb TransformDataset -i {data_home}/alanine_dipeptide/*.dcd "
                     "-m model.pkl -t transformed.h5 --top "
                     "{data_home}/alanine_dipeptide/ala2.pdb")
        shell(transform.format(data_home=get_data_home()))

        eq(dataset('transformed.h5')[0], load('model.pkl').labels_[0])

    with tempdir():
        fit_one = ("msmb KCenters -i {data_home}/alanine_dipeptide/trajectory-0.dcd "
                   "-o model.pkl --top {data_home}/alanine_dipeptide/ala2.pdb "
                   "--metric rmsd")
        shell(fit_one.format(data_home=get_data_home()))
Example #43
0
def test_3():
    """Slicing a three-item 'dir-npy' dataset agrees with key access."""
    workdir = tempfile.mkdtemp()
    shutil.rmtree(workdir)
    try:
        store = dataset(workdir, 'w', 'dir-npy')
        store[0] = np.random.randn(10, 2)
        store[1] = np.random.randn(10, 2)
        store[2] = np.random.randn(10, 2)

        # A fresh slice is taken for every comparison, as in the original.
        for position in (0, 1, 2):
            np.testing.assert_array_equal(store[:][position], store[position])
    finally:
        shutil.rmtree(workdir)
Example #44
0
def Get_dihedral_features_villin():
    """Compute phi/psi dihedral features for the villin trajectories.

    Changes into the trajectory folder, removes any existing ``./diheds``
    directory, featurizes every ``*.xtc`` file (stride 5) into ``diheds/``
    in 'dir-npy' format, prints the ``'resids'`` entry of each feature
    description obtained for one reference trajectory, and returns the
    featurized dataset.
    """
    import os
    import shutil
    import mdtraj as md

    traj_dir = '/homes/anuginueni/traj_villin'
    topology_file = traj_dir + '/filtered.pdb'

    os.chdir(traj_dir)
    if os.path.isdir('./diheds'):
        shutil.rmtree('./diheds')

    from msmbuilder.dataset import dataset
    # Single trajectory used only to describe the features.
    reference_traj = md.load(traj_dir + '/trajectory-331.xtc',
                             top=topology_file, stride=5)
    trajectories = dataset(traj_dir + '/*.xtc',
                           topology=topology_file, stride=5)

    from msmbuilder.featurizer import DihedralFeaturizer
    dihedral_featurizer = DihedralFeaturizer(types=['phi', 'psi'])
    diheds = trajectories.fit_transform_with(
        dihedral_featurizer, 'diheds/', fmt='dir-npy')

    descriptions = dihedral_featurizer.describe_features(reference_traj)
    resids = [entry['resids'] for entry in descriptions]
    print(str(resids))
    return diheds
Example #45
0
def Get_contacts_features_villin():
    """Compute CA contact features for the villin trajectories.

    Changes into the trajectory folder, removes any existing ``./contacts``
    directory, prints the ``'resids'`` entry of each feature description
    obtained for one reference trajectory, then featurizes every ``*.xtc``
    file (stride 5) into ``contacts/`` in 'dir-npy' format and returns the
    result.
    """
    import os
    import shutil
    import mdtraj as md

    traj_dir = '/homes/anuginueni/traj_villin'
    topology_file = traj_dir + '/filtered.pdb'

    os.chdir(traj_dir)
    if os.path.isdir('./contacts'):
        shutil.rmtree('./contacts')

    from msmbuilder.dataset import dataset
    trajectories = dataset(traj_dir + '/*.xtc',
                           topology=topology_file, stride=5)
    # Single trajectory used only to describe the features.
    reference_traj = md.load(traj_dir + '/trajectory-331.xtc',
                             top=topology_file, stride=5)

    from msmbuilder.featurizer import ContactFeaturizer
    contact_featurizer = ContactFeaturizer(scheme='ca')
    descriptions = contact_featurizer.describe_features(reference_traj)
    resids = [entry['resids'] for entry in descriptions]
    print(str(resids))

    return trajectories.fit_transform_with(
        contact_featurizer, 'contacts/', fmt='dir-npy')
Example #46
0
def test_items():
    """``items()`` yields (key, value) pairs in key order for keys 0, 1, 5."""
    with tempdir():
        ds = dataset('ds.h5', 'w', 'hdf5')

        # Key 5 leaves a gap on purpose.
        for key, n_cols in ((0, 1), (1, 2), (5, 3)):
            ds[key] = np.random.randn(10, n_cols)

        # NOTE!
        # ds[:] does not work for non-contiguous keys.

        expected_keys = [0, 1, 5]

        for position, (key, value) in enumerate(ds.items()):
            assert key == expected_keys[position]
            np.testing.assert_array_equal(ds[key], value)

        ds.close()
Example #47
0
def test_MSMBuilderDatasetLoader_1():
    """``MSMBuilderDatasetLoader.load`` returns the stored data and no labels.

    A one-item 'dir-npy' dataset is written; the loader must return it as
    the first element of ``X`` together with ``y is None``.
    """
    from msmbuilder.dataset import dataset

    path = tempfile.mkdtemp()
    shutil.rmtree(path)
    try:
        original = np.random.randn(10, 2)
        writer = dataset(path, 'w', 'dir-npy')
        writer[0] = original

        X, y = MSMBuilderDatasetLoader(path, fmt='dir-npy').load()

        assert np.all(X[0] == original)
        assert y is None

    finally:
        shutil.rmtree(path)
Example #48
0
def Get_combined_features_villin():
    """Featurize the villin trajectories with a ``FeatureSelector``.

    The selector is built from a ``DihedralFeaturizer`` and a
    ``ContactFeaturizer`` registered as "di_villin" and "con_villin". Any
    existing ``combined`` output directory is removed, every ``*.xtc`` file
    is loaded with stride 5, and the result is written to ``combined/`` in
    'dir-npy' format and returned.
    """
    from msmbuilder.featurizer import DihedralFeaturizer
    from msmbuilder.featurizer import ContactFeaturizer

    named_featurizers = [
        ("di_villin", DihedralFeaturizer()),
        ("con_villin", ContactFeaturizer()),
    ]

    import os
    import shutil

    traj_dir = '/homes/anuginueni/traj_villin'
    combined_dir = traj_dir + '/combined'

    os.chdir(traj_dir)
    # Start from a clean output directory.
    if os.path.isdir(combined_dir):
        shutil.rmtree(combined_dir)

    from msmbuilder.dataset import dataset
    trajectories = dataset(traj_dir + '/*.xtc',
                           topology=traj_dir + '/filtered.pdb', stride=5)

    from msmbuilder.feature_selection import FeatureSelector
    selector = FeatureSelector(named_featurizers)
    return trajectories.fit_transform_with(
        selector, combined_dir + '/', fmt='dir-npy')
Example #49
0
    def test_partial_transform(self):
        """Check the output of the ``msmb SolventShellsFeaturizer`` command.

        The first item of the dataset at ``self.outfn`` must equal the
        expected counts divided by ``4 * pi * r**2`` for r = 0.5, 1.5, 2.5.
        """
        # Write mode is required: the handle becomes the child's
        # stdout/stderr, and a read-only descriptor cannot be written to.
        with open(os.devnull, 'w') as dn:
            subprocess.call(
                [
                    'msmb', 'SolventShellsFeaturizer', '--trjs', self.traj_fn,
                    '--solute_indices', self.ute_fn, '--solvent_indices',
                    self.vent_fn, '--n_shells', '3', '--shell_width', '1',
                    '--out', self.outfn
                ],
                stdout=dn,
                stderr=dn,
            )
        data = dataset(self.outfn)[0]

        norm = np.asarray([4 * np.pi * r**2 for r in [0.5, 1.5, 2.5]])
        should_be = np.array([[2, 0, 0], [0, 2, 0], [0, 0, 2], [0, 0, 0],
                              [0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1],
                              [0, 0, 0]]) / norm

        np.testing.assert_array_equal(data, should_be)
Example #50
0
def test_items():
    """Check that ``items()`` yields (key, value) pairs in insertion order.

    Three arrays are stored under the non-contiguous keys 0, 1 and 5 in an
    HDF5 dataset; iterating ``items()`` must return those keys in that order
    together with the same arrays that indexing by key returns.
    """
    with tempdir():
        # The context manager closes the dataset even when an assertion
        # fails, so the HDF5 handle is never leaked.
        with dataset('ds.h5', 'w', 'hdf5') as ds:
            ds[0] = np.random.randn(10, 1)
            ds[1] = np.random.randn(10, 2)
            ds[5] = np.random.randn(10, 3)

            # NOTE!
            # ds[:] does not work for non-contiguous keys.

            keys = [0, 1, 5]

            for i, (k, v) in enumerate(ds.items()):
                assert k == keys[i]
                np.testing.assert_array_equal(ds[k], v)
Example #51
0
def test_MSMBuilderDatasetLoader_1():
    """Round-trip one array through a dir-npy dataset and the loader.

    Writes a random (10, 2) array under key 0, loads the directory with
    ``MSMBuilderDatasetLoader`` and checks that the first loaded item equals
    the array and that the second returned value is ``None``.
    """
    from msmbuilder.dataset import dataset

    # mkdtemp() creates the directory; it is removed straight away so that
    # only a unique, currently unused path name is kept
    # (presumably so the 'w' mode dataset can create it itself).
    scratch_dir = tempfile.mkdtemp()
    shutil.rmtree(scratch_dir)
    try:
        expected = np.random.randn(10, 2)
        writer = dataset(scratch_dir, 'w', 'dir-npy')
        writer[0] = expected

        X, y = MSMBuilderDatasetLoader(scratch_dir, fmt='dir-npy').load()

        assert np.all(X[0] == expected)
        assert y is None

    finally:
        shutil.rmtree(scratch_dir)
Example #52
0
def test_items():
    """Check ``items()`` ordering and full-slice access on an HDF5 dataset.

    Three arrays are stored under the non-contiguous keys 0, 1 and 5.
    ``items()`` must yield those keys in that order with matching values,
    and ``ds[:]`` must return the stored arrays positionally (positions
    0, 1, 2 correspond to keys 0, 1, 5).
    """
    with tempdir():
        # The context manager closes the dataset even when an assertion
        # fails, so the HDF5 handle is never leaked.
        with dataset('ds.h5', 'w', 'hdf5') as ds:
            ds[0] = np.random.randn(10, 1)
            ds[1] = np.random.randn(10, 2)
            ds[5] = np.random.randn(10, 3)

            keys = [0, 1, 5]

            for i, (k, v) in enumerate(ds.items()):
                assert k == keys[i]
                np.testing.assert_array_equal(ds[k], v)

            np.testing.assert_array_equal(ds[:][0], ds[0])
            np.testing.assert_array_equal(ds[:][1], ds[1])
            np.testing.assert_array_equal(ds[:][2], ds[5])
Example #53
0
# Inspect which dihedral features carry the most extreme weights in the
# first tICA eigenvector.
import numpy as np
import pandas as pd
from msmbuilder import example_datasets, cluster, msm, featurizer, lumping, utils, dataset, decomposition
from sklearn.pipeline import make_pipeline
import mdtraj as md

tica_lagtime = 1600

trajectories = dataset.MDTrajDataset("./trajectories/*.h5")
# First frame of the first trajectory, passed to describe_features() below.
t0 = trajectories[0][0]

dih = dataset.NumpyDirDataset("./dihedrals/")
X = dataset.dataset("./tica/tica%d.h5" % tica_lagtime)
Xf = np.concatenate(X)

tica_model = utils.load("./tica/tica%d.pkl" % tica_lagtime)
dih_model = utils.load("./dihedrals/model.pkl")


d = dih_model.describe_features(t0)
d = pd.DataFrame(d)

# Row positions ordered by the first eigenvector's component value.
# np.argsort is called explicitly (the bare `argsort` only exists in a pylab
# session) and rows are picked with .iloc because DataFrame.ix was removed
# from pandas.
order = np.argsort(tica_model.eigenvectors_[:, 0])

# Bare expressions: they display their value in an interactive session.
d.iloc[order[0:5]]
d.iloc[order[-5:]]
Example #54
0
def test_hdf5_2():
    """Create a derived HDF5 dataset and print its provenance."""
    with tempdir(), dataset('ds.h5', 'w', 'hdf5') as parent:
        derived = parent.create_derived('ds2.h5')
        print(derived.provenance)
        derived.close()
Example #55
0
 def load(self):
     """Open the dataset at ``self.path`` read-only and print its provenance.

     Returns a ``(dataset, None)`` pair.
     """
     from msmbuilder.dataset import dataset

     open_options = dict(mode='r', fmt=self.fmt, verbose=self.verbose)
     loaded = dataset(self.path, **open_options)
     print('Dataset provenance:\n')
     print(loaded.provenance)
     return loaded, None
Example #56
0
# Project the first 50 frames of a refinement trajectory onto the tICA
# coordinates and overlay them on a density plot of the whole tICA dataset.
import matplotlib.pyplot as plt
import mdtraj as md
import numpy as np
from msmbuilder import example_datasets, cluster, msm, featurizer, lumping, utils, dataset, decomposition
from sklearn.pipeline import make_pipeline

trj0 = md.load("traj-refine_implicit_md.xtc", top="topol-renumbered-implicit.pdb")
trj0 = trj0[0:50]

X = dataset.dataset("./tica.h5")
Xf = np.concatenate(X)

dih_model = utils.load("./dihedrals/model.pkl")
tica_model = utils.load("./tica.pkl")

# Chain the saved models: the dihedral model first, then the tICA model.
pipeline = make_pipeline(dih_model, tica_model)
x0 = pipeline.transform([trj0])[0]

# pyplot functions are called explicitly; the bare names only exist inside
# a pylab session.
plt.hexbin(Xf[:, 0], Xf[:, 1], bins='log')
plt.plot(x0[:, 0], x0[:, 1], 'kx')

# Explicit loop: on Python 3 `map` is lazy, so a discarded map(...) would
# never draw the frame-index labels.
for k in range(len(x0)):
    plt.annotate(str(k), xy=x0[k, 0:2], fontsize=14)
Example #57
0
def test_union_no_longer_exists():
    """Requesting the 'hdf5-union' format must raise a ValueError.

    The error message has to point the user at
    ``msmbuilder.featurizer.FeatureUnion``.
    """
    expected_message = r".*[Uu]se msmbuilder\.featurizer\.FeatureUnion.*"
    with assert_raises_regexp(ValueError, expected_message):
        dataset(['ds1.h5', 'ds2.h5'], fmt='hdf5-union')
Example #58
0
import matplotlib
matplotlib.use('Agg')

from msmbuilder.dataset import dataset
import matplotlib.pyplot as plt
import numpy as np

# Load the tICA-transformed trajectories from disk.
tica_dataset = dataset('tica_trajs.h5')
# Stack the per-trajectory arrays into a single 2-D array of frames.
trajs = np.concatenate(tica_dataset)

# Log-scaled density of the first two columns; empty bins are not drawn.
first_column = trajs[:, 0]
second_column = trajs[:, 1]
plt.hexbin(first_column, second_column, bins='log', mincnt=1)

plt.savefig('msm_fig.png')