Example #1
def test_write_to_csv_propagate_filenames(self):
        from pyemma.coordinates import source, tica
        with TemporaryDirectory() as td:
            data = [np.random.random((20, 3))] * 3
            fns = [
                os.path.join(td, f)
                for f in ('blah.npy', 'blub.npy', 'foo.npy')
            ]
            for x, fn in zip(data, fns):
                np.save(fn, x)
            reader = source(fns)
            assert reader.filenames == fns
            tica_obj = tica(reader, lag=1, dim=2)
            tica_obj.write_to_csv(extension=".exotic", chunksize=3)
            res = sorted([
                os.path.abspath(x) for x in glob(td + os.path.sep + '*.exotic')
            ])
            self.assertEqual(len(res), len(fns))
            desired_fns = sorted([s.replace('.npy', '.exotic') for s in fns])
            self.assertEqual(res, desired_fns)

            # compare written results
            expected = tica_obj.get_output()
            actual = source(list(s.replace('.npy', '.exotic')
                                 for s in fns)).get_output()
            assert len(actual) == len(fns)
            for a, e in zip(actual, expected):
                np.testing.assert_allclose(a, e)
Example #2
 def setUp(self):
     self.readers = []
     data_dir = pkg_resources.resource_filename('pyemma.coordinates.tests', 'data')
     # three md trajs
     trajs = glob(data_dir + "/bpti_0*.xtc")
     top = os.path.join(data_dir, 'bpti_ca.pdb')
     self.readers.append(source(trajs, top=top))
     self.readers[0].featurizer.add_all()
     ndim = self.readers[0].ndim
     # three random arrays
     lengths = self.readers[0].trajectory_lengths()
     arrays = [np.random.random((length, ndim)) for length in lengths]
     self.readers.append(source(arrays))
Example #3
    def test_fragmented_trajs(self):
        """ Build two fragmented readers consisting of two fragments each and check that they are merged properly."""
        segment_0 = np.arange(20)
        segment_1 = np.arange(20, 40)

        s1 = source([(segment_0, segment_1)])
        s2 = source([(segment_0, segment_1)])

        sm = SourcesMerger((s1, s2))

        out = sm.get_output()
        x = np.atleast_2d(np.arange(40))
        expected = [np.concatenate((x, x), axis=0).T]

        np.testing.assert_equal(out, expected)
Example #4
    def test_lagged_iterator(self):
        import pyemma.coordinates as coor
        from pyemma.coordinates.tests.util import create_traj, get_top

        trajectory_length = 4720
        lagtime = 1000
        n_trajs = 15

        top = get_top()
        trajs_data = [
            create_traj(top=top, length=trajectory_length)
            for _ in range(n_trajs)
        ]
        trajs = [t[0] for t in trajs_data]
        xyzs = [t[1].reshape(-1, 9) for t in trajs_data]

        reader = coor.source(trajs, top=top, chunksize=5000)

        for chunk in [
                None, 0, trajectory_length, trajectory_length + 1,
                trajectory_length + 1000
        ]:
            it = reader.iterator(lag=lagtime,
                                 chunk=chunk,
                                 return_trajindex=True)
            with it:
                for itraj, X, Y in it:
                    np.testing.assert_equal(X.shape, Y.shape)
                    np.testing.assert_equal(X.shape[0],
                                            trajectory_length - lagtime)
                    np.testing.assert_array_almost_equal(
                        X, xyzs[itraj][:trajectory_length - lagtime])
                    np.testing.assert_array_almost_equal(
                        Y, xyzs[itraj][lagtime:])
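
A minimal sketch of the lagged-iterator contract exercised above, using an in-memory array instead of md trajectories (the array shape and lag value here are illustrative, not from the original test):

import numpy as np
import pyemma.coordinates as coor

data = np.random.random((100, 3))
reader = coor.source(data)
it = reader.iterator(lag=10, return_trajindex=True)
with it:
    for itraj, X, Y in it:
        # Y is X shifted forward by the lag; both are truncated to equal length.
        assert X.shape == Y.shape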
Example #5
def prepare_tica_inputs(datasets, topfile, features=None, selection=None, chunksize=10000, singletraj=False):
    # Accept either an mdtraj.Topology object or a path to a topology file.
    if isinstance(topfile, mdtraj.Topology):
        topology = topfile
    elif os.path.exists(topfile):
        topology = mdtraj.load(topfile).topology
    else:
        raise IOError("Cannot find topology file: %s" % topfile)

    assert isinstance(topology, mdtraj.Topology)

    if selection:
        topology = topology.subset(topology.select(selection_string=selection))

    feat = coor.featurizer(topology)
    if not features:  # default to inverse C-alpha distances
        # PyEMMA equivalent: `feat.add_inverse_distances(feat.select_Ca())`
        features = {'add_inverse_distances': {'select_Ca': None}}

    apply_feat_part(feat, features)
    ticainputs, input_order = squish_tica_inputfiles(datasets, feat)
    if singletraj:
        ticainputs = [ticainputs]
    tica_inp = coor.source(ticainputs, feat, chunksize=chunksize)
    return tica_inp, input_order
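
The default feature block above corresponds to the direct featurizer call named in the comment; a runnable sketch of that equivalence, using PyEMMA's bundled BPTI test data (the dataset choice is illustrative):

import pyemma.coordinates as coor
from pyemma.datasets import get_bpti_test_data

d = get_bpti_test_data()
feat = coor.featurizer(d['top'])
# inverse C-alpha distances, the same default as in prepare_tica_inputs
feat.add_inverse_distances(feat.select_Ca())
inp = coor.source(d['trajs'], feat, chunksize=10000)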
Example #6
    def test_fragmented_reader(self):
        from pyemma.coordinates.tests.util import create_traj
        from pyemma.util.files import TemporaryDirectory

        top_file = pkg_resources.resource_filename(__name__, 'data/test.pdb')
        trajfiles = []

        with TemporaryDirectory() as d:
            for _ in range(3):
                f, _, _ = create_traj(top_file, dir=d)
                trajfiles.append(f)
            # three trajectories: one consisting of all three, one consisting of the first,
            # one consisting of the first and the last
            frag_trajs = [
                trajfiles, [trajfiles[0]], [trajfiles[0], trajfiles[2]]
            ]
            chunksize = 232
            source = coor.source(frag_trajs, top=top_file, chunksize=chunksize)
            params = {
                'chunksize': chunksize,
                'ndim': source.ndim,
                '_trajectories': trajfiles
            }
            restored = self.compare(source, params)

            np.testing.assert_equal(source.get_output(), restored.get_output())
Example #7
 def test_h5_reader(self):
     h5_file = pkg_resources.resource_filename(__name__,
                                               'data/bpti_mini.h5')
     params = dict(selection='/coordinates')
     source = coor.source(h5_file, **params)
     restored = self.compare(source, params)
     np.testing.assert_equal(source.get_output(), restored.get_output())
Example #8
    def setUp(self):
        self.eps = 1e-10
        path = pkg_resources.resource_filename(__name__, 'data') + os.path.sep
        self.pdbfile = os.path.join(path, 'bpti_ca.pdb')
        self.trajfiles = [os.path.join(path, 'bpti_001-033.xtc'),
                          os.path.join(path, 'bpti_034-066.xtc'),
                          os.path.join(path, 'bpti_067-100.xtc')
                          ]

        # Create random sets of files and frames to be retrieved from trajfiles
        n_members_set1 = 10
        n_members_set2 = 20
        set_1 = np.vstack((np.random.permutation([0, 2] * n_members_set1)[:n_members_set1],
                           np.random.randint(32, size=n_members_set1))).T

        set_2 = np.vstack((np.random.permutation([0, 2] * n_members_set2)[:n_members_set2],
                           np.random.randint(32, size=n_members_set2))).T

        self.sets = [set_1, set_2]

        self.subdir = tempfile.mkdtemp(suffix='save_trajs_test')

        # Instantiate the reader
        self.reader = coor.source(self.trajfiles, top=self.pdbfile)
        self.reader.chunksize = 30
        self.n_pass_files = [os.path.join(self.subdir, 'n_pass.set_%06u.xtc' % ii) for ii in range(len(self.sets))]
        self.one_pass_files = [os.path.join(self.subdir, '1_pass.set_%06u.xtc' % ii) for ii in range(len(self.sets))]

        self.traj_ref = save_traj_w_md_load_frame(self.reader, self.sets)
        self.strides = [2, 3, 5]
Example #9
    def test_fragmented_xtc(self):
        from pyemma.coordinates.tests.util import create_traj

        top_file = pkg_resources.resource_filename(__name__, 'data/test.pdb')
        trajfiles = []
        for _ in range(3):
            f, _, _ = create_traj(top_file)
            trajfiles.append(f)
        try:
            # three trajectories: one consisting of all three, one consisting of the first,
            # one consisting of the first and the last
            source = coor.source(
                [trajfiles, [trajfiles[0]], [trajfiles[0], trajfiles[2]]],
                top=top_file)
            source.chunksize = 1000

            out = source.get_output(stride=1)
            trajs = [
                mdtraj.load(trajfiles[i], top=top_file).xyz.reshape(-1, 9)
                for i in range(0, 3)
            ]

            np.testing.assert_equal(out[0], np.vstack(trajs))
            np.testing.assert_equal(out[1], trajs[0])
            np.testing.assert_equal(out[2], np.vstack((trajs[0], trajs[2])))
        finally:
            for t in trajfiles:
                try:
                    os.unlink(t)
                except EnvironmentError:
                    pass
Example #10
    def partial_fit(self, X):
        """ incrementally update the covariances and mean.

        Parameters
        ----------
        X: array, list of arrays, PyEMMA reader
            input data.

        Notes
        -----
        The projection matrix is only calculated upon its first access.
        """
        from pyemma.coordinates import source
        iterable = source(X)

        if isinstance(self.dim, int):
            indim = iterable.dimension()
            if not self.dim <= indim:
                raise RuntimeError(
                    "requested more output dimensions (%i) than dimension"
                    " of input data (%i)" % (self.dim, indim))

        self._covar = self._init_covar(partial=True)
        self._covar.partial_fit(iterable)
        self.model.update_model_params(
            mean_0=self._covar.mean,  # TODO: inefficient, fixme
            mean_t=self._covar.mean_tau,
            C00=self._covar.C00_,
            C0t=self._covar.C0t_,
            Ctt=self._covar.Ctt_)

        self._estimated = False
        return self.model
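
A hedged usage sketch for this incremental interface: feed TICA chunk by chunk through partial_fit, then query the diagonalized model (the synthetic chunks are placeholders):

import numpy as np
from pyemma.coordinates import tica

chunks = [np.random.random((1000, 5)) for _ in range(4)]
tica_obj = tica(lag=10, dim=2)
for chunk in chunks:
    tica_obj.partial_fit(chunk)
# accessing the eigenvectors triggers diagonalization, as per the Notes above
print(tica_obj.eigenvectors.shape)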
Example #11
    def _test_fragment_reader(self, file_format, stride, lag, chunksize):
        trajs = self.test_trajs[file_format]

        reader = coor.source([trajs], top=self.pdb_file, chunksize=chunksize)
        assert isinstance(reader, FragmentedTrajectoryReader)

        data = np.vstack(self.traj_data)
        itraj = None

        if lag > 0:
            collected = []
            collected_lagged = []
            for itraj, X, Y in reader.iterator(stride=stride, lag=lag):
                collected.append(X)
                collected_lagged.append(Y)
            assert collected
            assert collected_lagged
            assert len(collected) == len(collected_lagged)
            collected = np.vstack(collected)
            collected_lagged = np.vstack(collected_lagged)
            np.testing.assert_allclose(data[::stride][:len(collected_lagged)], collected, atol=self.eps,
                                       err_msg="lag={}, stride={}, cs={}".format(lag, stride, chunksize))
            np.testing.assert_allclose(data[lag::stride], collected_lagged, atol=self.eps)
        else:
            collected = []
            for itraj, X in reader.iterator(stride=stride):
                collected.append(X)
            assert collected
            collected = np.vstack(collected)
            np.testing.assert_allclose(data[::stride], collected, atol=self.eps)
            assert itraj == 0 # only one trajectory
Example #12
    def test_with_save_traj(self):
        path = pkg_resources.resource_filename(__name__, 'data') + os.path.sep

        pdb_file = os.path.join(path, 'bpti_ca.pdb')
        traj_files = [
            os.path.join(path, 'bpti_001-033.xtc'),
            os.path.join(path, 'bpti_034-066.xtc'),
            os.path.join(path, 'bpti_067-100.xtc')
        ]

        source_frag = coor.source([traj_files], top=pdb_file)
        full_data = source_frag.get_output()[0]
        last_frame_fragment_0 = [0,32]
        first_frame_fragment_1 = [0,33]
        first_frame_fragment_2 = [0,66]

        reshape = lambda f: f.xyz.reshape((f.xyz.shape[0],f.xyz.shape[1] * f.xyz.shape[2])).squeeze()

        # Frames in the first fragment:
        frames = coor.save_traj(source_frag, [last_frame_fragment_0], None)
        np.testing.assert_equal(reshape(frames), full_data[32])

        # Frames in the first and second fragments
        frames = coor.save_traj(source_frag, [last_frame_fragment_0, first_frame_fragment_1], None)
        np.testing.assert_equal(reshape(frames), full_data[np.array([32, 33])])

        # Frames only in the second fragment
        frames = coor.save_traj(source_frag, [first_frame_fragment_1], None)
        np.testing.assert_equal(reshape(frames), full_data[33])

        # Frames only in the second and third fragments
        frames = coor.save_traj(source_frag, [first_frame_fragment_1, first_frame_fragment_2], None)
        np.testing.assert_equal(reshape(frames), full_data[np.array([33, 66])])
Example #13
 def test_non_matching_lengths(self):
     data = self.readers[1].data
     data = [data[0], data[1], data[2][:20]]
     self.readers.append(source(data))
     with self.assertRaises(ValueError) as ctx:
         SourcesMerger(self.readers)
     self.assertIn('matching', ctx.exception.args[0])
Example #14
    def test_assignment_multithread_minrmsd(self):
        # re-do assignment with multiple threads and compare results
        import pyemma.datasets as data
        d = data.get_bpti_test_data()
        reader = coor.source(d['trajs'], top=d['top'])

        N_centers = 9
        centers = np.asarray((reader.ra_itraj_jagged[0, [0, 1, 7]],
                              reader.ra_itraj_jagged[1, [32, 1, 23]],
                              reader.ra_itraj_jagged[2, [17, 8, 15]])).reshape(
                                  (N_centers, -1))
        chunksize = 1000

        assignment_mp = coor.assign_to_centers(reader,
                                               centers,
                                               n_jobs=2,
                                               chunksize=chunksize,
                                               metric='minRMSD')
        assignment_sp = coor.assign_to_centers(reader,
                                               centers,
                                               n_jobs=1,
                                               chunksize=chunksize,
                                               metric='minRMSD')

        np.testing.assert_equal(assignment_mp, assignment_sp)
Example #15
    def test_length_and_content_feature_reader_and_TICA(self):
        for stride in range(1, 100, 23):
            r = coor.source(self.trajnames, top=self.temppdb)
            t = coor.tica(data=r, lag=2, dim=2)
            t.parametrize()

            # subsample data
            out_tica = t.get_output(stride=stride)
            out_reader = r.get_output(stride=stride)

            # get length in different ways
            len_tica = [x.shape[0] for x in out_tica]
            len_reader = [x.shape[0] for x in out_reader]
            len_trajs = t.trajectory_lengths(stride=stride)
            len_ref = [(x.shape[0]-1)//stride+1 for x in self.data]

            # compare length
            np.testing.assert_equal(len_trajs, len_ref)
            self.assertTrue(len_ref == len_tica)
            self.assertTrue(len_ref == len_reader)

            # compare content (reader)
            for ref_data, test_data in zip(self.data, out_reader):
                ref_data_reshaped = ref_data.reshape((ref_data.shape[0], ref_data.shape[1]*3))
                self.assertTrue(np.allclose(ref_data_reshaped[::stride, :], test_data, atol=1E-3))
Example #16
    def test_partial_fit(self):
        from pyemma.coordinates import source
        reader = source(self.trajnames, top=self.temppdb)
        reader_output = reader.get_output()

        for output_params in [{'kinetic_map': False}, {'kinetic_map': True}, {'kinetic_map': False, 'commute_map': True}]:
            params = {'lag': 10, 'dim': self.dim}
            params.update(output_params)

            tica_obj = tica(**params)
            tica_obj.partial_fit(reader_output[0])
            assert not tica_obj._estimated
            # access eigenvectors to force diagonalization
            tica_obj.eigenvectors
            assert tica_obj._estimated

            tica_obj.partial_fit(reader_output[1])
            assert not tica_obj._estimated

            tica_obj.eigenvalues
            assert tica_obj._estimated

            for traj in reader_output[2:]:
                tica_obj.partial_fit(traj)

            # reference
            ref = tica(reader, **params)

            np.testing.assert_allclose(tica_obj.cov, ref.cov, atol=1e-15)
            np.testing.assert_allclose(tica_obj.cov_tau, ref.cov_tau, atol=1e-15)

            np.testing.assert_allclose(tica_obj.eigenvalues, ref.eigenvalues, atol=1e-15)
Example #17
    def setUp(self):
        self.eps = 1e-6
        path = os.path.join(os.path.split(__file__)[0], 'data')
        self.pdbfile = os.path.join(path, 'bpti_ca.pdb')
        self.trajfiles = [os.path.join(path, 'bpti_001-033.xtc'),
                          os.path.join(path, 'bpti_034-066.xtc'),
                          os.path.join(path, 'bpti_067-100.xtc')
                          ]

        # Create random sets of files and frames to be retrieved from trajfiles
        n_members_set1 = 10
        n_members_set2 = 20
        set_1 = np.vstack((np.random.permutation([0, 2] * n_members_set1)[:n_members_set1],
                           np.random.randint(32, size=n_members_set1))).T

        set_2 = np.vstack((np.random.permutation([0, 2] * n_members_set2)[:n_members_set2],
                           np.random.randint(32, size=n_members_set2))).T

        self.sets = [set_1, set_2]

        self.subdir = tempfile.mkdtemp(suffix='save_trajs_test')

        # Instantiate the reader
        self.reader = coor.source(self.trajfiles, top=self.pdbfile)
        self.reader.chunksize = 10
        self.n_pass_files = [os.path.join(self.subdir, 'n_pass.set_%06u.xtc' % ii) for ii in range(len(self.sets))]
        self.one_pass_files = [os.path.join(self.subdir, '1_pass.set_%06u.xtc' % ii) for ii in range(len(self.sets))]
Example #18
def DoubleProducts(Y1, Y2, filename, U=None):
    ''' Evaluate all products between two given time-series. Optionally, a
    linear transformation of the product basis can be computed instead.

    Parameters:
    -------------
    Y1, Y2: pyemma-reader, containing time series of basis functions.
    filename: str, name to be used to save the data for the product time series.
    U: ndarray, shape (r, s), where r must be identical to the product dimension
        of Y1 and Y2 and s is the number of linear combinations to be extracted.

    Returns:
    -------------
    pyemma-reader, containing the time-series of all possible products between
        the basis functions in Y1 and Y2.'''
    # Get the dimensions of both time-series:
    r1 = Y1.dimension()
    r2 = Y2.dimension()
    # Compute the product dimension:
    r = r1 * r2
    # Get the output dimension:
    if U is not None:
        ro = U.shape[1]
    else:
        ro = r
    # Get the iterators for both time-series:
    I1 = Y1.iterator()
    I2 = Y2.iterator()
    # Prepare an empty array for the trajectory pieces:
    file_names = []
    q = 0
    ieval = np.zeros((0, ro))
    # Compute the products chunk by chunk:
    for piece in zip(I1, I2):
        # Get the trajectory number and the data:
        traj_id = piece[0][0]
        piece0 = piece[0][1]
        piece1 = piece[1][1]
        # Check if the last trajectory is finished:
        if traj_id > q:
            np.save(filename + "_%d.npy" % q, ieval)
            file_names.append(filename + "_%d.npy" % q)
            ieval = np.zeros((0, ro))
            q += 1
        # Compute all the products:
        chunkeval = np.einsum('ijk,imk->ijm', piece0[:, :, np.newaxis], piece1[:, :, np.newaxis])
        chunkeval = np.reshape(chunkeval, (chunkeval.shape[0], r))
        # Apply linear transform if necessary:
        if U is not None:
            chunkeval = np.dot(chunkeval, U)
        # Stack the result underneath the previous results:
        ieval = np.vstack((ieval, chunkeval))
    # Save the last trajectory:
    np.save(filename + "_%d.npy" % q, ieval)
    file_names.append(filename + "_%d.npy" % q)
    # Build a new reader and return it:
    reader = pco.source(file_names)
    reader.chunksize = Y1.chunksize
    return reader
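
A hedged usage sketch for DoubleProducts (the arrays and the 'products' file prefix are placeholders; as the function assumes, both readers must produce matching chunks): products of a 2- and a 3-dimensional basis yield a 6-dimensional reader.

import numpy as np
import pyemma.coordinates as pco

Y1 = pco.source([np.random.random((500, 2))])
Y2 = pco.source([np.random.random((500, 3))])
prod = DoubleProducts(Y1, Y2, "products")  # writes products_0.npy to the cwd
assert prod.dimension() == 2 * 3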
Example #19
 def test_MD_data(self):
     # this is too little data to get reasonable results. We just test to avoid exceptions
     path = os.path.join(os.path.split(__file__)[0], 'data')
     self.pdb_file = os.path.join(path, 'bpti_ca.pdb')
     self.xtc_file = os.path.join(path, 'bpti_mini.xtc')
     inp = source(self.xtc_file, top=self.pdb_file)
     # see if this doesn't raise
     ticamini = tica(inp, lag=1)
Example #20
    def partial_fit(self, X):
        from pyemma.coordinates import source
        iterable = source(X)

        self._estimate(iterable, partial=True)
        self._estimated = False

        return self
Example #21
 def __init__(self, topologyfile: str, trajfiles: list, workdir='automsm') -> None:
     super().__init__()
     self.topologyfile = os.path.abspath(topologyfile)
     self.trajfiles = [os.path.abspath(trajfile) for trajfile in trajfiles]
     self.workdir = workdir
     if not os.path.exists(workdir):
         os.mkdir(workdir)
     self.src = source(self.trajfiles, top=self.topologyfile)
Example #22
 def test_MD_data(self):
     # this is too little data to get reasonable results. We just test to avoid exceptions
     path = pkg_resources.resource_filename(__name__, 'data') + os.path.sep
     self.pdb_file = os.path.join(path, 'bpti_ca.pdb')
     self.xtc_file = os.path.join(path, 'bpti_mini.xtc')
     inp = source(self.xtc_file, top=self.pdb_file)
     # see if this doesn't raise
     ticamini = tica(inp, lag=1)
Example #23
    def _test_base_reader(self, file_format, stride, skip, chunksize, transform):
        # TODO: remove this, when mdtraj-2.0 is released.
        if file_format == 'dcd' and stride > 1 and skip_stride_handling_old_mdtraj:
            raise unittest.SkipTest('wait for mdtraj 2.0')

        trajs = self.test_trajs[file_format]
        reader = coor.source(trajs, top=self.pdb_file, chunksize=chunksize)

        if transform == 'identity':
            reader = util.create_transform(reader)

        if chunksize is not None:
            np.testing.assert_equal(reader.chunksize, chunksize)

        it = reader.iterator(stride=stride, skip=skip, lag=0, chunk=chunksize)

        assert it.chunksize is not None
        if chunksize is None:
            max_frames = max_chunksize_from_config(reader.output_type().itemsize)
            assert it.chunksize <= max_frames
            # now we set the chunksize to max_frames, to be able to compare the actual shapes of iterator output.
            chunksize = max_frames

        traj_data = [data[skip::stride] for data in self.traj_data]
        valid_itraj = [i for i, x in enumerate(traj_data) if len(x) > 0]
        output = defaultdict(list)

        with it:
            current_itraj = None
            t = t_total = 0
            for itraj, chunk in it:
                # reset t upon next trajectory
                if itraj != current_itraj:
                    current_itraj = itraj
                    t = 0

                assert len(chunk) <= chunksize or chunksize == 0, '%s' % it
                if chunksize != 0 and len(traj_data[itraj]) - t >= chunksize:
                    assert len(chunk) == chunksize
                elif chunksize == 0:
                    assert len(chunk) == len(traj_data[itraj])

                output[itraj].append(chunk)

                t += len(chunk)
                t_total += len(chunk)

            for itraj in valid_itraj:
                assert itraj in output.keys()

            for itraj in output.keys():
                assert itraj in valid_itraj
                output[itraj] = np.vstack(output[itraj])
                np.testing.assert_allclose(output[itraj], traj_data[itraj], atol=self.eps)

            assert t_total == sum(len(x) for x in output.values())
            assert t_total == reader.n_frames_total(stride=stride, skip=skip)
Example #24
    def test_in_memory(self):
        data = np.random.random((100, 10))
        tica_obj = api.tica(lag=10, dim=1)
        reader = source(data)
        tica_obj.data_producer = reader

        tica_obj.in_memory = True
        tica_obj.parametrize()
        tica_obj.get_output()
Example #25
 def test_parametrize_with_stride(self):
     for stride in range(1, 100, 23):
         r = coor.source(self.trajnames, top=self.temppdb)
         tau = 5
         try:
             t = coor.tica(r, lag=tau, stride=stride, dim=2)
             # force_eigenvalues_le_one=True enables an internal consistency check in TICA
             self.assertTrue(np.all(t.eigenvalues <= 1.0 + 1.E-12))
         except RuntimeError:
             assert tau % stride != 0
Example #26
 def test_pass_reader(self):
     from pyemma.coordinates import source
     reader = source(self.trajfiles, top=self.pdbfile)
     reader.in_memory = True
     inds = np.vstack((np.random.randint(0, 1),
                       np.random.randint(0, 100))).T
     traj_test = _frames_from_file(reader.filenames,
                                   self.pdbfile,
                                   inds,
                                   reader=reader)
Example #27
    def test_notify_changes_mixin(self):
        X_t = np.random.random((30, 30))
        source = coor.source(np.array(X_t))

        t1 = coor.tica(source)
        from pyemma.coordinates.transform import TICA
        t2 = TICA(lag=10)
        assert len(t1._stream_children) == 0
        t2.data_producer = t1
        assert t1._stream_children[0] == t2
Example #28
    def _test_lagged_reader(self, file_format, stride, skip, chunksize, lag):
        # TODO: remove this, when mdtraj-2.0 is released.
        if file_format == 'dcd' and stride > 1 and skip_stride_handling_old_mdtraj:
            raise unittest.SkipTest('wait for mdtraj 2.0')
        trajs = self.test_trajs[file_format]
        reader = coor.source(trajs, top=self.pdb_file, chunksize=chunksize)

        it = reader.iterator(stride=stride, skip=skip, lag=lag, chunk=chunksize)
        traj_data = [data[skip::stride] for data in self.traj_data]
        traj_data_lagged = [data[skip + lag::stride] for data in self.traj_data]
        valid_itrajs = [i for i, x in enumerate(traj_data_lagged) if len(x) > 0]

        assert it.chunksize is not None
        if chunksize is None:
            chunksize = max_chunksize_from_config(reader.output_type().itemsize)

        with it:
            current_itraj = None
            t = t_total = 0
            collected = defaultdict(list)
            collected_lag = defaultdict(list)

            for itraj, chunk, chunk_lagged in it:
                # reset t upon next trajectory
                if itraj != current_itraj:
                    current_itraj = itraj
                    t = 0
                assert len(chunk) <= chunksize or chunksize == 0
                if chunksize != 0 and len(traj_data[itraj]) - t >= chunksize:
                    assert len(chunk) <= chunksize
                elif chunksize == 0:
                    assert len(chunk) == len(chunk_lagged) == len(traj_data_lagged[itraj])
                collected[itraj].append(chunk)
                collected_lag[itraj].append(chunk_lagged)

                t += len(chunk)
                t_total += len(chunk)

        for itraj in valid_itrajs:
            assert itraj in collected.keys()
            assert itraj in collected_lag.keys()

        assert set(collected.keys()) == set(collected_lag.keys())
        for itraj in collected.keys():
            assert itraj in valid_itrajs
            collected[itraj] = np.vstack(collected[itraj])
            collected_lag[itraj] = np.vstack(collected_lag[itraj])
            # unlagged data is truncated to the length of the lagged data.
            max_len = len(traj_data_lagged[itraj])
            np.testing.assert_allclose(collected[itraj], traj_data[itraj][:max_len], atol=self.eps)
            np.testing.assert_allclose(collected_lag[itraj], traj_data_lagged[itraj], atol=self.eps)

        assert t_total == sum(len(x) for x in collected.values())
        assert t_total == reader.n_frames_total(stride=stride, skip=skip+lag)
Example #29
    def setUpClass(cls):
        from pyemma.datasets import get_bpti_test_data

        d = get_bpti_test_data()
        trajs, top = d['trajs'], d['top']
        s = source(trajs, top=top)

        t = tica(s, lag=1)

        c = cluster_kmeans(t)
        cls.model_file = tempfile.mktemp()
        c.save(cls.model_file, save_streaming_chain=True)
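
Because save_streaming_chain=True stores the whole input chain (reader, TICA, kmeans), the saved object can later be restored and used for streaming again; a hedged sketch:

import pyemma
restored = pyemma.load(cls.model_file)  # e.g. inside a test method of this class
output = restored.get_output()  # works: the input chain was saved along with the model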
Example #30
def Reweight(f, pi, filename, minval):
    ''' This function reweights a given basis trajectory by the inverse square
    root of the stationary distribution pi.

    Parameters:
    ------------
    f: pyemma-reader, contains the evaluation of the basis functions.
    pi: pyemma-reader, contains the evaluation of the stationary distribution.
    filename: str, filename for the evaluation files to be produced.
    minval: float, minimal value (greater than zero) allowed for the stationary
        distribution. All values smaller than minval are replaced by minval.

    Returns:
    -----------
    pyemma-reader, the reweighted basis.
    '''
    # Get the basis set size:
    r0 = f.dimension()
    # Get the iterators for both time-series:
    I1 = f.iterator()
    I2 = pi.iterator()
    # Prepare an empty array for the trajectory pieces:
    file_names = []
    q = 0
    ieval = np.zeros((0, r0))
    # Process the data chunk by chunk:
    for piece in zip(I1, I2):
        # Get the trajectory number and the data:
        traj_id = piece[0][0]
        piece0 = np.copy(piece[0][1])
        piece1 = np.copy(piece[1][1])
        # Check if the last trajectory is finished:
        if traj_id > q:
            np.save(filename + "_%d.npy" % q, ieval)
            file_names.append(filename + "_%d.npy" % q)
            ieval = np.zeros((0, r0))
            q += 1
        # Reweight:
        # Replace too small and negative values:
        minind = piece1[:, 0] < minval
        piece1[minind, :] = minval
        # Re-weight the basis functions:
        piece0 = piece0 / np.sqrt(piece1)
        # Stack the result underneath the previous results:
        ieval = np.vstack((ieval, piece0))
    # Save the last trajectory:
    np.save(filename + "_%d.npy" % q, ieval)
    file_names.append(filename + "_%d.npy" % q)
    # Build a new reader and return it:
    reader = pco.source(file_names)
    reader.chunksize = f.chunksize
    return reader
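
A hedged usage sketch for Reweight, analogous to DoubleProducts above (the arrays and the 'rw' prefix are placeholders; pi must be a one-column reader of the same length as f):

import numpy as np
import pyemma.coordinates as pco

f = pco.source([np.random.random((500, 4))])
pi = pco.source([0.5 + np.random.random((500, 1))])
rw = Reweight(f, pi, "rw", minval=1e-3)  # writes rw_0.npy to the cwd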
Example #31
    def test_with_fragmented_reader(self):
        # intentionally group the bpti dataset into a fake fragmented traj
        frag_traj = [[self.trajfiles[0], self.trajfiles[1]], self.trajfiles[2]]
        reader = coor.source(frag_traj, top=self.pdbfile)

        traj = save_traj(reader, self.sets, None)
        traj_ref = save_traj_w_md_load_frame(self.reader, self.sets)

        # Check for diffs
        (found_diff, errmsg) = compare_coords_md_trajectory_objects(traj,
                                                                    traj_ref,
                                                                    atom=0)
        self.assertFalse(found_diff, errmsg)
Example #32
def pyemma_feat(args):
    irow, featurizer_name, tops, indices = args
    i, row = irow
    traj, top = row['traj_fn'], tops[row['top_fn']]
    feat = featurizer(top)
    try:
        adder = getattr(feat, featurizer_name)
        adder(indexes=indices, cossin=True)
        feat_traj = np.squeeze(source(traj, features=feat).get_output(),
                               axis=0)
        return i, feat_traj
    except AttributeError:
        print("pyEMMA doesn't have {} as a featurizer".format(featurizer_name))
Example #33
def SaveEVFrames(dt, ev_traj, c, d, traj_inp=None, filename=None, topfile=None, nframes=None):
    ''' Save frames that correspond to eigenvector centers from md-trajectories
    to a separate trajectory.

    Parameters:
    --------------
    dt: Physical time step.
    ev_traj: List of eigenfunction trajectories.
    c: ndarray, shape (nc, M), centers.
    d: ndarray, shape (nc,), admissible distances to the centers.
    traj_inp: List of underlying md-trajectories.
    filename: str, name of the center-trajectories.
    topfile: str, topology-file.
    nframes: int, number of frames per center and per trajectory.
    '''
    # Get the number of trajectories:
    ntraj = len(ev_traj)
    # Get the number of centers and eigenfunctions:
    nc, M = c.shape
    # Create a reader of eigenfunction data:
    ef = pco.source(ev_traj)
    ef.chunksize = np.min(ef.trajectory_lengths())
    # Get the output into memory, leaving out the first ef:
    psidata = ef.get_output(dimensions=np.arange(1, M + 1, dtype=int))
    cindices = []
    # Write out frames to a trajectory file:
    # Loop over the centers:
    for i in range(nc):
        # Create a list of possible frames:
        indices = []
        # Loop over the trajectory files:
        for m in range(ntraj):
            # Get the data for this traj:
            mdata = psidata[m]
            # Get the admissible frames for this trajectory:
            mind = np.where(np.any(np.abs(mdata - c[i, :]) <= d[i], axis=1))[0]
            # Make a random selection:
            if nframes is not None:
                mind = dt * np.random.choice(mind, (nframes,))
            else:
                mind = dt * mind
            # Put the information together:
            mindices = np.zeros((mind.shape[0], 2), dtype=int)
            mindices[:, 0] = m
            mindices[:, 1] = mind
            indices.append(mindices)
        # Save to traj:
        if traj_inp is not None and filename is not None and topfile is not None:
            pco.save_traj(traj_inp, indices, outfile=filename + "Center%d.xtc" % i, topfile=topfile)
        cindices.append(indices)
    return cindices
Example #34
def CreateEVHistogram(ev_traj, bins, filename, m=np.array([1]), rg=None, kb=8.314e-3, T=300):
    ''' Create a histogram of the eigenfunctions.

    Parameters:
    ------------
    ev_traj: List of eigenfunction trajectories.
    bins: int, number of bins.
    filename: str, name of the figure file to save.
    m: Indices of eigenfunctions to be histogrammed. By default, the second
        eigenfunction is shown. If m contains another integer, that function
        is shown. If m is a two-element array, a 2d-histogram of the two
        functions is shown.
    rg: range passed to the histogram functions.
    kb, T: Boltzmann constant and temperature, setting the free-energy scale.
    '''
    # Get the number of trajectories:
    ntraj = len(ev_traj)
    # Create a reader of eigenfunction data:
    ef = pco.source(ev_traj)
    ef.chunksize = np.min(ef.trajectory_lengths())
    # Create the histogram depending on m:
    if m.shape[0] == 1:
        psidata = ef.get_output(dimensions=m)
        psi = np.zeros((0, 1))
        # Stack all data on top of each other (loop index must not shadow m):
        for k in range(ntraj):
            psi = np.vstack((psi, psidata[k]))
        # Show the histogram:
        plt.figure()
        plt.hist(psi, bins=bins, range=rg)
    elif m.shape[0] == 2:
        psidata = ef.get_output(dimensions=m)
        psi = np.zeros((0, 2))
        # Stack all data on top of each other:
        for k in range(ntraj):
            psi = np.vstack((psi, psidata[k]))
        # Show the histogram: 
        plt.figure()
        H, xe, ye = np.histogram2d(psi[:, 0], psi[:, 1], bins=bins, range=rg, density=True)
        # Make it a free energy plot:
        binwx = xe[1] - xe[0]
        binwy = ye[1] - ye[0]
        H = H * binwx * binwy
        ind = np.nonzero(H)
        thres = np.min(H[ind[0], ind[1]])
        H2 = thres * np.ones(H.shape)
        H2[ind[0], ind[1]] = H[ind[0], ind[1]]
        H2 = -kb * T * np.log(H2)
        X, Y = np.meshgrid(0.5 * (xe[1:] + xe[:-1]), 0.5 * (ye[1:] + ye[:-1]))
        plt.contourf(X, Y, H2.transpose())
        plt.colorbar()
    else:
        print "Selection in m could not be used."
    plt.savefig(filename)
    plt.show()
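
A hedged usage sketch for CreateEVHistogram (synthetic eigenfunction trajectories; 'ev_hist.png' is a placeholder filename): with the default m=[1] this histograms the second eigenfunction.

import numpy as np
ev = [np.random.random((1000, 3)) for _ in range(2)]
CreateEVHistogram(ev, bins=50, filename="ev_hist.png")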
Example #35
    def test_feature_correlation_MD(self):
        # Copying from the test_MD_data
        path = pkg_resources.resource_filename(__name__, 'data') + os.path.sep
        self.pdb_file = os.path.join(path, 'bpti_ca.pdb')
        self.xtc_file = os.path.join(path, 'bpti_mini.xtc')
        inp = source(self.xtc_file, top=self.pdb_file)
        ticamini = tica(inp, lag=1, kinetic_map=False)

        feature_traj = ticamini.data_producer.get_output()[0]
        tica_traj = ticamini.get_output()[0]
        test_corr = ticamini.feature_TIC_correlation
        true_corr = mycorrcoef(feature_traj, tica_traj, ticamini.lag)
        np.testing.assert_allclose(test_corr, true_corr, atol=1.E-8)
Example #36
    def partial_fit(self, X):
        """ incrementally update the estimates

        Parameters
        ----------
        X: array, list of arrays, PyEMMA reader
            input data.
        """
        from pyemma.coordinates import source

        self._estimate(source(X), partial_fit=True)
        self._estimated = True

        return self
Example #37
def multi_temperature_tram(feat, trajfiles, temperatures, dtrajs=None, stride=1, tica_lag=100,
        keep_tica_dims=20, n_clusters=100, tram_lag=100, engfile="Etot.dat", usecols=(1,), kb=0.0083145):
    """
    Parameters
    ----------
    feat : obj, pyemma.coor.featurizer
        Featurizer object that already has the appropriate features added.
    trajfiles : list
        Names of trajectories to include in estimation.
    temperatures : list
        Temperatures of corresponding trajectories.
    stride : int
        Number of frames to skip in tica and clustering.
    tica_lag : int
        Lagtime to use for constructing tica.
    keep_tica_dims : int
        Number of dimensions to keep from tica. Somewhat ambiguous.
    n_clusters : int
        Number of clusters for kmeans. Somewhat ambiguous. 
    """

    dirs = [ os.path.dirname(x) for x in trajfiles ]
    beta = [ 1./(kb*x) for x in temperatures ]

    if dtrajs is None:
        inp = coor.source(trajfiles, feat)

        tica_obj = coor.tica(inp, lag=tica_lag, dim=keep_tica_dims, stride=stride)
        Y = tica_obj.get_output()

        cl = coor.cluster_kmeans(data=Y, k=n_clusters, stride=stride)
        dtrajs = cl.dtrajs

    # dimensionless energy
    if engfile.endswith("npy"):
        energy_trajs = [ beta[i]*np.load("{}/{}".format(dirs[i], engfile)) for i in range(len(dirs)) ]
    else:
        energy_trajs = [ beta[i]*np.loadtxt("{}/{}".format(dirs[i], engfile), usecols=usecols) for i in range(len(dirs)) ]
    temp_trajs = [ kb*temperatures[i]*np.ones(energy_trajs[i].shape[0], float) for i in range(len(dirs)) ]

    # dTRAM approach
    tram = thermo.estimate_multi_temperature(energy_trajs, temp_trajs,
            dtrajs, energy_unit='kT', temp_unit='kT', estimator='tram',
            lag=tram_lag, maxiter=2000000, maxerr=1e-10)

    return dirs, dtrajs, tram
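
A hypothetical call, shown commented out because it needs real data on disk: every name here (directories, temperatures, the Etot.dat energy file read from each trajectory directory) is a placeholder, and feat is a featurizer prepared beforehand as the docstring requires.

# trajfiles = ["T_120.00_1/traj.xtc", "T_130.00_1/traj.xtc"]
# dirs, dtrajs, tram = multi_temperature_tram(feat, trajfiles, [120.0, 130.0],
#                                             tica_lag=50, n_clusters=50, tram_lag=50)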
Example #38
def ApplyLinearTransform(Y, U, filename):
    """ Apply linear transformation U to time-series given by Y.

    Parameters:
    -------------
    Y, pyemma-reader, containing time series of basis functions.
    U, ndarray, shape (r,s), where r must be identical to dimension of Y and s
        is the number of linear combinations to be extracted.
    filename: str, name to be used to save the data for the new time series.

    Returns:
    -------------
    pyemma-reader, containing the time-series of all the linear transform
        applied to Y."""
    # Get the dimension of the new time-series:
    r = U.shape[1]
    # Get the iterator for the time-series:
    I = Y.iterator()
    # Prepare an empty array for the trajectory pieces:
    file_names = []
    q = 0
    ieval = np.zeros((0, r))
    # Apply the transform chunk by chunk:
    for piece in I:
        # Get the trajectory number and the data:
        traj_id = piece[0]
        piece = piece[1]
        # Check if the last trajectory is finished:
        if traj_id > q:
            np.save(filename + "_%d.npy" % q, ieval)
            file_names.append(filename + "_%d.npy" % q)
            ieval = np.zeros((0, r))
            q += 1
        # Apply linear transform:
        piece = np.dot(piece, U)
        # Stack the result underneath the previous results:
        ieval = np.vstack((ieval, piece))
    # Save the last trajectory:
    np.save(filename + "_%d.npy" % q, ieval)
    file_names.append(filename + "_%d.npy" % q)
    # Build a new reader and return it:
    reader = pco.source(file_names)
    reader.chunksize = Y.chunksize
    return reader
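
A hedged usage sketch for ApplyLinearTransform (placeholder arrays and 'lintrans' prefix): project a 3-dimensional basis reader onto two random linear combinations.

import numpy as np
import pyemma.coordinates as pco

Y = pco.source([np.random.random((500, 3))])
U = np.random.random((3, 2))
lt = ApplyLinearTransform(Y, U, "lintrans")  # writes lintrans_0.npy to the cwd
assert lt.dimension() == 2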
Example #39
# Dimension:
d = 16

''' 2. Basis functions and directories:'''
print "Preparing data:"
# Path of basis evaluations:
basispath = fundamental_path + "TTApplications/ALA10TT2/Evaluations/"
# Number of trajectories:
ntraj = 6
# List for basis readers:
basis = []
for i in range(d):
    # Create list of evaluation files for this coordinate:
    file_list = [basispath + "Traj%d/Basis%d.npy" % (j, i) for j in range(ntraj)]
    # Create a reader for this basis:
    ireader = pco.source(file_list, chunk_size=50000)
    # Append it:
    basis.append(ireader)
    
# Define a directory for intermediate files, interfaces, and results:
ifacedir = fundamental_path + "TTApplications/ALA10TT2/Interfaces/"
ifilename = fundamental_path + "TTApplications/ALA10TT2/Intermediate/Intermediate"
resdir = fundamental_path + "TTApplications/ALA10TT2/ResultsCG/"

''' 3. Computational Settings:'''
# Lag time:
tau = 40
# Physical time step:
dt = 0.05
# Number of eigenfunctions:
M = 2
Example #40
            scale = 0.3
        else:
            # Use native contact distance as threshold for native pairs.
            logger.info("    contacts between native pairs")
            pairs = np.loadtxt("%s/native_contacts.ndx" % dirs[0], dtype=int, skiprows=1) - 1
            threshold = np.loadtxt("%s/pairwise_params" % dirs[0], usecols=(4,))[1:2*pairs.shape[0]:2] + 0.1
            scale = 0.3


        # Featurizer parameterizes a pipeline to read in trajectory in chunks.
        feat = coor.featurizer(topfile)
        feat.add_tanh_contacts(pairs, threshold=threshold, scale=scale, periodic=False)

        # Source trajectories
        logger.info("  sourcing trajectories: %s" % traj_list.__str__())
        inp = coor.source(traj_list, feat)

        # Stride has a drastic influence on the number of acceptable eigenvalues.
        logger.info("  computing TICA")
        tica_obj = coor.tica(inp, lag=lag, stride=stride, var_cutoff=0.9, kinetic_map=True)

        # Check if eigenvalues go negative at some point. Truncate before that if necessary.
        logger.info("  TICA done")
        logger.info("    number of dimensions: %d" % tica_obj.dimension())
        if tica_obj.dimension() == 1:
            keep_dims = 1
        else:
            if sum(tica_obj.eigenvalues < 0) > 0:
                first_neg_eigval = np.where(tica_obj.eigenvalues < 0)[0][0]
                keep_dims = min([tica_obj.dimension(), first_neg_eigval])
                logger.info("    first negative eigenvalue: %d" % first_neg_eigval)
Example #41
    tempdirs = [ "T_{:.2f}_{}".format(T, x) for x in [1,2,3] ]

    topfile = tempdirs[0] + "/" + topname

    trajfiles = [ x + "/" + trajname for x in tempdirs ]

    # add features
    feat = coor.featurizer(topfile)
    feat, feature_info = util.sbm_contact_features(feat, pairwise_file, n_native_pairs)

    if not os.path.exists("msm"):
        os.mkdir("msm")

    if (not os.path.exists("msm/dtrajs.pkl")) or recluster:
        # cluster if necessary
        inp = coor.source(trajfiles, feat)
        tica_obj = coor.tica(inp, dim=tica_dims, lag=tica_lag, stride=stride)
        Y = tica_obj.get_output()
        cl = coor.cluster_kmeans(data=Y, k=n_clusters)
        dtrajs = cl.dtrajs

        os.chdir("msm")
        dirs = [ os.path.basename(os.path.dirname(x)) for x in trajfiles ]

        if not dontsavemsm:
            dtraj_info = {dirs[x]: dtrajs[x] for x in range(len(dirs))}
            dtraj_info["dirs"] = dirs
            with open("dtrajs.pkl", 'wb') as fhandle:
                pickle.dump(dtraj_info, fhandle)
    else:
        os.chdir("msm")
Example #42
traj[0].save_pdb(reference_pdb_filename)

################################################################################
# Initialize featurizer
################################################################################

print('Initializing backbone torsions featurizer...')
featurizer = coor.featurizer(reference_pdb_filename)
featurizer.add_backbone_torsions()

################################################################################
# Define coordinates source
################################################################################

trajectory_files = glob(os.path.join(source_directory, '*0.h5'))
coordinates_source = coor.source(trajectory_files, featurizer)
print("There are %d frames total in %d trajectories." % (coordinates_source.n_frames_total(), coordinates_source.number_of_trajectories()))

################################################################################
# Do tICA
################################################################################

print('tICA...')
running_tica = coor.tica(lag=100, dim=100)

################################################################################
# Cluster
################################################################################

print('Clustering...')
clustering = coor.cluster_kmeans(k=100, stride=50)