def read_dtrajs_from_pattern(patterns, logger=getLogger()):
    """Read every discrete trajectory whose path matches the given pattern(s).

    Parameters
    ----------
    patterns : single pattern or list of patterns
        eg. '*.txt' or ['/foo/*/bar/*.txt', '*.txt']
    logger : logger object, optional
        Receives an info message for each file that is read and an error
        message when reading a file fails.

    Returns
    -------
    list of discrete trajectories : list of numpy arrays, dtype=int

    Raises
    ------
    ValueError
        If the pattern(s) match no path at all.
    Exception
        Any error raised while reading a single file is logged and then
        re-raised unchanged.
    """
    filenames = paths_from_patterns(patterns)
    # Truthiness also covers empty tuples/sets, which `== []` would let through.
    if not filenames:
        raise ValueError('no match to given pattern')

    dtrajs = []
    for dt in filenames:
        # skip directories
        if os.path.isdir(dt):
            continue
        logger.info('reading discrete trajectory: %s', dt)
        try:
            dtrajs.append(read_discrete_trajectory(dt))
        except Exception as e:
            # Log which file failed, then let the caller see the original error.
            logger.error('Exception occurred during reading of %s:\n%s', dt, e)
            raise
    return dtrajs
def __init__(self, chain, chunksize=100, param_stride=1):
    r"""Data processing pipeline.

    Parameters
    ----------
    chain : list of transformers like objects
        the order in the list defines the direction of data flow.
    chunksize : int, optional
        how many frames shall be processed at once.
    param_stride : int, optional
        stored as ``self.param_stride``. The original note reads
        "omit every n'th data point".
        NOTE(review): a stride usually means *keeping* only every n'th
        point - confirm against the code that consumes it.
    """
    self._chain = []
    # chunksize was assigned twice in a row before; once is sufficient.
    self.chunksize = chunksize
    self.param_stride = param_stride

    # add given elements in chain
    for e in chain:
        self.add_element(e)

    self._parametrized = False

    # one logger per instance, named "<ClassName>[<hex id>]"
    name = "%s[%s]" % (self.__class__.__name__, hex(id(self)))
    self._logger = getLogger(name)
def __init__(self, topfile):
    """Set up an empty featurizer for the given topology file.

    Parameters
    ----------
    topfile : str
        File passed to ``mdtraj.load``; the ``topology`` of the loaded
        object is kept on the instance.
    """
    self.topologyfile = topfile

    # only the topology of the loaded object is retained
    loaded = mdtraj.load(topfile)
    self.topology = loaded.topology

    # nothing selected yet
    self.active_features = []
    self._dim = 0

    # per-instance logger named "<ClassName>[<hex id>]"
    class_name = self.__class__.__name__
    instance_tag = hex(id(self))
    self._logger = getLogger("%s[%s]" % (class_name, instance_tag))
def setUpClass(cls):
    """Create the temporary NumPy files shared by the tests of this class.

    A fresh temporary directory is created and filled with:

    * ``f1``: 2d array ``d`` of shape (100, 3), saved as .npy
    * ``f4``: 2d array of shape (200, 3), saved as .npy
    * ``f2``: 1d array ``d_1d`` with 100 random values, saved as .npy
    * ``f3``: .npz archive holding ``d`` twice

    The paths and the arrays ``d`` / ``d_1d`` are stored as class
    attributes. Returns ``cls``.
    """
    # cls already is the class; cls.__class__ would be its metaclass, which
    # named the logger 'type' instead of after the test case.
    cls.logger = getLogger(cls.__name__)

    d = np.arange(3 * 100).reshape((100, 3))
    d2 = np.arange(300, 900).reshape((200, 3))
    d_1d = np.random.random(100)

    cls.dir = tempfile.mkdtemp(prefix='pyemma_npyreader')

    # mktemp only picks names; they live inside the private directory
    # created above and the files are written right below.
    cls.f1 = tempfile.mktemp(suffix='.npy', dir=cls.dir)
    cls.f2 = tempfile.mktemp(suffix='.npy', dir=cls.dir)
    cls.f3 = tempfile.mktemp(suffix='.npz', dir=cls.dir)
    cls.f4 = tempfile.mktemp(suffix='.npy', dir=cls.dir)

    # 2d
    np.save(cls.f1, d)
    np.save(cls.f4, d2)

    # 1d
    np.save(cls.f2, d_1d)

    np.savez(cls.f3, d, d)

    # the .npz archive (f3) is deliberately not part of the 2d file list
    cls.files2d = [cls.f1, cls.f4]
    cls.files1d = [cls.f2]
    cls.d = d
    cls.d_1d = d_1d

    cls.npy_files = [f for f in cls.files2d if f.endswith('.npy')]
    cls.npz = cls.f3

    return cls
def setUpClass(cls):
    """Create the temporary NumPy files shared by the tests of this class.

    A fresh temporary directory is created and filled with:

    * ``f1``: 2d array ``d`` of shape (100, 3), saved as .npy
    * ``f2``: 1d array ``d_1d`` with 100 random values, saved as .npy
    * ``f3``: .npz archive holding ``d`` twice

    The paths and the arrays ``d`` / ``d_1d`` are stored as class
    attributes. Returns ``cls``.
    """
    # cls already is the class; cls.__class__ would be its metaclass, which
    # named the logger 'type' instead of after the test case.
    cls.logger = getLogger(cls.__name__)

    d = np.arange(3 * 100).reshape((100, 3))
    d_1d = np.random.random(100)

    cls.dir = tempfile.mkdtemp(prefix='pyemma_npyreader')

    # mktemp only picks names; they live inside the private directory
    # created above and the files are written right below.
    cls.f1 = tempfile.mktemp(suffix='.npy', dir=cls.dir)
    cls.f2 = tempfile.mktemp(suffix='.npy', dir=cls.dir)
    cls.f3 = tempfile.mktemp(suffix='.npz', dir=cls.dir)

    # 2d
    np.save(cls.f1, d)

    # 1d
    np.save(cls.f2, d_1d)

    np.savez(cls.f3, d, d)

    # here the 2d file list contains the .npz archive as well
    cls.files2d = [cls.f1, cls.f3]
    cls.files1d = [cls.f2]
    cls.d = d
    cls.d_1d = d_1d

    cls.npy_files = [f for f in cls.files2d if f.endswith('.npy')]
    cls.npz = cls.f3

    return cls
def __init__(self, chain, chunksize=100, param_stride=1):
    """Build the pipeline from the given chain of elements.

    Every item of ``chain`` is handed to ``self.add_element`` in order;
    ``chunksize`` and ``param_stride`` are stored on the instance.

    TODO: chunksize should be estimated from memory requirements
    (max memory usage)
    """
    self._chain = []
    self.chunksize = chunksize
    self.param_stride = param_stride

    # populate the chain in the order given by the caller
    for element in chain:
        self.add_element(element)

    self._parametrized = False

    # per-instance logger named "<ClassName>[<hex id>]"
    class_name = self.__class__.__name__
    self._logger = getLogger("%s[%s]" % (class_name, hex(id(self))))
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. from __future__ import absolute_import import mdtraj as md import numpy as np from pyemma.util.log import getLogger from pyemma.coordinates.data.util.reader_utils import copy_traj_attributes as _copy_traj_attributes, \ preallocate_empty_trajectory as _preallocate_empty_trajectory, enforce_top as _enforce_top from mdtraj.core.trajectory import Trajectory __all__ = ['frames_from_file'] log = getLogger(__name__) def frames_from_files(files, top, frames, chunksize=1000, stride=1, verbose=False, copy_not_join=None): from pyemma.coordinates import source # Enforce topology to be a md.Topology object top = _enforce_top(top) reader = source(files, top=top) stride = int(stride) if stride != 1: frames[:, 1] *= int(stride) if verbose: log.info('A stride value of = %u was parsed, ' 'interpreting "indexes" accordingly.' % stride)
import unittest import os import numpy as np import tempfile from pyemma.util.log import getLogger import pyemma.coordinates as coor import pyemma.util.types as types logger = getLogger('TestCluster') class TestCluster(unittest.TestCase): @classmethod def setUpClass(cls): super(TestCluster, cls).setUpClass() cls.dtraj_dir = tempfile.mkdtemp() # generate Gaussian mixture means = [np.array([-3,0]), np.array([-1,1]), np.array([0,0]), np.array([1,-1]), np.array([4,2])] widths = [np.array([0.1,0.1]), np.array([0.1,0.1]), np.array([0.1,0.1]), np.array([0.1,0.1]), np.array([0.1,0.1])]
# # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. from __future__ import absolute_import import unittest import os import numpy as np from pyemma.coordinates.data import MDFeaturizer from pyemma.util.log import getLogger import pyemma.coordinates.api as api import pyemma.util.types as types import pkg_resources logger = getLogger('pyemma.' + 'TestReaderUtils') class TestSource(unittest.TestCase): def setUp(self): path = pkg_resources.resource_filename('pyemma.coordinates.tests', 'data') + os.path.sep self.pdb_file = os.path.join(path, 'bpti_ca.pdb') self.traj_files = [ os.path.join(path, 'bpti_001-033.xtc'), os.path.join(path, 'bpti_067-100.xtc') ] def tearDown(self): pass
''' from __future__ import print_function from __future__ import absolute_import import unittest import os import tempfile import numpy as np import mdtraj from pyemma.coordinates import api from pyemma.coordinates.data.feature_reader import FeatureReader from pyemma.util.log import getLogger from six.moves import range log = getLogger('pyemma.' + 'TestFeatureReaderAndTICA') class TestFeatureReaderAndTICA(unittest.TestCase): @classmethod def setUpClass(cls): cls.dim = 9 # dimension (must be divisible by 3) N = 50000 # length of single trajectory # 500000 N_trajs = 10 # number of trajectories cls.w = 2.0 * np.pi * 1000.0 / N # have 1000 cycles in each trajectory # get random amplitudes and phases cls.A = np.random.randn(cls.dim) cls.phi = np.random.random_sample((cls.dim, )) * np.pi * 2.0 mean = np.random.randn(cls.dim)
__author__ = 'noe' from pyemma.util.log import getLogger import numpy as np log = getLogger('Transformer') __all__ = ['Transformer'] class Transformer(object): """ Parameters ---------- chunksize : int (optional) the chunksize used to batch process underlying data lag : int (optional) if you want to process time lagged data, set this to a value > 0. """ def __init__(self, chunksize=100, lag=0): self.chunksize = chunksize self._lag = lag self._in_memory = False self._dataproducer = None @property def data_producer(self):
# # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. from __future__ import absolute_import import unittest import os import numpy as np import tempfile from pyemma.util.log import getLogger import pyemma.coordinates as coor import pyemma.util.types as types from six.moves import range logger = getLogger('TestCluster') class TestCluster(unittest.TestCase): @classmethod def setUpClass(cls): super(TestCluster, cls).setUpClass() cls.dtraj_dir = tempfile.mkdtemp() # generate Gaussian mixture means = [ np.array([-3, 0]), np.array([-1, 1]), np.array([0, 0]), np.array([1, -1]), np.array([4, 2])
''' Test feature reader and Tica by checking the properties of the ICs. cov(ic_i,ic_j) = delta_ij and cov(ic_i,ic_j,tau) = lambda_i delta_ij @author: Fabian Paul ''' import unittest import os import tempfile import numpy as np import mdtraj from pyemma.coordinates.api import tica, _TICA as TICA from pyemma.coordinates.data.feature_reader import FeatureReader from pyemma.util.log import getLogger log = getLogger('TestFeatureReaderAndTICAProjection') def random_invertible(n, eps=0.01): 'generate real random invertible matrix' m = np.random.randn(n, n) u, s, v = np.linalg.svd(m) s = np.maximum(s, eps) return u.dot(np.diag(s)).dot(v) from nose.plugins.attrib import attr @attr(slow=True) class TestFeatureReaderAndTICAProjection(unittest.TestCase): @classmethod def setUpClass(cls): c = super(TestFeatureReaderAndTICAProjection, cls).setUpClass()
''' Created on 18.10.2013 @author: marscher ''' import numpy as np from decimal import Decimal # for wrapping java.math.BigDecimal from pyemma.util.log import getLogger from pyemma.util.pystallone import ndarray_to_stallone_array, stallone, JArray, JInt, JDouble __all__ = ['PathwayDecomposition'] log = getLogger() # TODO: test class PathwayDecomposition(object): def __init__(self, F, Q, A, B): """ Parameters ---------- F : The net fluxes matrix ndarray(dtype=float, shape=(n,n)) Q : The committor vector ndarray(dtype=float, shape=(n) A : set of representatives (indices defining set A in F) ndarray(dtype=int) B : set of representatives ndarray(dtype=int)
@author: marscher ''' import mdtraj import os import tempfile import unittest from pyemma.coordinates import api from pyemma.coordinates.data.feature_reader import FeatureReader from pyemma.util.log import getLogger import pkg_resources import numpy as np from pyemma.coordinates.api import feature_reader, discretizer, tica log = getLogger('TestFeatureReader') class TestFeatureReader(unittest.TestCase): @classmethod def setUpClass(cls): c = super(TestFeatureReader, cls).setUpClass() # create a fake trajectory which has 3 atoms and coordinates are just a range # over all frames. cls.trajfile = tempfile.mktemp('.xtc') cls.n_frames = 1000 cls.xyz = np.random.random(cls.n_frames * 3 * 3).reshape((cls.n_frames, 3, 3)) log.debug("shape traj: %s" % str(cls.xyz.shape)) cls.topfile = pkg_resources.resource_filename( 'pyemma.coordinates.tests.test_featurereader', 'data/test.pdb')
''' Created on 04.02.2015 @author: marscher ''' import unittest from pyemma.coordinates.data.data_in_memory import DataInMemory from pyemma.util.log import getLogger import numpy as np import tempfile import os logger = getLogger('TestDataInMemory') class TestDataInMemory(unittest.TestCase): @classmethod def setUpClass(cls): d = np.random.random((100, 3)) d_1d = np.random.random(100) f1 = tempfile.mktemp() f2 = tempfile.mktemp(suffix='.npy') f3 = tempfile.mktemp() f4 = tempfile.mktemp(suffix='.npy') npz = tempfile.mktemp(suffix='.npz')
''' Created on 02.02.2015 @author: marscher ''' import unittest import numpy as np from pyemma.coordinates import pca from pyemma.util.log import getLogger import pyemma.util.types as types logger = getLogger('TestTICA') class TestPCAExtensive(unittest.TestCase): @classmethod def setUpClass(cls): import pyemma.msm.generation as msmgen # generate HMM with two Gaussians cls.P = np.array([[0.99, 0.01], [0.01, 0.99]]) cls.T = 10000 means = [np.array([-1, 1]), np.array([1, -1])] widths = [np.array([0.3, 2]), np.array([0.3, 2])] # continuous trajectory cls.X = np.zeros((cls.T, 2)) # hidden trajectory dtraj = msmgen.generate_traj(cls.P, cls.T) for t in range(cls.T):
from pyemma.coordinates.data.featurizer import MDFeaturizer as _MDFeaturizer from pyemma.coordinates.data.feature_reader import FeatureReader as _FeatureReader from pyemma.coordinates.data.data_in_memory import DataInMemory as _DataInMemory from pyemma.coordinates.data.util.reader_utils import create_file_reader as _create_file_reader from pyemma.coordinates.data.frames_from_file import frames_from_file as _frames_from_file # transforms from pyemma.coordinates.transform.transformer import Transformer as _Transformer from pyemma.coordinates.transform.pca import PCA as _PCA from pyemma.coordinates.transform.tica import TICA as _TICA # clustering from pyemma.coordinates.clustering.kmeans import KmeansClustering as _KmeansClustering from pyemma.coordinates.clustering.uniform_time import UniformTimeClustering as _UniformTimeClustering from pyemma.coordinates.clustering.regspace import RegularSpaceClustering as _RegularSpaceClustering from pyemma.coordinates.clustering.assign import AssignCenters as _AssignCenters logger = getLogger('coordinates.api') __author__ = "Frank Noe, Martin Scherer" __copyright__ = "Copyright 2015, Computational Molecular Biology Group, FU-Berlin" __credits__ = ["Benjamin Trendelkamp-Schroer", "Martin Scherer", "Frank Noe"] __license__ = "FreeBSD" __version__ = "2.0.0" __maintainer__ = "Martin Scherer" __email__ = "m.scherer AT fu-berlin DOT de" __all__ = ['featurizer', # IO 'load', 'source', 'pipeline', 'discretizer', 'save_traj',
''' Created on 18.02.2015 @author: marscher ''' from pyemma.coordinates.transform.transformer import Transformer from pyemma.util.log import getLogger import numpy as np log = getLogger('Clustering') class AbstractClustering(Transformer): """ provides a common interface for cluster algorithms. """ def __init__(self): super(AbstractClustering, self).__init__() self.clustercenters = None self.dtrajs = [] def map(self, x): """get closest index of point in :attr:`clustercenters` to x.""" d = self.data_producer.distances(x, self.clustercenters) return np.argmin(d) def save_dtrajs(self, trajfiles=None, prefix='', output_format='ascii', extension='.dtraj'):
''' Created on 22.01.2015 @author: marscher ''' from pyemma.util.log import getLogger import numpy as np from pyemma.coordinates.transform.transformer import Transformer log = getLogger('WriterCSV') __all__ = ['WriterCSV'] class WriterCSV(Transformer): ''' shall write to csv files ''' def __init__(self, filename): ''' Constructor ''' super(WriterCSV, self).__init__() # filename should be obtained from source trajectory filename, # eg suffix it to given filename self.filename = filename self.last_frame = False
''' Created on 02.02.2015 @author: marscher ''' import unittest import numpy as np from pyemma.coordinates import pca from pyemma.util.log import getLogger import pyemma.util.types as types logger = getLogger('TestTICA') class TestPCAExtensive(unittest.TestCase): @classmethod def setUpClass(cls): import pyemma.msm.generation as msmgen # generate HMM with two Gaussians cls.P = np.array([[0.99, 0.01], [0.01, 0.99]]) cls.T = 10000 means = [np.array([-1,1]), np.array([1,-1])] widths = [np.array([0.3,2]),np.array([0.3,2])] # continuous trajectory cls.X = np.zeros((cls.T, 2))
''' Created on 19.01.2015 @author: marscher ''' from .transformer import Transformer from pyemma.util.linalg import eig_corr from pyemma.util.log import getLogger from pyemma.util.annotators import doc_inherit import numpy as np log = getLogger('TICA') __all__ = ['TICA'] class TICA(Transformer): r""" Time-lagged independent component analysis (TICA) Given a sequence of multivariate data :math:`X_t`, computes the mean-free covariance and time-lagged covariance matrix: .. math:: C_0 &= (X_t - \mu)^T (X_t - \mu) \\ C_{\tau} &= (X_t - \mu)^T (X_t+\tau - \mu) and solves the eigenvalue problem .. math:: C_{\tau} r_i = C_0 \lambda_i r_i
''' from __future__ import absolute_import import mdtraj import tempfile import unittest from pyemma.coordinates import api from pyemma.coordinates.data.feature_reader import FeatureReader from pyemma.util.log import getLogger import pkg_resources import numpy as np from pyemma.coordinates.api import discretizer, tica, source from six.moves import range log = getLogger('TestFeatureReader') def create_traj(top, format='.xtc', dir=None): trajfile = tempfile.mktemp(suffix=format, dir=dir) n_frames = np.random.randint(500, 1500) log.debug("create traj with %i frames" % n_frames) xyz = np.arange(n_frames * 3 * 3).reshape((n_frames, 3, 3)) t = mdtraj.load(top) t.xyz = xyz t.unitcell_vectors = np.array(n_frames*[[0,0,1], [0,1,0], [1,0,0]]).reshape(n_frames, 3,3) t.time = np.arange(n_frames) t.save(trajfile) return trajfile, xyz, n_frames
#!/usr/bin/env python # encoding: utf-8 """ """ import argparse import sys import os from pyemma.util.log import getLogger from pyemma.msm.generation import generate_traj import pyemma log = getLogger('mm_generate') def handleArgs(): parser = argparse.ArgumentParser() parser.add_argument('-T', type=str, help='path to transition matrix.') parser.add_argument('-o', '--output', dest='output', required=True, help='output filename of trajectory.') parser.add_argument('-dt', type=int, default=1) parser.add_argument('-steps', type=int) parser.add_argument('-start_state', type=int) args = parser.parse_args() return args
from __future__ import absolute_import import pyemma from six.moves import range from six.moves import zip ''' Created on 04.02.2015 @author: marscher ''' import unittest import numpy as np from pyemma.coordinates.data.data_in_memory import DataInMemory from pyemma.util.log import getLogger logger = getLogger('pyemma.' + 'TestDataInMemory') class TestDataInMemory(unittest.TestCase): @classmethod def setUpClass(cls): d = np.random.random((100, 3)) d_1d = np.random.random(100) cls.d = d cls.d_1d = d_1d return cls def test_skip(self): for skip in [0, 3, 13]: r1 = DataInMemory(self.d)
from __future__ import absolute_import import unittest import os import tempfile from pyemma.coordinates.data import MDFeaturizer from pyemma.util.log import getLogger import pyemma.coordinates.api as api import numpy as np from pyemma.coordinates.data.numpy_filereader import NumPyFileReader from pyemma.coordinates.data.py_csv_reader import PyCSVReader as CSVReader import shutil logger = getLogger('TestReaderUtils') class TestApiSourceFileReader(unittest.TestCase): @classmethod def setUpClass(cls): data_np = np.random.random((100, 3)) data_raw = np.arange(300 * 4).reshape(300, 4) cls.dir = tempfile.mkdtemp("test-api-src") cls.npy = tempfile.mktemp(suffix='.npy', dir=cls.dir) cls.npz = tempfile.mktemp(suffix='.npz', dir=cls.dir) cls.dat = tempfile.mktemp(suffix='.dat', dir=cls.dir)
def create_logger(self):
    """Create the logger for this object, named after ``self.name``.

    The logger is stored in ``self._logger_instance``. In addition a weak
    reference to ``self`` is registered in ``_refs`` under the same name;
    the object returned by ``_cleanup_logger(self)`` is installed as the
    callback of that weak reference.

    Returns
    -------
    the newly created logger instance
    """
    logger_name = self.name
    self._logger_instance = getLogger(logger_name)

    # weak reference, so the registry itself does not keep the object alive
    _refs[logger_name] = weakref.ref(self, _cleanup_logger(self))

    return self._logger_instance
def __init__(self, counts, mu=None, reversible=False, Tinit=None):
    """
    Sets the count matrix used for sampling. Assumes that the prior
    (if desired) is included.

    Parameters
    ----------
    counts : ndarray (n, n)
        the posterior count matrix
    mu : ndarray (n)
        optional stationary distribution, if given, the sampled transition
        matrix will have this stationary distribution. Only taken into
        account when ``reversible`` is True.
    reversible : boolean
        should sample a reversible transition matrix.
    Tinit : ndarray(n, n)
        optional start point for sampling algorithm.

    Raises
    ------
    JavaException
        Errors raised while creating the wrapped sampler are logged together
        with their Java stack trace and re-raised.

    Example
    -------
    >>> C = np.array([[5, 2], [1,10]])
    >>> sampler = ITransitionMatrixSampler(C)
    >>> T = sampler.sample(10**6)
    >>> print(T)

    """
    if issparse(counts):
        counts = counts.toarray()
    # the interface in stallone takes counts as doubles
    counts = counts.astype(np.float64)

    try:
        C = ndarray_to_stallone_array(counts)
        jpackage = stallone.mc.sampling

        # convert types to java
        if Tinit is not None:
            Tinit = ndarray_to_stallone_array(Tinit)
        if mu is not None:
            mu = ndarray_to_stallone_array(mu)

        # "Was this argument given?" is decided with `is not None`; the
        # truthiness of an array-like object is not a reliable presence test.
        has_tinit = Tinit is not None
        has_mu = mu is not None

        if reversible:
            if has_mu:
                # fixed pi
                if has_tinit:
                    self.sampler = jpackage.TransitionMatrixSamplerRevFixPi(
                        C, Tinit, mu)
                else:
                    self.sampler = jpackage.TransitionMatrixSamplerRevFixPi(
                        C, mu)
            else:
                # sample reversible matrix, with arbitrary pi
                if has_tinit:
                    self.sampler = jpackage.TransitionMatrixSamplerRev(
                        C, Tinit)
                else:
                    self.sampler = jpackage.TransitionMatrixSamplerRev(C)
        else:
            # sample non rev (mu is not used on this path)
            if has_tinit:
                self.sampler = jpackage.TransitionMatrixSamplerNonrev(
                    C, Tinit)
            else:
                self.sampler = jpackage.TransitionMatrixSamplerNonrev(C)
    except JavaException as je:
        log = getLogger()
        log.exception("Error during creation of tmatrix sampling wrapper:"
                      " stack\n%s" % je.stacktrace())
        raise
# # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. from __future__ import absolute_import import os import unittest from pyemma.util.files import TemporaryDirectory from pyemma.util.log import getLogger from six.moves import range import numpy as np import pyemma.coordinates as coor import pyemma.util.types as types logger = getLogger('pyemma.' + 'TestCluster') class TestClusterAssign(unittest.TestCase): @classmethod def setUpClass(cls): super(TestClusterAssign, cls).setUpClass() # generate Gaussian mixture means = [ np.array([-3, 0]), np.array([-1, 1]), np.array([0, 0]), np.array([1, -1]), np.array([4, 2]) ]
from __future__ import absolute_import import unittest import os import pkg_resources import numpy as np from pyemma.coordinates import api from pyemma.coordinates.data.data_in_memory import DataInMemory from pyemma.coordinates import source, tica from pyemma.util.contexts import numpy_random_seed from pyemma.util.log import getLogger import pyemma.util.types as types from six.moves import range logger = getLogger('pyemma.' + 'TestTICA') def mycorrcoef(X, Y, lag): X = X.astype(np.float64) Y = Y.astype(np.float64) mean_X = 0.5 * (np.mean(X[lag:], axis=0) + np.mean(X[0:-lag], axis=0)) mean_Y = 0.5 * (np.mean(Y[lag:], axis=0) + np.mean(Y[0:-lag], axis=0)) cov = ( (X[0:-lag] - mean_X).T.dot(Y[0:-lag] - mean_Y) + (X[lag:] - mean_X).T.dot(Y[lag:] - mean_Y)) / (2 * (X.shape[0] - lag) - 1) autocov_X = ( (X[0:-lag] - mean_X).T.dot(X[0:-lag] - mean_X) + (X[lag:] - mean_X).T.dot(X[lag:] - mean_X)) / (2 *
import unittest import os import tempfile import numpy as np from pyemma.util.log import getLogger import pyemma.coordinates as coor import pyemma.util.types as types logger = getLogger('TestReaderUtils') class TestCluster(unittest.TestCase): @classmethod def setUpClass(cls): super(TestCluster, cls).setUpClass() cls.dtraj_dir = tempfile.mkdtemp() # generate Gaussian mixture means = [np.array([-3,0]), np.array([-1,1]), np.array([0,0]), np.array([1,-1]), np.array([4,2])] widths = [np.array([0.3,2]), np.array([0.3,2]), np.array([0.3,2]), np.array([0.3,2]),
#!/usr/bin/env python ''' Created on 17.02.2014 @author: marscher ''' import argparse import sys from pyemma.msm.estimation.api import count_matrix, largest_connected_set from pyemma.msm.io.api import write_matrix from pyemma.util.files import read_dtrajs_from_pattern from pyemma.util.log import getLogger log = getLogger() def handleArgs(): parser = argparse.ArgumentParser() parser.add_argument('-i', dest='discTraj', required=True, nargs='+', help='list of discrete trajectories') parser.add_argument('-o', dest='output', help='output filename of largest connected set') parser.add_argument( '-lag', dest='lag', help='lag time for which connectivity should be calculated for',
def __create_logger(self):
    """Attach a per-instance logger as ``self._logger``.

    The logger name has the form ``"<ClassName>[<hex id of the instance>]"``.
    """
    class_name = self.__class__.__name__
    instance_tag = hex(id(self))
    self._logger = getLogger("%s[%s]" % (class_name, instance_tag))
__author__ = 'noe' import numpy as np from .transformer import Transformer from pyemma.util.log import getLogger from pyemma.util.annotators import doc_inherit log = getLogger('PCA') __all__ = ['PCA'] class PCA(Transformer): r"""Principal component analysis. Given a sequence of multivariate data :math:`X_t`, computes the mean-free covariance matrix. .. math:: C = (X - \mu)^T (X - \mu) and solves the eigenvalue problem .. math:: C r_i = \sigma_i r_i, where :math:`r_i` are the principal components and :math:`\sigma_i` are their respective variances. When used as a dimension reduction method, the input data is projected onto the dominant principal components. Parameters
@author: marscher ''' import numpy as np import warnings from mdtraj.utils.validation import cast_indices from mdtraj.core.trajectory import load, Trajectory, _parse_topology from mdtraj.formats.hdf5 import HDF5TrajectoryFile from mdtraj.utils.unit import in_units_of from mdtraj.formats.lh5 import LH5TrajectoryFile from mdtraj.formats import DCDTrajectoryFile from mdtraj.formats import XTCTrajectoryFile from pyemma.util.log import getLogger log = getLogger('patches') def iterload(filename, chunk=100, **kwargs): """An iterator over a trajectory from one or more files on disk, in fragments This may be more memory efficient than loading an entire trajectory at once Parameters ---------- filename : str Path to the trajectory file on disk chunk : int Number of frames to load at once from disk per iteration. If 0, load all.
__author__ = 'noe' from pyemma.util.log import getLogger from pyemma.coordinates.clustering.interface import AbstractClustering import numpy as np log = getLogger('UniformTimeClustering') __all__ = ['UniformTimeClustering'] class UniformTimeClustering(AbstractClustering): """ Uniform time clustering Parameters ---------- k : int """ def __init__(self, k=2): super(UniformTimeClustering, self).__init__() self.k = k def describe(self): return "[Uniform time clustering, k = %i]" % self.k def dimension(self): return 1 def get_memory_per_frame(self): """
import pyemma from six.moves import range from six.moves import zip ''' Created on 04.02.2015 @author: marscher ''' import unittest import numpy as np from pyemma.coordinates.data.data_in_memory import DataInMemory from pyemma.coordinates.transform.transformer import TransformerIteratorContext from pyemma.util.log import getLogger logger = getLogger('TestDataInMemory') class TestDataInMemory(unittest.TestCase): @classmethod def setUpClass(cls): d = np.random.random((100, 3)) d_1d = np.random.random(100) cls.d = d cls.d_1d = d_1d return cls def testWrongArguments(self): with self.assertRaises(ValueError): reader = DataInMemory("foo")
''' Test feature reader and Tica by checking the properties of the ICs. cov(ic_i,ic_j) = delta_ij and cov(ic_i,ic_j,tau) = lambda_i delta_ij @author: Fabian Paul ''' import unittest import os import tempfile import numpy as np import mdtraj from pyemma.coordinates.api import tica, _TICA as TICA from pyemma.coordinates.data.feature_reader import FeatureReader from pyemma.util.log import getLogger log = getLogger('TestFeatureReaderAndTICAProjection') def random_invertible(n, eps=0.01): 'generate real random invertible matrix' m = np.random.randn(n, n) u, s, v = np.linalg.svd(m) s = np.maximum(s, eps) return u.dot(np.diag(s)).dot(v) from nose.plugins.attrib import attr @attr(slow=True) class TestFeatureReaderAndTICAProjection(unittest.TestCase): @classmethod
''' Created on 22.01.2015 @author: marscher ''' import numpy as np from sklearn.cluster import MiniBatchKMeans from pyemma.util.log import getLogger from pyemma.util.annotators import doc_inherit from pyemma.coordinates.clustering.interface import AbstractClustering log = getLogger('KmeansClustering') __all__ = ['KmeansClustering'] class KmeansClustering(AbstractClustering): r""" Kmeans clustering Parameters ---------- n_clusters : int amount of cluster centers max_iter : int how many iterations per chunk? """ def __init__(self, n_clusters, max_iter=1000): super(KmeansClustering, self).__init__()
def tmatrix_sampler(C, reversible=False, mu=None, T0=None):
    r"""Generate transition matrix sampler object.

    Parameters
    ----------
    C : (M, M) ndarray or scipy.sparse matrix
        Count matrix
    reversible : bool
        If true sample from the ensemble of transition matrices
        restricted to those obeying a detailed balance condition,
        else draw from the whole ensemble of stochastic matrices.
    mu : array_like
        The stationary distribution of the transition matrix samples.
    T0 : ndarray, shape=(n, n) or scipy.sparse matrix
        Starting point of the MC chain of the sampling algorithm.
        Has to obey the required constraints.

    Returns
    -------
    sampler : A :py:class:dense.ITransitionMatrixSampler object.

    Notes
    -----
    The transition matrix sampler generates transition matrices from
    the posterior distribution. The posterior distribution is given as
    a product of Dirichlet distributions

    .. math:: \mathbb{P}(T|C) \propto \prod_{i=1}^{M} \left( \prod_{j=1}^{M} p_{ij}^{c_{ij}} \right)

    The method can generate samples from the posterior under the
    following two constraints

    **Reversible sampling**

    Using a MCMC sampler outlined in [1]_ it is ensured that samples
    from the posterior are reversible, i.e. there is a probability
    vector :math:`(\mu_i)` such that :math:`\mu_i t_{ij} = \mu_j t_{ji}`
    holds for all :math:`i,j`.

    **Reversible sampling with fixed stationary vector**

    Using a MCMC sampler outlined in [2]_ it is ensured that samples
    from the posterior fulfill detailed balance with respect to a given
    probability vector :math:`(\mu_i)`.

    References
    ----------
    .. [1] Noe, F. 2008. Probability distributions of molecular observables
        computed from Markov state models. J Chem Phys 128: 244103.
    .. [2] Trendelkamp-Schroer, B and F Noe. 2013. Efficient Bayesian
        estimation of Markov model transition matrices with given stationary
        distribution. J Chem Phys 138: 164113.

    """
    # a sparse count matrix is converted to a dense array (with a warning)
    if issparse(C):
        _showSparseConversionWarning()
        C = C.toarray()

    from pyemma.util.pystallone import JavaException

    try:
        sampler = ITransitionMatrixSampler(C, mu, reversible, Tinit=T0)
    except JavaException:
        # record the traceback, then hand the error on to the caller
        getLogger().exception("Error during tmatrix sampling")
        raise
    return sampler
''' Test feature reader and Tica with a set of cosine time series. @author: Fabian Paul ''' import unittest import os import tempfile import numpy as np import mdtraj from pyemma.coordinates import api from pyemma.coordinates.data.feature_reader import FeatureReader from pyemma.util.log import getLogger log = getLogger('TestFeatureReaderAndTICA') from nose.plugins.attrib import attr @attr(slow=True) class TestFeatureReaderAndTICA(unittest.TestCase): @classmethod def setUpClass(cls): c = super(TestFeatureReaderAndTICA, cls).setUpClass() cls.dim = 99 # dimension (must be divisible by 3) N = 50000 # length of single trajectory # 500000 N_trajs = 10 # number of trajectories cls.w = 2.0*np.pi*1000.0/N # have 1000 cycles in each trajectory # get random amplitudes and phases