Ejemplo n.º 1
0
def read_dtrajs_from_pattern(patterns, logger=getLogger()):
    """
    Read all discrete trajectory files matched by the given pattern(s).

    Directories among the matches are skipped. A failure while reading any
    single file is logged and then re-raised.

    Parameters
    ----------
    patterns : single pattern or list of patterns
        eg. '*.txt' or ['/foo/*/bar/*.txt', '*.txt']
    logger : logger, optional
        receives one info message per file read and an error message if
        reading a file fails.

    Returns
    -------
    list of discrete trajectories : list of numpy arrays, dtype=int

    Raises
    ------
    ValueError
        if the patterns match nothing.

    """
    matches = paths_from_patterns(patterns)
    if matches == []:
        raise ValueError('no match to given pattern')

    loaded = []
    for path in matches:
        # only regular entries are read; directories are ignored
        if not os.path.isdir(path):
            logger.info('reading discrete trajectory: %s' % path)
            try:
                traj = read_discrete_trajectory(path)
            except Exception as err:
                logger.error(
                    'Exception occurred during reading of %s:\n%s' % (path, err))
                raise
            loaded.append(traj)
    return loaded
Ejemplo n.º 2
0
    def __init__(self, chain, chunksize=100, param_stride=1):
        r"""Data processing pipeline.

        Parameters
        ----------
        chain : list of transformers like objects
            the order in the list defines the direction of data flow.
        chunksize : int, optional
            how many frames shall be processed at once.
        param_stride : int, optional
            stride stored as ``self.param_stride``; presumably only every
            n'th data point is used during parametrization -- TODO confirm.

        """
        self._chain = []
        # chunksize is assigned exactly once, before any element is added
        self.chunksize = chunksize
        self.param_stride = param_stride

        # add given elements in chain
        for e in chain:
            self.add_element(e)

        self._parametrized = False

        # one logger per instance, named "<ClassName>[<hex id>]"
        name = "%s[%s]" % (self.__class__.__name__, hex(id(self)))
        self._logger = getLogger(name)
Ejemplo n.º 3
0
 def __init__(self, topfile):
     """Load the topology from ``topfile`` and reset the feature state.

     Parameters
     ----------
     topfile : path passed to ``mdtraj.load``; also kept as
         ``self.topologyfile``.
     """
     self.topologyfile = topfile
     loaded = mdtraj.load(topfile)
     self.topology = loaded.topology
     # no features registered yet
     self.active_features = []
     self._dim = 0
     # per-instance logger named "<ClassName>[<hex id>]"
     logger_name = "%s[%s]" % (self.__class__.__name__, hex(id(self)))
     self._logger = getLogger(logger_name)
Ejemplo n.º 4
0
 def __init__(self, topfile):
     """Load the topology from ``topfile`` and reset the feature state.

     Parameters
     ----------
     topfile : path passed to ``mdtraj.load``; also kept as
         ``self.topologyfile``.
     """
     self.topologyfile = topfile
     loaded = mdtraj.load(topfile)
     self.topology = loaded.topology
     # no features registered yet
     self.active_features = []
     self._dim = 0
     # per-instance logger named "<ClassName>[<hex id>]"
     logger_name = "%s[%s]" % (self.__class__.__name__, hex(id(self)))
     self._logger = getLogger(logger_name)
Ejemplo n.º 5
0
    def setUpClass(cls):
        """Create the temporary NumPy fixture files shared by the tests.

        Writes two 2d ``.npy`` files, one 1d ``.npy`` file and one ``.npz``
        file into a fresh temporary directory and stores their paths and the
        source arrays on the class.

        NOTE(review): assumes this is invoked as a classmethod (``cls`` is the
        class object); the decorator is not visible here.
        """
        # ``cls`` is already the class, so ``cls.__class__`` would be its
        # metaclass (normally ``type``); name the logger after the class itself.
        cls.logger = getLogger(cls.__name__)

        d = np.arange(3 * 100).reshape((100, 3))
        d2 = np.arange(300, 900).reshape((200, 3))
        d_1d = np.random.random(100)

        cls.dir = tempfile.mkdtemp(prefix='pyemma_npyreader')

        cls.f1 = tempfile.mktemp(suffix='.npy', dir=cls.dir)
        cls.f2 = tempfile.mktemp(suffix='.npy', dir=cls.dir)
        cls.f3 = tempfile.mktemp(suffix='.npz', dir=cls.dir)
        cls.f4 = tempfile.mktemp(suffix='.npy', dir=cls.dir)

        # 2d
        np.save(cls.f1, d)
        np.save(cls.f4, d2)

        # 1d
        np.save(cls.f2, d_1d)

        # npz archive holding the 2d array twice
        np.savez(cls.f3, d, d)

        # the .npz file (cls.f3) is not part of files2d
        cls.files2d = [cls.f1, cls.f4]
        cls.files1d = [cls.f2]
        cls.d = d
        cls.d_1d = d_1d

        cls.npy_files = [f for f in cls.files2d if f.endswith('.npy')]
        cls.npz = cls.f3

        return cls
Ejemplo n.º 6
0
    def setUpClass(cls):
        """Create the temporary NumPy fixture files shared by the tests.

        Writes one 2d ``.npy`` file, one 1d ``.npy`` file and one ``.npz``
        file into a fresh temporary directory and stores their paths and the
        source arrays on the class.

        NOTE(review): assumes this is invoked as a classmethod (``cls`` is the
        class object); the decorator is not visible here.
        """
        # ``cls`` is already the class, so ``cls.__class__`` would be its
        # metaclass (normally ``type``); name the logger after the class itself.
        cls.logger = getLogger(cls.__name__)

        d = np.arange(3 * 100).reshape((100, 3))
        d_1d = np.random.random(100)

        cls.dir = tempfile.mkdtemp(prefix='pyemma_npyreader')

        cls.f1 = tempfile.mktemp(suffix='.npy', dir=cls.dir)
        cls.f2 = tempfile.mktemp(suffix='.npy', dir=cls.dir)
        cls.f3 = tempfile.mktemp(suffix='.npz', dir=cls.dir)

        # 2d
        np.save(cls.f1, d)

        # 1d
        np.save(cls.f2, d_1d)

        # npz archive holding the 2d array twice
        np.savez(cls.f3, d, d)

        cls.files2d = [cls.f1, cls.f3]
        cls.files1d = [cls.f2]
        cls.d = d
        cls.d_1d = d_1d

        # only the .npy members of files2d
        cls.npy_files = [f for f in cls.files2d if f.endswith('.npy')]
        cls.npz = cls.f3

        return cls
Ejemplo n.º 7
0
    def __init__(self, chain, chunksize=100, param_stride=1):
        """Set up the pipeline state and register every element of ``chain``.

        Parameters
        ----------
        chain : iterable of elements, each passed to ``add_element`` in order
        chunksize : int, optional
            stored as ``self.chunksize``
        param_stride : int, optional
            stored as ``self.param_stride``

        TODO:chunksize should be estimated from memory requirements (max memory usage)
        """
        self._chain = []
        self.chunksize = chunksize
        self.param_stride = param_stride

        # register the given elements in the order they were passed
        for element in chain:
            self.add_element(element)

        self._parametrized = False

        # per-instance logger named "<ClassName>[<hex id>]"
        self._logger = getLogger(
            "%s[%s]" % (self.__class__.__name__, hex(id(self))))
Ejemplo n.º 8
0
    def __init__(self, chain, chunksize=100, param_stride=1):
        """Set up the pipeline state and register every element of ``chain``.

        Parameters
        ----------
        chain : iterable of elements, each passed to ``add_element`` in order
        chunksize : int, optional
            stored as ``self.chunksize``
        param_stride : int, optional
            stored as ``self.param_stride``

        TODO:chunksize should be estimated from memory requirements (max memory usage)
        """
        self._chain = []
        self.chunksize = chunksize
        self.param_stride = param_stride

        # register the given elements in the order they were passed
        for element in chain:
            self.add_element(element)

        self._parametrized = False

        # per-instance logger named "<ClassName>[<hex id>]"
        self._logger = getLogger(
            "%s[%s]" % (self.__class__.__name__, hex(id(self))))
Ejemplo n.º 9
0
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
from __future__ import absolute_import

import mdtraj as md
import numpy as np
from pyemma.util.log import getLogger
from pyemma.coordinates.data.util.reader_utils import copy_traj_attributes as _copy_traj_attributes, \
    preallocate_empty_trajectory as _preallocate_empty_trajectory, enforce_top as _enforce_top
from mdtraj.core.trajectory import Trajectory
# NOTE(review): the exported name 'frames_from_file' differs from the function
# defined below, 'frames_from_files' -- confirm which spelling is intended.
__all__ = ['frames_from_file']

# module-level logger named after this module
log = getLogger(__name__)


def frames_from_files(files, top, frames, chunksize=1000, stride=1, verbose=False, copy_not_join=None):
    from pyemma.coordinates import source
    # Enforce topology to be a md.Topology object
    top = _enforce_top(top)
    reader = source(files, top=top)
    stride = int(stride)

    if stride != 1:
        frames[:, 1] *= int(stride)
        if verbose:
            log.info('A stride value of = %u was parsed, '
                     'interpreting "indexes" accordingly.' % stride)
Ejemplo n.º 10
0
import unittest
import os
import numpy as np
import tempfile

from pyemma.util.log import getLogger
import pyemma.coordinates as coor
import pyemma.util.types as types


logger = getLogger('TestCluster')


class TestCluster(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        super(TestCluster, cls).setUpClass()
        cls.dtraj_dir = tempfile.mkdtemp()

        # generate Gaussian mixture
        means = [np.array([-3,0]),
                 np.array([-1,1]),
                 np.array([0,0]),
                 np.array([1,-1]),
                 np.array([4,2])]
        widths = [np.array([0.1,0.1]),
                  np.array([0.1,0.1]),
                  np.array([0.1,0.1]),
                  np.array([0.1,0.1]),
                  np.array([0.1,0.1])]
Ejemplo n.º 11
0
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import absolute_import
import unittest
import os
import numpy as np

from pyemma.coordinates.data import MDFeaturizer
from pyemma.util.log import getLogger
import pyemma.coordinates.api as api
import pyemma.util.types as types
import pkg_resources

logger = getLogger('pyemma.' + 'TestReaderUtils')


class TestSource(unittest.TestCase):
    def setUp(self):
        """Resolve the paths of the bundled PDB and XTC test files."""
        data_dir = pkg_resources.resource_filename(
            'pyemma.coordinates.tests', 'data') + os.path.sep
        xtc_names = ('bpti_001-033.xtc', 'bpti_067-100.xtc')

        self.pdb_file = os.path.join(data_dir, 'bpti_ca.pdb')
        self.traj_files = [os.path.join(data_dir, xtc) for xtc in xtc_names]

    def tearDown(self):
        """No per-test cleanup is performed."""
        pass
Ejemplo n.º 12
0
'''

from __future__ import print_function

from __future__ import absolute_import
import unittest
import os
import tempfile
import numpy as np
import mdtraj
from pyemma.coordinates import api
from pyemma.coordinates.data.feature_reader import FeatureReader
from pyemma.util.log import getLogger
from six.moves import range

log = getLogger('pyemma.' + 'TestFeatureReaderAndTICA')


class TestFeatureReaderAndTICA(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        cls.dim = 9  # dimension (must be divisible by 3)
        N = 50000  # length of single trajectory # 500000
        N_trajs = 10  # number of trajectories

        cls.w = 2.0 * np.pi * 1000.0 / N  # have 1000 cycles in each trajectory

        # get random amplitudes and phases
        cls.A = np.random.randn(cls.dim)
        cls.phi = np.random.random_sample((cls.dim, )) * np.pi * 2.0
        mean = np.random.randn(cls.dim)
Ejemplo n.º 13
0
__author__ = 'noe'

from pyemma.util.log import getLogger

import numpy as np


log = getLogger('Transformer')
__all__ = ['Transformer']


class Transformer(object):

    """

    Parameters
    ----------
    chunksize : int (optional)
        the chunksize used to batch process underlying data
    lag : int (optional)
        if you want to process time lagged data, set this to a value > 0.
    """

    def __init__(self, chunksize=100, lag=0):
        self.chunksize = chunksize
        self._lag = lag
        self._in_memory = False
        self._dataproducer = None

    @property
    def data_producer(self):
Ejemplo n.º 14
0
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import absolute_import
import unittest
import os
import numpy as np
import tempfile

from pyemma.util.log import getLogger
import pyemma.coordinates as coor
import pyemma.util.types as types
from six.moves import range

logger = getLogger('TestCluster')


class TestCluster(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        super(TestCluster, cls).setUpClass()
        cls.dtraj_dir = tempfile.mkdtemp()

        # generate Gaussian mixture
        means = [
            np.array([-3, 0]),
            np.array([-1, 1]),
            np.array([0, 0]),
            np.array([1, -1]),
            np.array([4, 2])
'''
Test feature reader and Tica by checking the properties of the ICs.
cov(ic_i,ic_j) = delta_ij and cov(ic_i,ic_j,tau) = lambda_i delta_ij
@author: Fabian Paul
'''
import unittest
import os
import tempfile
import numpy as np
import mdtraj
from pyemma.coordinates.api import tica, _TICA as TICA
from pyemma.coordinates.data.feature_reader import FeatureReader
from pyemma.util.log import getLogger

log = getLogger('TestFeatureReaderAndTICAProjection')

def random_invertible(n, eps=0.01):
    """Generate a real random invertible n x n matrix.

    A standard-normal random matrix is decomposed by SVD, every singular
    value below ``eps`` is raised to ``eps``, and the factors are multiplied
    back together.
    """
    left, sing_vals, right = np.linalg.svd(np.random.randn(n, n))
    floored = np.maximum(sing_vals, eps)
    return left.dot(np.diag(floored)).dot(right)

from nose.plugins.attrib import attr


@attr(slow=True)
class TestFeatureReaderAndTICAProjection(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        c = super(TestFeatureReaderAndTICAProjection, cls).setUpClass()
Ejemplo n.º 16
0
'''
Created on 18.10.2013

@author: marscher
'''
import numpy as np
from decimal import Decimal # for wrapping java.math.BigDecimal

from pyemma.util.log import getLogger
from pyemma.util.pystallone import ndarray_to_stallone_array, stallone, JArray, JInt, JDouble

__all__ = ['PathwayDecomposition']

log = getLogger()


# TODO: test
class PathwayDecomposition(object):

    def __init__(self, F, Q, A, B):
        """
        Parameters
        ----------
        F : The net fluxes matrix
          ndarray(dtype=float, shape=(n,n))
        Q : The committor vector
          ndarray(dtype=float, shape=(n)
        A : set of representatives (indices defining set A in F)
          ndarray(dtype=int)
        B : set of representatives
          ndarray(dtype=int)
Ejemplo n.º 17
0
@author: marscher
'''
import mdtraj
import os
import tempfile
import unittest
from pyemma.coordinates import api
from pyemma.coordinates.data.feature_reader import FeatureReader
from pyemma.util.log import getLogger
import pkg_resources

import numpy as np
from pyemma.coordinates.api import feature_reader, discretizer, tica

log = getLogger('TestFeatureReader')


class TestFeatureReader(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        c = super(TestFeatureReader, cls).setUpClass()
        # create a fake trajectory which has 3 atoms and coordinates are just a range
        # over all frames.
        cls.trajfile = tempfile.mktemp('.xtc')
        cls.n_frames = 1000
        cls.xyz = np.random.random(cls.n_frames * 3 * 3).reshape((cls.n_frames, 3, 3))
        log.debug("shape traj: %s" % str(cls.xyz.shape))
        cls.topfile = pkg_resources.resource_filename(
            'pyemma.coordinates.tests.test_featurereader', 'data/test.pdb')
Ejemplo n.º 18
0
'''
Created on 04.02.2015

@author: marscher
'''
import unittest

from pyemma.coordinates.data.data_in_memory import DataInMemory
from pyemma.util.log import getLogger
import numpy as np

import tempfile
import os

logger = getLogger('TestDataInMemory')


class TestDataInMemory(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        d = np.random.random((100, 3))
        d_1d = np.random.random(100)

        f1 = tempfile.mktemp()
        f2 = tempfile.mktemp(suffix='.npy')
        f3 = tempfile.mktemp()
        f4 = tempfile.mktemp(suffix='.npy')

        npz = tempfile.mktemp(suffix='.npz')
Ejemplo n.º 19
0
'''
Created on 02.02.2015

@author: marscher
'''
import unittest

import numpy as np

from pyemma.coordinates import pca
from pyemma.util.log import getLogger
import pyemma.util.types as types

# NOTE(review): the logger is named 'TestTICA' although the test class defined
# below is TestPCAExtensive -- possibly a copy-paste leftover; confirm.
logger = getLogger('TestTICA')


class TestPCAExtensive(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        import pyemma.msm.generation as msmgen

        # generate HMM with two Gaussians
        cls.P = np.array([[0.99, 0.01], [0.01, 0.99]])
        cls.T = 10000
        means = [np.array([-1, 1]), np.array([1, -1])]
        widths = [np.array([0.3, 2]), np.array([0.3, 2])]
        # continuous trajectory
        cls.X = np.zeros((cls.T, 2))
        # hidden trajectory
        dtraj = msmgen.generate_traj(cls.P, cls.T)
        for t in range(cls.T):
Ejemplo n.º 20
0
from pyemma.coordinates.data.featurizer import MDFeaturizer as _MDFeaturizer
from pyemma.coordinates.data.feature_reader import FeatureReader as _FeatureReader
from pyemma.coordinates.data.data_in_memory import DataInMemory as _DataInMemory
from pyemma.coordinates.data.util.reader_utils import create_file_reader as _create_file_reader
from pyemma.coordinates.data.frames_from_file import frames_from_file as _frames_from_file
# transforms
from pyemma.coordinates.transform.transformer import Transformer as _Transformer
from pyemma.coordinates.transform.pca import PCA as _PCA
from pyemma.coordinates.transform.tica import TICA as _TICA
# clustering
from pyemma.coordinates.clustering.kmeans import KmeansClustering as _KmeansClustering
from pyemma.coordinates.clustering.uniform_time import UniformTimeClustering as _UniformTimeClustering
from pyemma.coordinates.clustering.regspace import RegularSpaceClustering as _RegularSpaceClustering
from pyemma.coordinates.clustering.assign import AssignCenters as _AssignCenters

logger = getLogger('coordinates.api')

__author__ = "Frank Noe, Martin Scherer"
__copyright__ = "Copyright 2015, Computational Molecular Biology Group, FU-Berlin"
__credits__ = ["Benjamin Trendelkamp-Schroer", "Martin Scherer", "Frank Noe"]
__license__ = "FreeBSD"
__version__ = "2.0.0"
__maintainer__ = "Martin Scherer"
__email__ = "m.scherer AT fu-berlin DOT de"

__all__ = ['featurizer',  # IO
           'load',
           'source',
           'pipeline',
           'discretizer',
           'save_traj',
Ejemplo n.º 21
0
'''
Created on 18.02.2015

@author: marscher
'''
from pyemma.coordinates.transform.transformer import Transformer
from pyemma.util.log import getLogger
import numpy as np

log = getLogger('Clustering')


class AbstractClustering(Transformer):
    """
    provides a common interface for cluster algorithms.
    """
    def __init__(self):
        """Initialise the base class; start with no centers and no dtrajs."""
        super(AbstractClustering, self).__init__()
        self.dtrajs = []
        self.clustercenters = None

    def map(self, x):
        """get closest index of point in :attr:`clustercenters` to x."""
        # distances come from the attached data producer
        dists_to_centers = self.data_producer.distances(
            x, self.clustercenters)
        closest = np.argmin(dists_to_centers)
        return closest

    def save_dtrajs(self,
                    trajfiles=None,
                    prefix='',
                    output_format='ascii',
                    extension='.dtraj'):
Ejemplo n.º 22
0
'''
Created on 22.01.2015

@author: marscher
'''
from pyemma.util.log import getLogger

import numpy as np
from pyemma.coordinates.transform.transformer import Transformer


log = getLogger('WriterCSV')
__all__ = ['WriterCSV']


class WriterCSV(Transformer):

    '''
    shall write to csv files
    '''

    def __init__(self, filename):
        '''
        Initialise the base class and remember the output filename.

        ``self.last_frame`` starts out as False.
        '''
        super(WriterCSV, self).__init__()

        self.last_frame = False
        # filename should be obtained from source trajectory filename,
        # eg suffix it to given filename
        self.filename = filename
Ejemplo n.º 23
0
'''
Created on 02.02.2015

@author: marscher
'''
import unittest

import numpy as np

from pyemma.coordinates import pca
from pyemma.util.log import getLogger
import pyemma.util.types as types


# NOTE(review): the logger is named 'TestTICA' although the test class defined
# below is TestPCAExtensive -- possibly a copy-paste leftover; confirm.
logger = getLogger('TestTICA')


class TestPCAExtensive(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        import pyemma.msm.generation as msmgen

        # generate HMM with two Gaussians
        cls.P = np.array([[0.99, 0.01],
                      [0.01, 0.99]])
        cls.T = 10000
        means = [np.array([-1,1]), np.array([1,-1])]
        widths = [np.array([0.3,2]),np.array([0.3,2])]
        # continuous trajectory
        cls.X = np.zeros((cls.T, 2))
Ejemplo n.º 24
0
'''
Created on 19.01.2015

@author: marscher
'''
from .transformer import Transformer
from pyemma.util.linalg import eig_corr
from pyemma.util.log import getLogger
from pyemma.util.annotators import doc_inherit

import numpy as np

log = getLogger('TICA')
__all__ = ['TICA']


class TICA(Transformer):
    r"""
    Time-lagged independent component analysis (TICA)

    Given a sequence of multivariate data :math:`X_t`, computes the mean-free
    covariance and time-lagged covariance matrix:

    .. math::

        C_0 &=   (X_t - \mu)^T (X_t - \mu) \\
        C_{\tau} &= (X_t - \mu)^T (X_t+\tau - \mu)
    and solves the eigenvalue problem

    .. math:: C_{\tau} r_i = C_0 \lambda_i r_i
Ejemplo n.º 25
0
'''

from __future__ import absolute_import
import mdtraj
import tempfile
import unittest
from pyemma.coordinates import api
from pyemma.coordinates.data.feature_reader import FeatureReader
from pyemma.util.log import getLogger
import pkg_resources

import numpy as np
from pyemma.coordinates.api import discretizer, tica, source
from six.moves import range

log = getLogger('TestFeatureReader')

def create_traj(top, format='.xtc', dir=None):
    """Write a synthetic trajectory for the topology file ``top``.

    The number of frames is drawn at random from [500, 1500). Coordinates
    are a running integer range reshaped to (n_frames, 3, 3), every frame
    gets the same three unit cell vectors, and the time axis is the frame
    index.

    Parameters
    ----------
    top : file loaded with ``mdtraj.load`` to obtain the trajectory object
    format : suffix of the temporary trajectory file name
    dir : directory for the temporary file name (passed to ``tempfile.mktemp``)

    Returns
    -------
    (trajfile, xyz, n_frames) : file name, coordinate array, frame count
    """
    trajfile = tempfile.mktemp(suffix=format, dir=dir)
    n_frames = np.random.randint(500, 1500)
    log.debug("create traj with %i frames" % n_frames)
    xyz = np.arange(n_frames * 3 * 3).reshape((n_frames, 3, 3))

    # the same three box vectors, repeated once per frame
    box_rows = [[0, 0, 1], [0, 1, 0], [1, 0, 0]]

    traj = mdtraj.load(top)
    traj.xyz = xyz
    traj.unitcell_vectors = np.array(
        n_frames * box_rows).reshape(n_frames, 3, 3)
    traj.time = np.arange(n_frames)
    traj.save(trajfile)

    return trajfile, xyz, n_frames
Ejemplo n.º 26
0
#!/usr/bin/env python
# encoding: utf-8
"""
"""
import argparse
import sys
import os
from pyemma.util.log import getLogger
from pyemma.msm.generation import generate_traj
import pyemma

log = getLogger('mm_generate')


def handleArgs():
    """Parse the command line options and return the resulting namespace.

    Options: ``-T`` (str), ``-o/--output`` (required), ``-dt`` (int,
    default 1), ``-steps`` (int) and ``-start_state`` (int).
    """
    option_specs = (
        (('-T',), dict(type=str, help='path to transition matrix.')),
        (('-o', '--output'), dict(dest='output',
                                  required=True,
                                  help='output filename of trajectory.')),
        (('-dt',), dict(type=int, default=1)),
        (('-steps',), dict(type=int)),
        (('-start_state',), dict(type=int)),
    )

    cli = argparse.ArgumentParser()
    # register the options in the order listed above
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)

    return cli.parse_args()

Ejemplo n.º 27
0
from __future__ import absolute_import
import pyemma
from six.moves import range
from six.moves import zip
'''
Created on 04.02.2015

@author: marscher
'''
import unittest
import numpy as np

from pyemma.coordinates.data.data_in_memory import DataInMemory
from pyemma.util.log import getLogger

logger = getLogger('pyemma.' + 'TestDataInMemory')


class TestDataInMemory(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        d = np.random.random((100, 3))
        d_1d = np.random.random(100)

        cls.d = d
        cls.d_1d = d_1d
        return cls

    def test_skip(self):
        for skip in [0, 3, 13]:
            r1 = DataInMemory(self.d)
Ejemplo n.º 28
0
from __future__ import absolute_import
import unittest
import os
import tempfile

from pyemma.coordinates.data import MDFeaturizer
from pyemma.util.log import getLogger
import pyemma.coordinates.api as api
import numpy as np
from pyemma.coordinates.data.numpy_filereader import NumPyFileReader
from pyemma.coordinates.data.py_csv_reader import PyCSVReader as CSVReader
import shutil


logger = getLogger('TestReaderUtils')


class TestApiSourceFileReader(unittest.TestCase):

    @classmethod
    def setUpClass(cls):

        data_np = np.random.random((100, 3))
        data_raw = np.arange(300 * 4).reshape(300, 4)

        cls.dir = tempfile.mkdtemp("test-api-src")

        cls.npy = tempfile.mktemp(suffix='.npy', dir=cls.dir)
        cls.npz = tempfile.mktemp(suffix='.npz', dir=cls.dir)
        cls.dat = tempfile.mktemp(suffix='.dat', dir=cls.dir)
Ejemplo n.º 29
0
def create_logger(self):
    """Create the logger for ``self`` based on its ``name`` attribute.

    The logger is stored in ``self._logger_instance`` and returned. A weak
    reference to ``self``, created with the callback returned by
    ``_cleanup_logger(self)``, is stored in ``_refs`` under the same name.
    """
    self._logger_instance = getLogger(self.name)
    _refs[self.name] = weakref.ref(self, _cleanup_logger(self))
    return self._logger_instance
Ejemplo n.º 30
0
    def __init__(self, counts, mu=None, reversible=False, Tinit=None):
        """
        Sets the count matrix used for sampling. Assumes that the prior
        (if desired) is included.

        Parameters
        ----------
        counts : ndarray (n, n)
            the posterior count matrix; sparse input is converted to dense.
        mu : ndarray (n)
           optional stationary distribution, if given, the sampled transition
           matrix will have this stationary distribution. Only consulted when
           ``reversible`` is true.
        reversible : boolean
           should sample a reversible transition matrix.
        Tinit : ndarray(n, n)
           optional start point for sampling algorithm.

        Raises
        ------
        JavaException
            logged and re-raised if creating the underlying sampler fails.

        Example
        -------
        >>> C = np.array([[5, 2], [1,10]])
        >>> sampler = ITransitionMatrixSampler(C)
        >>> T = sampler.sample(10**6)
        >>> print(T)

        """
        if issparse(counts):
            counts = counts.toarray()
        # the interface in stallone takes counts as doubles
        counts = counts.astype(np.float64)

        try:
            C = ndarray_to_stallone_array(counts)
            jpackage = stallone.mc.sampling
            # convert types to java
            if Tinit is not None:
                Tinit = ndarray_to_stallone_array(Tinit)
            if mu is not None:
                mu = ndarray_to_stallone_array(mu)

            # Test for presence explicitly: the truth value of a converted
            # array object is not a reliable "was it given" indicator.
            has_mu = mu is not None
            has_tinit = Tinit is not None

            if reversible:
                if has_mu:  # fixed pi
                    factory = jpackage.TransitionMatrixSamplerRevFixPi
                    args = (C, Tinit, mu) if has_tinit else (C, mu)
                else:  # sample reversible matrix, with arbitrary pi
                    factory = jpackage.TransitionMatrixSamplerRev
                    args = (C, Tinit) if has_tinit else (C,)
            else:  # sample non rev
                factory = jpackage.TransitionMatrixSamplerNonrev
                args = (C, Tinit) if has_tinit else (C,)

            self.sampler = factory(*args)

        except JavaException as je:
            log = getLogger()
            log.exception("Error during creation of tmatrix sampling wrapper:"
                          " stack\n%s" % je.stacktrace())
            raise
Ejemplo n.º 31
0
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
from __future__ import absolute_import

import os
import unittest

from pyemma.util.files import TemporaryDirectory
from pyemma.util.log import getLogger
from six.moves import range
import numpy as np
import pyemma.coordinates as coor
import pyemma.util.types as types

logger = getLogger('pyemma.' + 'TestCluster')


class TestClusterAssign(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        super(TestClusterAssign, cls).setUpClass()

        # generate Gaussian mixture
        means = [
            np.array([-3, 0]),
            np.array([-1, 1]),
            np.array([0, 0]),
            np.array([1, -1]),
            np.array([4, 2])
        ]
Ejemplo n.º 32
0
from __future__ import absolute_import
import unittest
import os
import pkg_resources
import numpy as np

from pyemma.coordinates import api

from pyemma.coordinates.data.data_in_memory import DataInMemory
from pyemma.coordinates import source, tica
from pyemma.util.contexts import numpy_random_seed
from pyemma.util.log import getLogger
import pyemma.util.types as types
from six.moves import range

logger = getLogger('pyemma.' + 'TestTICA')


def mycorrcoef(X, Y, lag):
    X = X.astype(np.float64)
    Y = Y.astype(np.float64)
    mean_X = 0.5 * (np.mean(X[lag:], axis=0) + np.mean(X[0:-lag], axis=0))
    mean_Y = 0.5 * (np.mean(Y[lag:], axis=0) + np.mean(Y[0:-lag], axis=0))
    cov = (
        (X[0:-lag] - mean_X).T.dot(Y[0:-lag] - mean_Y) +
        (X[lag:] - mean_X).T.dot(Y[lag:] - mean_Y)) / (2 *
                                                       (X.shape[0] - lag) - 1)

    autocov_X = (
        (X[0:-lag] - mean_X).T.dot(X[0:-lag] - mean_X) +
        (X[lag:] - mean_X).T.dot(X[lag:] - mean_X)) / (2 *
Ejemplo n.º 33
0
import unittest
import os
import tempfile

import numpy as np

from pyemma.util.log import getLogger
import pyemma.coordinates as coor
import pyemma.util.types as types


# NOTE(review): the logger is named 'TestReaderUtils' although the test class
# defined below is TestCluster -- possibly a copy-paste leftover; confirm.
logger = getLogger('TestReaderUtils')


class TestCluster(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        super(TestCluster, cls).setUpClass()
        cls.dtraj_dir = tempfile.mkdtemp()

        # generate Gaussian mixture
        means = [np.array([-3,0]),
                 np.array([-1,1]),
                 np.array([0,0]),
                 np.array([1,-1]),
                 np.array([4,2])]
        widths = [np.array([0.3,2]),
                  np.array([0.3,2]),
                  np.array([0.3,2]),
                  np.array([0.3,2]),
Ejemplo n.º 34
0
#!/usr/bin/env python
'''
Created on 17.02.2014

@author: marscher
'''
import argparse
import sys

from pyemma.msm.estimation.api import count_matrix, largest_connected_set
from pyemma.msm.io.api import write_matrix
from pyemma.util.files import read_dtrajs_from_pattern
from pyemma.util.log import getLogger

log = getLogger()


def handleArgs():
    parser = argparse.ArgumentParser()
    parser.add_argument('-i',
                        dest='discTraj',
                        required=True,
                        nargs='+',
                        help='list of discrete trajectories')
    parser.add_argument('-o',
                        dest='output',
                        help='output filename of largest connected set')
    parser.add_argument(
        '-lag',
        dest='lag',
        help='lag time for which connectivity should be calculated for',
Ejemplo n.º 35
0
 def __create_logger(self):
     """Attach a per-instance logger named "<ClassName>[<hex id>]"."""
     self._logger = getLogger(
         "%s[%s]" % (self.__class__.__name__, hex(id(self))))
Ejemplo n.º 36
0
__author__ = 'noe'

import numpy as np
from .transformer import Transformer
from pyemma.util.log import getLogger
from pyemma.util.annotators import doc_inherit

log = getLogger('PCA')
__all__ = ['PCA']


class PCA(Transformer):

    r"""Principal component analysis.

    Given a sequence of multivariate data :math:`X_t`,
    computes the mean-free covariance matrix.

    .. math:: C = (X - \mu)^T (X - \mu)

    and solves the eigenvalue problem

    .. math:: C r_i = \sigma_i r_i,

    where :math:`r_i` are the principal components and :math:`\sigma_i` are
    their respective variances.

    When used as a dimension reduction method, the input data is projected onto
    the dominant principal components.

    Parameters
Ejemplo n.º 37
0
@author: marscher
'''
import numpy as np
import warnings

from mdtraj.utils.validation import cast_indices
from mdtraj.core.trajectory import load, Trajectory, _parse_topology
from mdtraj.formats.hdf5 import HDF5TrajectoryFile
from mdtraj.utils.unit import in_units_of
from mdtraj.formats.lh5 import LH5TrajectoryFile
from mdtraj.formats import DCDTrajectoryFile
from mdtraj.formats import XTCTrajectoryFile

from pyemma.util.log import getLogger

log = getLogger('patches')


def iterload(filename, chunk=100, **kwargs):
    """An iterator over a trajectory from one or more files on disk, in fragments

    This may be more memory efficient than loading an entire trajectory at
    once

    Parameters
    ----------
    filename : str
        Path to the trajectory file on disk
    chunk : int
        Number of frames to load at once from disk per iteration.  If 0, load all.
Ejemplo n.º 38
0
__author__ = 'noe'

from pyemma.util.log import getLogger
from pyemma.coordinates.clustering.interface import AbstractClustering

import numpy as np

log = getLogger('UniformTimeClustering')
__all__ = ['UniformTimeClustering']


class UniformTimeClustering(AbstractClustering):
    """
    Uniform time clustering

    Parameters
    ----------
    k : int
    """
    def __init__(self, k=2):
        """Initialise the base clustering and store ``k``.

        Parameters
        ----------
        k : int
            stored as ``self.k``; presumably the number of cluster
            centers -- TODO confirm.
        """
        super(UniformTimeClustering, self).__init__()
        self.k = k

    def describe(self):
        """Return a one-line description of this clustering including ``k``."""
        template = "[Uniform time clustering, k = %i]"
        return template % self.k

    def dimension(self):
        """Return the dimension reported by this clustering: always 1."""
        return 1

    def get_memory_per_frame(self):
        """
Ejemplo n.º 39
0
import pyemma
from six.moves import range
from six.moves import zip
'''
Created on 04.02.2015

@author: marscher
'''
import unittest
import numpy as np

from pyemma.coordinates.data.data_in_memory import DataInMemory
from pyemma.coordinates.transform.transformer import TransformerIteratorContext
from pyemma.util.log import getLogger

logger = getLogger('TestDataInMemory')


class TestDataInMemory(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        d = np.random.random((100, 3))
        d_1d = np.random.random(100)

        cls.d = d
        cls.d_1d = d_1d
        return cls

    def testWrongArguments(self):
        with self.assertRaises(ValueError):
            reader = DataInMemory("foo")
'''
Test feature reader and Tica by checking the properties of the ICs.
cov(ic_i,ic_j) = delta_ij and cov(ic_i,ic_j,tau) = lambda_i delta_ij
@author: Fabian Paul
'''
import unittest
import os
import tempfile
import numpy as np
import mdtraj
from pyemma.coordinates.api import tica, _TICA as TICA
from pyemma.coordinates.data.feature_reader import FeatureReader
from pyemma.util.log import getLogger

log = getLogger('TestFeatureReaderAndTICAProjection')


def random_invertible(n, eps=0.01):
    """Generate a real random invertible matrix.

    A matrix with standard-normal entries is drawn, decomposed by SVD, and
    rebuilt after raising every singular value below `eps` up to `eps`.

    Parameters
    ----------
    n : int
        number of rows and columns of the generated matrix.
    eps : float, optional
        lower bound imposed on the singular values (default 0.01).

    Returns
    -------
    ndarray, shape (n, n)
        the reassembled matrix ``u * diag(max(s, eps)) * v``.
    """
    gaussian = np.random.randn(n, n)
    left, sigma, right = np.linalg.svd(gaussian)
    # Floor the singular values so the rebuilt matrix stays away from
    # singularity.
    floored = np.diag(np.maximum(sigma, eps))
    return np.dot(np.dot(left, floored), right)


from nose.plugins.attrib import attr


@attr(slow=True)
class TestFeatureReaderAndTICAProjection(unittest.TestCase):
    @classmethod
Ejemplo n.º 41
0
'''
Created on 22.01.2015

@author: marscher
'''
import numpy as np
from sklearn.cluster import MiniBatchKMeans

from pyemma.util.log import getLogger
from pyemma.util.annotators import doc_inherit
from pyemma.coordinates.clustering.interface import AbstractClustering

# Module-level logger, registered under the name of the clustering class.
log = getLogger('KmeansClustering')

# Names exported by ``from <module> import *``: only the clustering class.
__all__ = ['KmeansClustering']


class KmeansClustering(AbstractClustering):
    r"""
    Kmeans clustering

    Parameters
    ----------
    n_clusters : int
        amount of cluster centers
    max_iter : int 
        how many iterations per chunk?

    """
    def __init__(self, n_clusters, max_iter=1000):
        super(KmeansClustering, self).__init__()
Ejemplo n.º 42
0
def tmatrix_sampler(C, reversible=False, mu=None, T0=None):
    r"""Generate transition matrix sampler object.

    Parameters
    ----------
    C : (M, M) ndarray or scipy.sparse matrix
        Count matrix. A sparse matrix is converted to a dense array
        before the sampler is created.
    reversible : bool, optional
        If true sample from the ensemble of transition matrices
        restricted to those obeying a detailed balance condition,
        else draw from the whole ensemble of stochastic matrices.
        Default is False.
    mu : array_like, optional
        The stationary distribution of the transition matrix samples.
        Default is None.
    T0 : ndarray, shape=(n, n) or scipy.sparse matrix, optional
        Starting point of the MC chain of the sampling algorithm.
        Has to obey the required constraints. Default is None.

    Returns
    -------
    sampler : A :py:class:`dense.ITransitionMatrixSampler` object.

    Raises
    ------
    JavaException
        Any ``JavaException`` raised while constructing the sampler is
        logged and then re-raised unchanged.

    Notes
    -----
    The transition matrix sampler generates transition matrices from
    the posterior distribution. The posterior distribution is given as
    a product of Dirichlet distributions

    .. math:: \mathbb{P}(T|C) \propto \prod_{i=1}^{M} \left( \prod_{j=1}^{M} p_{ij}^{c_{ij}} \right)

    The method can generate samples from the posterior under the following
    two constraints

    **Reversible sampling**

    Using a MCMC sampler outlined in [1]_ it is ensured that samples
    from the posterior are reversible, i.e. there is a probability
    vector :math:`(\mu_i)` such that :math:`\mu_i t_{ij} = \mu_j
    t_{ji}` holds for all :math:`i,j`.

    **Reversible sampling with fixed stationary vector**

    Using a MCMC sampler outlined in [2]_ it is ensured that samples
    from the posterior fulfill detailed balance with respect to a given
    probability vector :math:`(\mu_i)`.

    References
    ----------
    .. [1] Noe, F. 2008. Probability distributions of molecular observables
        computed from Markov state models. J Chem Phys 128: 244103.
    .. [2] Trendelkamp-Schroer, B and F Noe. 2013. Efficient Bayesian estimation
        of Markov model transition matrices with given stationary distribution.
        J Chem Phys 138: 164113.

    """
    # The sampler is handed a dense array, so sparse input is densified
    # after calling the conversion-warning helper.
    if issparse(C):
        _showSparseConversionWarning()
        C = C.toarray()

    # Imported locally, as in the original implementation.
    from pyemma.util.pystallone import JavaException
    try:
        return ITransitionMatrixSampler(C, mu, reversible, Tinit=T0)
    except JavaException:
        # log.exception records the active traceback; the bare raise then
        # propagates the original exception to the caller.
        log = getLogger()
        log.exception("Error during tmatrix sampling")
        raise
'''
Test feature reader and Tica with a set of cosine time series.
@author: Fabian Paul
'''
import unittest
import os
import tempfile
import numpy as np
import mdtraj
from pyemma.coordinates import api
from pyemma.coordinates.data.feature_reader import FeatureReader
from pyemma.util.log import getLogger

# Module-level logger for this test module, named after the test case class.
log = getLogger('TestFeatureReaderAndTICA')

from nose.plugins.attrib import attr


@attr(slow=True)
class TestFeatureReaderAndTICA(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        c = super(TestFeatureReaderAndTICA, cls).setUpClass()

        cls.dim = 99  # dimension (must be divisible by 3)
        N = 50000  # length of single trajectory # 500000
        N_trajs = 10  # number of trajectories
        
        cls.w = 2.0*np.pi*1000.0/N  # have 1000 cycles in each trajectory

        # get random amplitudes and phases