Example #1
0
    def test_evil_things_not_allowed(self):
        """Swap the pickling hook for a malicious one and ensure loading is refused.

        ``__reduce__`` of the container class is temporarily replaced by a
        function that asks the unpickler to call ``subprocess.Popen``. The
        hook must have been executed, and ``pyemma.load`` must raise
        ``UnpicklingError`` with a message containing 'not allowed'.
        """
        import subprocess
        from pickle import UnpicklingError
        import types
        # mutable cell so the nested function can record that it ran
        called = {'result': False}

        def evil(self):
            called['result'] = True
            return subprocess.Popen, ('/bin/sh', )

        inst = np_container(np.empty(0))
        old = SerializableMixIn.__getstate__
        old2 = inst.__class__.__reduce__
        try:
            del SerializableMixIn.__getstate__
            inst.__class__.__reduce__ = types.MethodType(evil, inst)
            inst.save(self.fn)
            with self.assertRaises(UnpicklingError) as e:
                pyemma.load(self.fn)
            self.assertIn('not allowed', str(e.exception))
            # The dict itself is non-empty and therefore always truthy; the
            # flag stored inside it is what records the invocation.
            self.assertTrue(called['result'], 'hack not executed')
        finally:
            # restore the patched class attributes for subsequent tests
            SerializableMixIn.__getstate__ = old
            np_container.__reduce__ = old2
Example #2
0
    def test_ML_MSM_estimated(self):
        """An estimated maximum likelihood MSM keeps its state across save/load.

        The model is saved twice: once under the default name and, after
        ``active_state_indexes`` has been accessed, once under the name 'new'.
        """
        params = {
            'dtrajs': [[0, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0],
                       [0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 0, 0]],
            'lag': 2,
        }
        ml_msm = pyemma.msm.estimate_markov_model(**params)
        self.assertIsInstance(ml_msm, pyemma.msm.MaximumLikelihoodMSM)

        ml_msm.save(self.f)
        new_obj = load(self.f)

        # compare the restored flag against the original, not against itself
        self.assertEqual(new_obj._estimated, ml_msm._estimated)
        np.testing.assert_equal(new_obj.transition_matrix,
                                ml_msm.transition_matrix)
        np.testing.assert_equal(new_obj.count_matrix_active,
                                ml_msm.count_matrix_active)
        np.testing.assert_equal(new_obj.active_set, ml_msm.active_set)
        np.testing.assert_equal(new_obj.ncv, ml_msm.ncv)
        np.testing.assert_equal(new_obj.discrete_trajectories_full,
                                ml_msm.discrete_trajectories_full)

        # Touch the attribute before saving again.
        # NOTE(review): presumably this triggers a lazy computation whose
        # result should then be part of the second save -- confirm.
        ml_msm.active_state_indexes
        ml_msm.save(self.f, 'new')
        restored = load(self.f, 'new')

        self.assertEqual(len(ml_msm.active_state_indexes),
                         len(restored.active_state_indexes))
        for x, y in zip(ml_msm.active_state_indexes,
                        restored.active_state_indexes):
            np.testing.assert_equal(x, y)
Example #3
0
def _check_serialize(vamp):
    """Round-trip ``vamp`` through ``save``/``pyemma.load`` and compare both objects.

    Parameters
    ----------
    vamp
        Object exposing ``save``, ``model``, ``cumvar``, the singular
        values/vectors and ``dimension()``.

    Returns
    -------
    The restored object, or ``vamp`` itself under Python 2, where the check
    is skipped.

    Raises
    ------
    AssertionError
        If a compared quantity differs between original and restored object.
    """
    import six
    if six.PY2:
        return vamp
    import os
    import tempfile
    import pyemma

    # Reserve the name and close the handle before entering ``try``: the
    # cleanup below then never refers to an unbound name, and the file is
    # not held open while it is written to and read back.
    with tempfile.NamedTemporaryFile(delete=False) as ntf:
        fname = ntf.name
    try:
        vamp.save(fname)
        restored = pyemma.load(fname)

        np.testing.assert_allclose(restored.model.C00, vamp.model.C00)
        np.testing.assert_allclose(restored.model.C0t, vamp.model.C0t)
        np.testing.assert_allclose(restored.model.Ctt, vamp.model.Ctt)
        np.testing.assert_allclose(restored.cumvar, vamp.cumvar)
        assert_allclose_ignore_phase(restored.singular_values,
                                     vamp.singular_values)
        assert_allclose_ignore_phase(restored.singular_vectors_left,
                                     vamp.singular_vectors_left)
        assert_allclose_ignore_phase(restored.singular_vectors_right,
                                     vamp.singular_vectors_right)
        np.testing.assert_equal(restored.dimension(), vamp.dimension())
        assert restored.model._svd_performed == vamp.model._svd_performed
        return restored
    finally:
        os.remove(fname)
Example #4
0
    def test_save_chain(self):
        """Saving with the streaming chain restores every stage; without it, none."""
        from pyemma.datasets import get_bpti_test_data

        source = pyemma.coordinates.source(
            get_bpti_test_data()['trajs'],
            top=get_bpti_test_data()['top'],
        )
        tica_stage = pyemma.coordinates.tica(source)
        clustering = pyemma.coordinates.cluster_uniform_time(tica_stage, 10)

        # first round trip: the whole pipeline is stored alongside the model
        clustering.save(self.fn, save_streaming_chain=True)
        with_chain = pyemma.load(self.fn)
        self.assertIsInstance(with_chain, type(clustering))
        upstream = with_chain.data_producer
        self.assertIsInstance(upstream, type(tica_stage))
        self.assertIsInstance(with_chain.data_producer.data_producer,
                              type(source))

        # second round trip: overwrite the file, this time without the chain
        clustering.save(self.fn, overwrite=True, save_streaming_chain=False)
        without_chain = pyemma.load(self.fn)
        assert without_chain.data_producer is None
Example #5
0
 def tearDown(self):
     """Check on Python 3 that ``self.ck`` survives a save/load round trip.

     The restored object must expose a ``has_errors`` attribute. The
     temporary file used for the round trip is removed afterwards.
     """
     if py3:
         import os
         import tempfile
         # Only the unique name is needed; close the handle right away so the
         # file is not held open while it is written to and read back.
         with tempfile.NamedTemporaryFile(delete=False) as fh:
             fname = fh.name
         try:
             self.ck.save(fname)
             restored = pyemma.load(fname)
             assert hasattr(restored, 'has_errors')
         finally:
             # delete=False leaves the file behind unless removed explicitly
             os.remove(fname)
Example #6
0
    def compare(self, obj, params):
        """Save ``obj``, load it back and check that selected attributes match.

        Parameters
        ----------
        obj
            Object providing ``save``; it is written to ``self.fn``.
        params
            Mapping whose keys name the attributes to compare. The values
            are not used.

        Returns
        -------
        The object returned by ``pyemma.load``, for further evaluation.
        """
        path = self.fn
        obj.save(path)
        restored = pyemma.load(path)

        for attr_name, _unused in params.items():
            got = getattr(restored, attr_name)
            want = getattr(obj, attr_name)
            # choose the comparison by the type of the restored value
            if isinstance(got, np.ndarray):
                check = np.testing.assert_equal
            elif isinstance(got, list):
                check = self.assertListEqual
            else:
                check = self.assertEqual
            check(got, want)
        return restored
Example #7
0
 def test_msm_coarse_grain(self):
     """PCCA results computed before saving are exposed by the restored MSM."""
     coarse = self.msm.pcca(2)
     self.msm.save(self.f)
     loaded = load(self.f)

     memberships = loaded.metastable_memberships
     np.testing.assert_equal(memberships, coarse.memberships)

     distributions = loaded.metastable_distributions
     np.testing.assert_equal(distributions, coarse.output_probabilities)
Example #8
0
 def test_ml_msm_sparse(self):
     """A sparse MSM is still flagged as sparse after save/load."""
     from pyemma.util.contexts import numpy_random_seed
     with numpy_random_seed(42):
         dtraj = np.random.randint(0, 1000, size=10000)
         estimated = pyemma.msm.estimate_markov_model([dtraj],
                                                      sparse=True,
                                                      lag=1)
         assert estimated.sparse
         estimated.save(self.f)
         reloaded = load(self.f)
         assert reloaded.sparse
Example #9
0
    def test_interpolation_with_map(self):
        """A saved v1 instance loads as the mapping class with ``y`` mapped."""
        original = test_cls_v1()
        original.save(self.fn)

        with patch_old_location(test_cls_v1, to_interpolate_with_functions):
            loaded = pyemma.load(self.fn)

        self.assertIsInstance(loaded, to_interpolate_with_functions)
        actual_y = loaded.y
        expected_y = to_interpolate_with_functions.map_y(None)
        self.assertEqual(actual_y, expected_y)
Example #10
0
    def test_updated_class_v2_to_v3(self):
        """A saved v2 instance loads as v3 with ``z == 23`` and no ``y``."""
        saved = test_cls_v2()
        saved.save(self.fn)

        with patch_old_location(test_cls_v2, test_cls_v3):
            upgraded = pyemma.load(self.fn)

        self.assertIsInstance(upgraded, test_cls_v3)
        self.assertEqual(upgraded.z, 23)
        still_has_y = hasattr(upgraded, 'y')
        self.assertFalse(still_has_y)
Example #11
0
    def test_recent_model_with_old_version(self):
        """Loading a model saved by a newer class version must be refused.

        The class version is lowered to 0 after saving, so the stored model
        looks more recent than the code that tries to load it.
        """
        saved = test_cls_v3()
        saved.save(self.fn)
        from pyemma._base.serialization.serialization import OldVersionUnsupported
        original_version = SerializableMixIn._get_version(saved.__class__)

        def _force_version(klass, number):
            # the version lives in a name-mangled class attribute
            attr = '_%s__serialize_version' % klass.__name__
            setattr(klass, attr, number)

        _force_version(test_cls_v3, 0)
        try:
            with self.assertRaises(OldVersionUnsupported) as ctx:
                pyemma.load(self.fn)
            expected_hint = "need at least version {version}".format(
                version=pyemma.version)
            self.assertIn(expected_hint, ctx.exception.args[0])
        finally:
            # put the real version back for subsequent tests
            _force_version(test_cls_v3, original_version)
Example #12
0
    def test_oom(self):
        """The OOM quantities of the fixture model survive save/load."""
        self.oom.save(self.f)
        loaded = load(self.f)

        oom_attributes = ('eigenvalues_OOM', 'timescales_OOM', 'OOM_rank',
                          'OOM_omega', 'OOM_sigma')
        for name in oom_attributes:
            np.testing.assert_equal(getattr(self.oom, name),
                                    getattr(loaded, name))
Example #13
0
    def test_cktest(self):
        """A Chapman-Kolmogorov test object keeps its results across save/load."""
        reference = self.bmsm_rev.cktest(nsets=2, mlags=[1, 3])
        reference.save(self.f)
        loaded = load(self.f)

        compared = ('lagtimes', 'predictions', 'predictions_conf',
                    'estimates', 'estimates_conf')
        for name in compared:
            np.testing.assert_equal(getattr(loaded, name),
                                    getattr(reference, name))
Example #14
0
    def test_msm_save_load(self):
        """The fixture MSM equals its restored copy, attribute-wise and as a whole."""
        self.msm.save(self.f)
        loaded = load(self.f)

        np.testing.assert_equal(loaded.transition_matrix,
                                self.msm.transition_matrix)
        for name in ('nstates', 'is_sparse', 'is_reversible'):
            self.assertEqual(getattr(loaded, name), getattr(self.msm, name))

        # finally compare the objects themselves
        self.assertEqual(loaded, self.msm)
Example #15
0
    def test_its(self):
        """Implied timescales for lags 1, 2, 3 are preserved by save/load."""
        reference = pyemma.msm.timescales_msm(self.obs_micro, lags=[1, 2, 3])
        reference.save(self.f)
        loaded = load(self.f)

        loaded_params = loaded.estimator.get_params(deep=False)
        reference_params = reference.estimator.get_params(deep=False)
        self.assertEqual(loaded_params, reference_params)
        for name in ('lags', 'timescales'):
            np.testing.assert_equal(getattr(loaded, name),
                                    getattr(reference, name))
Example #16
0
    def test_updated_class_v1_to_v2(self):
        """A saved v1 instance loads as v2 with ``z == 42`` and ``b`` equal to the old ``a``."""
        saved = test_cls_v1()
        saved.save(self.fn)

        with patch_old_location(test_cls_v1, test_cls_v2):
            upgraded = pyemma.load(self.fn)

        self.assertIsInstance(upgraded, test_cls_v2)
        self.assertEqual(upgraded.z, 42)
        renamed_value = upgraded.b
        np.testing.assert_equal(renamed_value, saved.a)
Example #17
0
    def test_renamed_class(self):
        """A class that no longer exists is remapped to its successor on load."""
        legacy = _deleted_in_old_version()
        legacy.save(self.fn)

        # Declare that the successor class handles the legacy location, then
        # load while that declaration is active.
        remap = patch_old_location(_deleted_in_old_version,
                                   test_cls_with_old_locations)
        with remap:
            loaded = pyemma.load(self.fn)

        self.assertIsInstance(loaded, test_cls_with_old_locations)
Example #18
0
    def test_oom(self):
        """An MSM estimated with ``weights='oom'`` keeps its OOM quantities across save/load."""
        reference = pyemma.msm.estimate_markov_model(
            self.obs_macro, self.lag, weights='oom')
        reference.save(self.f)
        loaded = load(self.f)

        oom_attributes = ('eigenvalues_OOM', 'timescales_OOM', 'OOM_rank',
                          'OOM_omega', 'OOM_sigma')
        for name in oom_attributes:
            np.testing.assert_equal(getattr(reference, name),
                                    getattr(loaded, name))
Example #19
0
    def test_hmsm(self):
        """A two-state hidden Markov model compares equal to its restored copy."""
        trajectories = [
            [0, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 0, 0],
        ]
        reference = pyemma.msm.estimate_hidden_markov_model(
            dtrajs=trajectories, lag=2, nstates=2)
        reference.save(self.f)

        loaded = load(self.f)
        self._compare_MLHMM(loaded, reference)
Example #20
0
 def test_serialization(self):
     """Re-run selected tests of this case against a saved and restored model."""
     import os
     import tempfile
     path = tempfile.mktemp()
     try:
         self.msm.save(path)
         from pyemma import load
         # swap the fixture for its restored copy, then reuse existing checks
         self.msm = load(path)
         for check in (self.test_eigenvectors, self.test_f, self.test_pi):
             check()
     finally:
         os.unlink(path)
Example #21
0
    def test_its_sampled(self):
        """Bayesian implied timescales, including the sample mean, survive save/load."""
        reference = pyemma.msm.timescales_msm(
            self.obs_micro, lags=[1, 3], errors='bayes', nsamples=10)
        reference.save(self.f)
        loaded = load(self.f)

        loaded_params = loaded.estimator.get_params(deep=False)
        reference_params = reference.estimator.get_params(deep=False)
        self.assertEqual(loaded_params, reference_params)
        for name in ('lags', 'timescales', 'sample_mean'):
            np.testing.assert_equal(getattr(loaded, name),
                                    getattr(reference, name))
Example #22
0
    def test_sampled_MSM_save_load(self):
        """The Bayesian fixture MSM keeps samples, matrix and settings across save/load."""
        self.bmsm_rev.save(self.f)
        loaded = load(self.f)

        # array-like quantities
        for name in ('samples', 'transition_matrix'):
            np.testing.assert_equal(getattr(loaded, name),
                                    getattr(self.bmsm_rev, name))

        # scalar properties and sampling settings
        plain_attributes = ('nstates', 'is_sparse', 'is_reversible',
                            'nsamples', 'nsteps', 'conf', 'show_progress')
        for name in plain_attributes:
            self.assertEqual(getattr(loaded, name),
                             getattr(self.bmsm_rev, name))
Example #23
0
    def test_bhmm(self):
        """A Bayesian hidden Markov model and its samples survive save/load."""
        trajectories = [
            [0, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 0, 0],
        ]
        reference = pyemma.msm.bayesian_hidden_markov_model(
            dtrajs=trajectories, lag=2, nstates=2, nsamples=2)
        reference.save(self.f)

        loaded = load(self.f)
        self._compare_MLHMM(loaded, reference)
        # the sampled models have to match as well
        self.assertEqual(loaded.samples, reference.samples)
Example #24
0
 def test_serialization(self):
     """Public attributes of ``self.tpt2`` must survive a save/load round trip.

     NumPy arrays are compared with ``np.testing.assert_equal``, every other
     value with ``assertEqual``. The temporary file is always removed.
     """
     import os
     import pyemma
     import tempfile

     # Reserve the name and close the handle before entering ``try``: the
     # cleanup below then never refers to an unbound name, and the file is
     # not held open while it is written to and read back.
     with tempfile.NamedTemporaryFile(delete=False) as ntf:
         fname = ntf.name
     try:
         self.tpt2.save(fname)
         restored = pyemma.load(fname)
         public_attrs = ('stationary_distribution', 'flux',
                         'gross_flux', 'committor',
                         'backward_committor', 'dt_model', 'total_flux')
         for attr in public_attrs:
             value = getattr(self.tpt2, attr)
             if isinstance(value, np.ndarray):
                 np.testing.assert_equal(value, getattr(restored, attr))
             else:
                 self.assertEqual(value, getattr(restored, attr))
     finally:
         os.unlink(fname)
Example #25
0
    def test_serialize(self):
        """Parameters and estimated quantities of ``self.amm`` survive save/load."""
        import os
        import tempfile
        import pyemma

        # attributes holding the estimation parameters
        parameter_names = ('lag', 'count_mode', 'connectivity', 'dt_traj',
                           'E', 'm', 'w', 'eps', 'support_ci', 'maxiter',
                           'max_cache', 'mincount_connectivity')
        # attributes holding estimated quantities
        estimated_names = ('E_active', 'E_min', 'E_max', 'mhat', 'lagrange',
                           'sigmas', 'count_inside', 'count_outside')
        # attributes derived from msm_estimator
        derived_names = ('P', 'pi')

        path = tempfile.mktemp()
        try:
            self.amm.save(path)
            loaded = pyemma.load(path)

            for name in parameter_names + estimated_names + derived_names:
                np.testing.assert_equal(getattr(self.amm, name),
                                        getattr(loaded, name))
        finally:
            os.unlink(path)
                    pdb.set_trace()
                else:

                    targets.append(keyword)

    state_index = features_df.KeywordLabel.to_list()

    input_directory = constants.output_data_markov_modelling
    stationary_probs_complete = pd.read_csv(input_directory + '/' +
                                            'complete' + '/' +
                                            'stationary_probs.csv')

    for element in metadata_fields_to_agregate:
        print(element)

        mm = pyemma.load(input_directory + element + '_temp/' + 'pyemma_model',
                         'simple')
        output_file_name = 'most_imp_path_' + sources[0] + '_' + targets[0]
        pdb.set_trace()
        visualize_most_important_paths(
            mm, features_df, sources[0], targets[0],
            input_directory + element + '_temp/' + output_file_name)
        '''
        data = mm.P
        stats = stationary_probs_complete[0:100]
        stats  =stats.rename(columns={'topic_name':'KeywordLabel'})
        nodes = features_df.rename(columns={'Unnamed: 0':'index_original'})


        del stats['Unnamed: 0']
        stats_with_indices = stats.merge(nodes)
        stats_with_indices = stats_with_indices.sort_values("index_original")
Example #27
0
# Flatten the nested ``axes_flux`` container into a flat list; one entry is
# consumed from its front per loop iteration below.
axesflux_list = [i for subl in axes_flux for i in subl]

# One iteration per (concentration label, bulk concentration, lag) triple.
for c, c_bulk, l in zip(conc, concentrations, lag):
    # lag expressed in ps (``l`` is in timesteps, see the message printed below)
    time = l * ps
    # take the next free axes for this concentration
    ax = axes_list.pop(0)
    ax_flux = axesflux_list.pop(0)
    # two-level index: this concentration crossed with every state label
    index = pd.MultiIndex.from_product([[c], state_labels],
                                       names=['[{}]'.format(mol), 'State'])
    print("Calculating models for {} and lag {} timesteps = {} ps".format(
        c, l, time))

    # Bayesian MSM for lag ``l``: reuse a previously saved model if one exists
    # under this file name, otherwise estimate it and save it for later runs.
    model = "{}bMSM-{}-{}-{}-s{}-{}ps.npy".format(work_path, prot, mol, c, dt,
                                                  time)
    if os.path.exists(model):
        bmsm = pyemma.load(model)
        print(
            "\tbMSM model found ({}), skipping calculations \n".format(model))
    else:
        print("\tReading data...")
        kNAC = pyemma.coordinates.load(
            '{}kNAC-{}-{}-{}-i{}-o{}-s{}.npy'.format(work_path, prot, mol, c,
                                                     start * ps, stop,
                                                     dt))  #, stride=2)
        print("\tBayesian MSM with lag time {} ({} ps)....\n".format(l, time))
        # the loaded data is flattened and cast to int before estimation
        bmsm = pyemma.msm.bayesian_markov_model(kNAC.flatten().astype(int),
                                                lag=l,
                                                dt_traj='{} ps'.format(ps),
                                                conf=0.95)
        # overwrite=True replaces any stale file of the same name
        bmsm.save(model, overwrite=True)
Example #28
0
                                                   weights='empirical')
            print("time1", time.time() - time_start)
            slow_modes = tica_obj_tmp.get_output(stride=vamp_stride)
            tica_timescales = tica_obj_tmp.timescales
            print("tica_timescales", tica_timescales)
            print("slow modes", slow_modes[0][0, :10])
            yall_slow_modes = np.concatenate(slow_modes)
            yall = yall_slow_modes
            yall_slow_modes.shape
            figX, axX = pyemma.plots.plot_free_energy(yall_slow_modes[:, 0],
                                                      yall_slow_modes[:, 1])
            figX.savefig(resultspath + name_data + 'ticafe_i' +
                         str(iter_found) + '.png')

        if Kconfig.project_tica == 'True':
            tica_obj = pyemma.load(refticapath)
            yticaproj = np.concatenate(tica_obj.transform(data))
            xlim = (-2.2, 1.7)
            ylim = (-2.5, 3.4)
            vmax_set = 10
            step = 0.1
            levels = np.arange(0, vmax_set + step, step)
            figX, axX, mi = pyemma.plots.plot_free_energy(yticaproj[:, 0],
                                                          yticaproj[:, 1],
                                                          levels=levels,
                                                          legacy=False)
            axX.set_xlim(xlim)
            axX.set_ylim(ylim)
            axX.set_xlabel('TICA 0')
            axX.set_ylabel('TICA 1')
            figX.savefig(resultspath + name_data + "ticaprojreffe_i" +
Example #29
0
    formatter_class=argparse.ArgumentDefaultsHelpFormatter)
# ``h`` is reused as a scratch variable for the help text of each argument.
h = ('HDF5 file containing a saved PyEMMA PCA or TICA object ' +
     'equipped with a FeatureReader')
parser.add_argument('--model', metavar='MODELFILE', default='pca.h5', help=h)
h = ('NumPy .npz file containing the projected trajectories ' +
     'indexed by original filenames')
parser.add_argument('--output',
                    metavar='OUTPUTFILE',
                    default='ptrajs.npz',
                    help=h)
h = ('trajectory file compatible with the FeatureReader of the ' +
     'transformation object saved in MODELFILE')
parser.add_argument('trajfiles', metavar='TRAJFILE', nargs='+', help=h)
args = parser.parse_args()

# restore the saved transformation object
transformer = pyemma.load(args.model)

# Project each trajectory with the featurizer stored alongside the
# transformer and park the result in a hidden ``.<i>.npy`` file in the
# current working directory.
for i, trajfile in enumerate(args.trajfiles):
    data = pyemma.coordinates.load(
        trajfile, features=transformer.data_producer.featurizer)
    np.save(f'.{i}.npy', transformer.transform(data))

# Read the intermediate files back, keyed by the original trajectory file
# name, and write them into a single .npz archive.
ptrajs = {
    trajfile: np.load(f'.{i}.npy')
    for i, trajfile in enumerate(args.trajfiles)
}
np.savez(args.output, **ptrajs)

# Remove the intermediate files.
# NOTE(review): this cleanup is skipped if any step above raises.
for i in range(len(args.trajfiles)):
    os.remove(f'.{i}.npy')
Example #30
0
def main(argv=None):
    """List the PyEMMA models stored in one or more files.

    For every file the descriptive attributes of all stored models are
    collected. If a model was saved together with its streaming chain, the
    model is loaded and the ``repr`` of every chain element is recorded as
    well. The result is printed as text, or as JSON when ``--json`` is given.

    Parameters
    ----------
    argv : list of str, optional
        Command line arguments handed to ``argparse``; with ``None``,
        ``argparse`` falls back to ``sys.argv``.

    Returns
    -------
    int
        0 on success, 1 as soon as one file could not be inspected.
    """
    import sys

    import six
    if six.PY2:
        print('This tool is only available for Python3.')
        sys.exit(1)

    import argparse
    from pyemma import load
    from pyemma._base.serialization.h5file import H5File

    parser = argparse.ArgumentParser()
    parser.add_argument('--json', action='store_true', default=False)
    parser.add_argument('files',
                        metavar='files',
                        nargs='+',
                        help='files to inspect')
    # NOTE(review): --recursive is parsed but not consulted below; chains are
    # gathered whenever a model was saved with one. Confirm the intent.
    parser.add_argument(
        '--recursive',
        action='store_true',
        default=False,
        help='If the pipeline of the stored estimator was stored, '
        'gather these information as well. This will require to load the model, '
        'so it could take a while, if the pipeline contains lots of data.')
    parser.add_argument('-v', '--verbose', action='store_true', default=False)
    args = parser.parse_args(argv)
    # store found models by filename
    from collections import defaultdict
    models = defaultdict(dict)

    for f in args.files:
        try:
            with H5File(f) as fh:
                m = fh.models_descriptive
            for k in m:
                models[f][k] = m[k]
            for model_name, values in m.items():
                if values['saved_streaming_chain']:
                    # load the model this entry describes, not just the
                    # one stored under the default name
                    restored = load(f, model_name)
                    models[f][model_name]['input_chain'] = [
                        repr(x) for x in restored._data_flow_chain()
                    ]
        except Exception as e:
            # Exception instead of BaseException, so that Ctrl-C and
            # SystemExit are not reported as an invalid model file.
            print(
                '{} did not contain a valid PyEMMA model. Error was {err}. '
                'If you are sure, that it does, please post an issue on Github'
                .format(f, err=e))
            if args.verbose:
                import traceback
                traceback.print_exc()
            return 1

    if not args.json:
        from io import StringIO

        buff = StringIO()
        buff.write('PyEMMA models\n')
        # underline the title; tell() - 1 excludes the trailing newline
        buff.write('=' * (buff.tell() - 1))
        buff.write('\n' * 2)
        for f in models:
            buff.write('file: {}'.format(f))
            buff.write('\n')
            buff.write('-' * 80)
            buff.write('\n')
            model_file = models[f]
            for i, model_name in enumerate(model_file):
                attrs = model_file[model_name]
                buff.write('{index}. name: {key}\n'
                           'created: {created}\n'
                           '{repr}\n'.format(key=model_name,
                                             index=i + 1,
                                             created=attrs['created_readable'],
                                             repr=attrs['class_str']))
                if attrs['saved_streaming_chain']:
                    buff.write('\n---------Input chain---------\n')
                    for j, x in enumerate(attrs['input_chain']):
                        buff.write('{index}. {repr}\n'.format(index=j + 1,
                                                              repr=x))
            buff.write('-' * 80)
            buff.write('\n')
        buff.seek(0)
        print(buff.read())
    else:
        import json
        json.dump(models, fp=sys.stdout)
    return 0