def test_evil_things_not_allowed(self):
    """Ensure loading refuses a file whose pickling hook was hijacked.

    The ``__reduce__`` of the instance's class is temporarily replaced by a
    function that returns ``subprocess.Popen`` as the reconstruction
    callable. The hook must have been executed, and loading the saved file
    must raise ``UnpicklingError`` with a 'not allowed' message. The
    patched attributes are restored afterwards.
    """
    import subprocess
    import types
    from pickle import UnpicklingError

    # mutable flag, so the nested function can record that it has run
    called = {'result': False}

    def evil(self):
        called['result'] = True
        return subprocess.Popen, ('/bin/sh', )

    inst = np_container(np.empty(0))
    old = SerializableMixIn.__getstate__
    old2 = inst.__class__.__reduce__
    try:
        del SerializableMixIn.__getstate__
        inst.__class__.__reduce__ = types.MethodType(evil, inst)
        inst.save(self.fn)
        with self.assertRaises(UnpicklingError) as e:
            pyemma.load(self.fn)
        self.assertIn('not allowed', str(e.exception))
        # Check the flag itself: the dict holding it is non-empty and
        # therefore always truthy, which made this assertion vacuous.
        self.assertTrue(called['result'], 'hack not executed')
    finally:
        SerializableMixIn.__getstate__ = old
        np_container.__reduce__ = old2
def test_ML_MSM_estimated(self):
    """Save an estimated maximum-likelihood MSM and compare the restored copy.

    The model is saved twice: once under the default name and once, after
    ``active_state_indexes`` has been accessed, under the name 'new'.
    """
    params = {
        'dtrajs': [[0, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0],
                   [0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 0, 0]],
        'lag': 2
    }
    ml_msm = pyemma.msm.estimate_markov_model(**params)
    assert isinstance(ml_msm, pyemma.msm.MaximumLikelihoodMSM)

    ml_msm.save(self.f)
    new_obj = load(self.f)

    # compare against the original model; comparing the restored object
    # with itself can never fail.
    self.assertEqual(new_obj._estimated, ml_msm._estimated)
    np.testing.assert_equal(new_obj.transition_matrix, ml_msm.transition_matrix)
    np.testing.assert_equal(new_obj.count_matrix_active, ml_msm.count_matrix_active)
    np.testing.assert_equal(new_obj.active_set, ml_msm.active_set)
    np.testing.assert_equal(new_obj.ncv, ml_msm.ncv)
    np.testing.assert_equal(new_obj.discrete_trajectories_full,
                            ml_msm.discrete_trajectories_full)

    # access the attribute before saving again
    ml_msm.active_state_indexes
    ml_msm.save(self.f, 'new')
    restored = load(self.f, 'new')

    assert len(ml_msm.active_state_indexes) == len(
        restored.active_state_indexes)
    for x, y in zip(ml_msm.active_state_indexes,
                    restored.active_state_indexes):
        np.testing.assert_equal(x, y)
def _check_serialize(vamp):
    """Round-trip ``vamp`` through save/load and compare it with the copy.

    Parameters
    ----------
    vamp : object
        Estimator providing ``save``, ``model``, ``cumvar``, the singular
        values/vectors and ``dimension()``.

    Returns
    -------
    The restored object, or ``vamp`` itself when running under Python 2.
    """
    import six
    if six.PY2:
        return vamp

    import os
    import tempfile
    import pyemma

    covariances = ('C00', 'C0t', 'Ctt')
    phase_free = ('singular_values',
                  'singular_vectors_left',
                  'singular_vectors_right')
    try:
        with tempfile.NamedTemporaryFile(delete=False) as ntf:
            vamp.save(ntf.name)
            restored = pyemma.load(ntf.name)

        for name in covariances:
            np.testing.assert_allclose(getattr(restored.model, name),
                                       getattr(vamp.model, name))
        np.testing.assert_allclose(restored.cumvar, vamp.cumvar)
        for name in phase_free:
            assert_allclose_ignore_phase(getattr(restored, name),
                                         getattr(vamp, name))
        np.testing.assert_equal(restored.dimension(), vamp.dimension())
        assert restored.model._svd_performed == vamp.model._svd_performed
        return restored
    finally:
        os.remove(ntf.name)
def test_save_chain(self):
    """A pipeline saved with its streaming chain is restored stage by stage.

    When saved again without the chain, the restored object has no data
    producer.
    """
    from pyemma.datasets import get_bpti_test_data

    trajs = get_bpti_test_data()['trajs']
    top = get_bpti_test_data()['top']
    reader = pyemma.coordinates.source(trajs, top=top)
    tica = pyemma.coordinates.tica(reader)
    cluster = pyemma.coordinates.cluster_uniform_time(tica, 10)

    # with chain: every stage has to come back with its original type
    cluster.save(self.fn, save_streaming_chain=True)
    restored = pyemma.load(self.fn)
    stage = restored
    for expected in (cluster, tica, reader):
        self.assertIsInstance(stage, type(expected))
        if expected is not reader:
            stage = stage.data_producer

    # without chain: nothing is attached to the restored estimator
    cluster.save(self.fn, overwrite=True, save_streaming_chain=False)
    restored = pyemma.load(self.fn)
    assert restored.data_producer is None
def tearDown(self):
    """Save ``self.ck`` to a temporary file, reload it and check the copy.

    Only runs when ``py3`` is truthy. The restored object must expose a
    ``has_errors`` attribute. The temporary file is removed afterwards.
    """
    if not py3:
        return

    import os
    import tempfile

    fh = tempfile.NamedTemporaryFile(delete=False)
    try:
        with fh:
            self.ck.save(fh.name)
            restored = pyemma.load(fh.name)
            assert hasattr(restored, 'has_errors')
    finally:
        # delete=False leaves the file on disk, so remove it explicitly
        # instead of leaking one file per test.
        os.remove(fh.name)
def compare(self, obj, params):
    """Save ``obj`` to ``self.fn``, reload it and compare selected attributes.

    Parameters
    ----------
    obj : object
        Object providing a ``save`` method.
    params : dict
        Its keys name the attributes which are compared between ``obj``
        and the restored object; the values are not used.

    Returns
    -------
    The restored object, for further evaluation by the caller.
    """
    target = self.fn
    obj.save(target)
    restored = pyemma.load(target)

    for name, _ in params.items():
        actual = getattr(restored, name)
        expected = getattr(obj, name)
        # pick the comparison matching the type of the restored value
        if isinstance(actual, np.ndarray):
            check = np.testing.assert_equal
        elif isinstance(actual, list):
            check = self.assertListEqual
        else:
            check = self.assertEqual
        check(actual, expected)

    return restored
def test_msm_coarse_grain(self):
    """PCCA results computed before saving are available on the restored MSM."""
    pcca = self.msm.pcca(2)
    self.msm.save(self.f)
    restored = load(self.f)

    # (attribute on the restored MSM, attribute on the PCCA object)
    pairs = (
        ('metastable_memberships', 'memberships'),
        ('metastable_distributions', 'output_probabilities'),
    )
    for msm_attr, pcca_attr in pairs:
        np.testing.assert_equal(getattr(restored, msm_attr),
                                getattr(pcca, pcca_attr))
def test_ml_msm_sparse(self):
    """The ``sparse`` flag of an MSM survives a save/load round trip."""
    from pyemma.util.contexts import numpy_random_seed

    with numpy_random_seed(42):
        dtraj = np.random.randint(0, 1000, size=10000)
        msm = pyemma.msm.estimate_markov_model([dtraj], sparse=True, lag=1)
        assert msm.sparse

        msm.save(self.f)
        restored = load(self.f)
        assert restored.sparse
def test_interpolation_with_map(self):
    """A saved ``test_cls_v1`` is restored as ``to_interpolate_with_functions``.

    The restored ``y`` must equal what ``map_y(None)`` of the new class
    returns.
    """
    saved = test_cls_v1()
    saved.save(self.fn)

    new_cls = to_interpolate_with_functions
    with patch_old_location(test_cls_v1, new_cls):
        inst_restored = pyemma.load(self.fn)

    self.assertIsInstance(inst_restored, new_cls)
    self.assertEqual(inst_restored.y, new_cls.map_y(None))
def test_updated_class_v2_to_v3(self):
    """A saved ``test_cls_v2`` is restored as ``test_cls_v3``.

    The restored instance has ``z == 23`` and no ``y`` attribute.
    """
    saved = test_cls_v2()
    saved.save(self.fn)

    with patch_old_location(test_cls_v2, test_cls_v3):
        inst_restored = pyemma.load(self.fn)

    self.assertIsInstance(inst_restored, test_cls_v3)
    self.assertEqual(inst_restored.z, 23)
    self.assertFalse(hasattr(inst_restored, 'y'))
def test_recent_model_with_old_version(self):
    """No forward compatibility: old software rejects recent models.

    The serialize version of ``test_cls_v3`` is temporarily set to 0
    after an instance has been saved, so loading must raise
    ``OldVersionUnsupported``.
    """
    from pyemma._base.serialization.serialization import OldVersionUnsupported

    def _set_version(cls, val):
        # name-mangled private class attribute holding the version
        setattr(cls, '_%s__serialize_version' % cls.__name__, val)

    inst = test_cls_v3()
    inst.save(self.fn)
    saved_version = SerializableMixIn._get_version(inst.__class__)

    _set_version(test_cls_v3, 0)
    try:
        with self.assertRaises(OldVersionUnsupported) as c:
            pyemma.load(self.fn)
        expected = "need at least version {version}".format(
            version=pyemma.version)
        self.assertIn(expected, c.exception.args[0])
    finally:
        _set_version(test_cls_v3, saved_version)
def test_oom(self):
    """The OOM attributes of ``self.oom`` survive a save/load round trip."""
    self.oom.save(self.f)
    restored = load(self.f)

    oom_attributes = (
        'eigenvalues_OOM',
        'timescales_OOM',
        'OOM_rank',
        'OOM_omega',
        'OOM_sigma',
    )
    for name in oom_attributes:
        np.testing.assert_equal(getattr(self.oom, name),
                                getattr(restored, name))
def test_cktest(self):
    """A Chapman-Kolmogorov test object survives a save/load round trip."""
    ck = self.bmsm_rev.cktest(nsets=2, mlags=[1, 3])
    ck.save(self.f)
    restored = load(self.f)

    compared = (
        'lagtimes',
        'predictions',
        'predictions_conf',
        'estimates',
        'estimates_conf',
    )
    for name in compared:
        np.testing.assert_equal(getattr(restored, name), getattr(ck, name))
def test_msm_save_load(self):
    """``self.msm`` equals its restored copy, attribute-wise and as a whole."""
    original = self.msm
    original.save(self.f)
    new_obj = load(self.f)

    np.testing.assert_equal(new_obj.transition_matrix,
                            original.transition_matrix)
    for name in ('nstates', 'is_sparse', 'is_reversible'):
        self.assertEqual(getattr(new_obj, name), getattr(original, name))
    self.assertEqual(new_obj, original)
def test_its(self):
    """Implied timescales for lags 1, 2, 3 survive a save/load round trip."""
    its = pyemma.msm.timescales_msm(self.obs_micro, lags=[1, 2, 3])
    its.save(self.f)
    restored = load(self.f)

    restored_params = restored.estimator.get_params(deep=False)
    original_params = its.estimator.get_params(deep=False)
    self.assertEqual(restored_params, original_params)

    for name in ('lags', 'timescales'):
        np.testing.assert_equal(getattr(restored, name), getattr(its, name))
def test_updated_class_v1_to_v2(self):
    """A saved ``test_cls_v1`` is restored as ``test_cls_v2``.

    The restored instance has ``z == 42`` and its ``b`` equals the ``a``
    of the saved instance.
    """
    inst = test_cls_v1()
    inst.save(self.fn)

    with patch_old_location(test_cls_v1, test_cls_v2):
        inst_restored = pyemma.load(self.fn)

    self.assertIsInstance(inst_restored, test_cls_v2)
    self.assertEqual(inst_restored.z, 42)
    np.testing.assert_equal(inst_restored.b, inst.a)
def test_renamed_class(self):
    """Ensure a removed class gets properly remapped to an existing one."""
    replacement = test_cls_with_old_locations

    removed = _deleted_in_old_version()
    removed.save(self.fn)

    # declare the removed class as being handled by the replacement class,
    # then restore and check that the remapping took place.
    with patch_old_location(_deleted_in_old_version, replacement):
        restored = pyemma.load(self.fn)

    self.assertIsInstance(restored, replacement)
def test_oom(self):
    """An MSM estimated with ``weights='oom'`` keeps its OOM attributes."""
    oom = pyemma.msm.estimate_markov_model(self.obs_macro, self.lag,
                                           weights='oom')
    oom.save(self.f)
    restored = load(self.f)

    oom_attributes = (
        'eigenvalues_OOM',
        'timescales_OOM',
        'OOM_rank',
        'OOM_omega',
        'OOM_sigma',
    )
    for name in oom_attributes:
        np.testing.assert_equal(getattr(oom, name), getattr(restored, name))
def test_hmsm(self):
    """A hidden Markov model survives a save/load round trip."""
    dtrajs = [
        [0, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 0, 0],
    ]
    hmm = pyemma.msm.estimate_hidden_markov_model(dtrajs=dtrajs, lag=2,
                                                  nstates=2)
    hmm.save(self.f)
    new_obj = load(self.f)

    self._compare_MLHMM(new_obj, hmm)
def test_serialization(self):
    """Check that the tests still hold for a restored model.

    ``self.msm`` is saved, replaced by its restored copy, and then
    ``test_eigenvectors``, ``test_f`` and ``test_pi`` are run against it.
    """
    import os
    import shutil
    import tempfile
    from pyemma import load

    # A private directory replaces the deprecated, race-prone
    # tempfile.mktemp; removing the directory also succeeds if save()
    # failed before creating the file, so the real error is not masked.
    tmpdir = tempfile.mkdtemp()
    try:
        f = os.path.join(tmpdir, 'model')
        self.msm.save(f)
        restored = load(f)

        self.msm = restored
        self.test_eigenvectors()
        self.test_f()
        self.test_pi()
    finally:
        shutil.rmtree(tmpdir)
def test_its_sampled(self):
    """Bayesian implied timescales survive a save/load round trip."""
    its = pyemma.msm.timescales_msm(self.obs_micro, lags=[1, 3],
                                    errors='bayes', nsamples=10)
    its.save(self.f)
    restored = load(self.f)

    restored_params = restored.estimator.get_params(deep=False)
    original_params = its.estimator.get_params(deep=False)
    self.assertEqual(restored_params, original_params)

    for name in ('lags', 'timescales', 'sample_mean'):
        np.testing.assert_equal(getattr(restored, name), getattr(its, name))
def test_sampled_MSM_save_load(self):
    """``self.bmsm_rev`` and its restored copy agree on all checked attributes."""
    original = self.bmsm_rev
    original.save(self.f)
    new_obj = load(self.f)

    # compared with numpy's testing helper
    for name in ('samples', 'transition_matrix'):
        np.testing.assert_equal(getattr(new_obj, name),
                                getattr(original, name))

    # compared with plain equality
    plain = ('nstates', 'is_sparse', 'is_reversible', 'nsamples',
             'nsteps', 'conf', 'show_progress')
    for name in plain:
        self.assertEqual(getattr(new_obj, name), getattr(original, name))
def test_bhmm(self):
    """A Bayesian hidden Markov model, including its samples, is restored."""
    dtrajs = [
        [0, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 0, 0],
    ]
    hmm = pyemma.msm.bayesian_hidden_markov_model(dtrajs=dtrajs, lag=2,
                                                  nstates=2, nsamples=2)
    hmm.save(self.f)
    new_obj = load(self.f)

    self._compare_MLHMM(new_obj, hmm)
    # the sampled models have to match as well
    self.assertEqual(new_obj.samples, hmm.samples)
def test_serialization(self):
    """The public attributes of ``self.tpt2`` survive a save/load round trip."""
    import os
    import tempfile
    import pyemma

    public_attrs = ('stationary_distribution', 'flux', 'gross_flux',
                    'committor', 'backward_committor', 'dt_model',
                    'total_flux')
    try:
        with tempfile.NamedTemporaryFile(delete=False) as ntf:
            self.tpt2.save(ntf.name)
            restored = pyemma.load(ntf.name)

        for attr in public_attrs:
            value = getattr(self.tpt2, attr)
            # arrays need numpy's comparison, everything else plain equality
            if isinstance(value, np.ndarray):
                check = np.testing.assert_equal
            else:
                check = self.assertEqual
            check(value, getattr(restored, attr))
    finally:
        os.unlink(ntf.name)
def test_serialize(self):
    """``self.amm`` and its restored copy agree on parameters and results."""
    import os
    import shutil
    import tempfile
    import pyemma

    estimation_parameters = (
        'lag', 'count_mode', 'connectivity', 'dt_traj', 'E', 'm', 'w',
        'eps', 'support_ci', 'maxiter', 'max_cache',
        'mincount_connectivity',
    )
    estimated_quantities = (
        'E_active', 'E_min', 'E_max', 'mhat', 'lagrange', 'sigmas',
        'count_inside', 'count_outside',
    )
    # derived from msm_estimator
    msm_quantities = ('P', 'pi')

    # A private directory replaces the deprecated, race-prone
    # tempfile.mktemp; removing the directory also succeeds if save()
    # failed before creating the file, so the real error is not masked.
    tmpdir = tempfile.mkdtemp()
    try:
        f = os.path.join(tmpdir, 'model')
        self.amm.save(f)
        restored = pyemma.load(f)

        for group in (estimation_parameters, estimated_quantities,
                      msm_quantities):
            for name in group:
                np.testing.assert_equal(getattr(self.amm, name),
                                        getattr(restored, name))
    finally:
        shutil.rmtree(tmpdir)
pdb.set_trace() else: targets.append(keyword) state_index = features_df.KeywordLabel.to_list() input_directory = constants.output_data_markov_modelling stationary_probs_complete = pd.read_csv(input_directory + '/' + 'complete' + '/' + 'stationary_probs.csv') for element in metadata_fields_to_agregate: print(element) mm = pyemma.load(input_directory + element + '_temp/' + 'pyemma_model', 'simple') output_file_name = 'most_imp_path_' + sources[0] + '_' + targets[0] pdb.set_trace() visualize_most_important_paths( mm, features_df, sources[0], targets[0], input_directory + element + '_temp/' + output_file_name) ''' data = mm.P stats = stationary_probs_complete[0:100] stats =stats.rename(columns={'topic_name':'KeywordLabel'}) nodes = features_df.rename(columns={'Unnamed: 0':'index_original'}) del stats['Unnamed: 0'] stats_with_indices = stats.merge(nodes) stats_with_indices = stats_with_indices.sort_values("index_original")
axesflux_list = [i for subl in axes_flux for i in subl] for c, c_bulk, l in zip(conc, concentrations, lag): time = l * ps ax = axes_list.pop(0) ax_flux = axesflux_list.pop(0) index = pd.MultiIndex.from_product([[c], state_labels], names=['[{}]'.format(mol), 'State']) print("Calculating models for {} and lag {} timesteps = {} ps".format( c, l, time)) ###Calculate bMSM model with lag l model = "{}bMSM-{}-{}-{}-s{}-{}ps.npy".format(work_path, prot, mol, c, dt, time) if os.path.exists(model): bmsm = pyemma.load(model) print( "\tbMSM model found ({}), skipping calculations \n".format(model)) else: print("\tReading data...") kNAC = pyemma.coordinates.load( '{}kNAC-{}-{}-{}-i{}-o{}-s{}.npy'.format(work_path, prot, mol, c, start * ps, stop, dt)) #, stride=2) print("\tBayesian MSM with lag time {} ({} ps)....\n".format(l, time)) bmsm = pyemma.msm.bayesian_markov_model(kNAC.flatten().astype(int), lag=l, dt_traj='{} ps'.format(ps), conf=0.95) bmsm.save(model, overwrite=True)
weights='empirical') print("time1", time.time() - time_start) slow_modes = tica_obj_tmp.get_output(stride=vamp_stride) tica_timescales = tica_obj_tmp.timescales print("tica_timescales", tica_timescales) print("slow modes", slow_modes[0][0, :10]) yall_slow_modes = np.concatenate(slow_modes) yall = yall_slow_modes yall_slow_modes.shape figX, axX = pyemma.plots.plot_free_energy(yall_slow_modes[:, 0], yall_slow_modes[:, 1]) figX.savefig(resultspath + name_data + 'ticafe_i' + str(iter_found) + '.png') if Kconfig.project_tica == 'True': tica_obj = pyemma.load(refticapath) yticaproj = np.concatenate(tica_obj.transform(data)) xlim = (-2.2, 1.7) ylim = (-2.5, 3.4) vmax_set = 10 step = 0.1 levels = np.arange(0, vmax_set + step, step) figX, axX, mi = pyemma.plots.plot_free_energy(yticaproj[:, 0], yticaproj[:, 1], levels=levels, legacy=False) axX.set_xlim(xlim) axX.set_ylim(ylim) axX.set_xlabel('TICA 0') axX.set_ylabel('TICA 1') figX.savefig(resultspath + name_data + "ticaprojreffe_i" +
formatter_class=argparse.ArgumentDefaultsHelpFormatter) h = ('HDF5 file containing a saved PyEMMA PCA or TICA object ' + 'equipped with a FeatureReader') parser.add_argument('--model', metavar='MODELFILE', default='pca.h5', help=h) h = ('NumPy .npz file containing the projected trajectories ' + 'indexed by original filenames') parser.add_argument('--output', metavar='OUTPUTFILE', default='ptrajs.npz', help=h) h = ('trajectory file compatible with the FeatureReader of the ' + 'transformation object saved in MODELFILE') parser.add_argument('trajfiles', metavar='TRAJFILE', nargs='+', help=h) args = parser.parse_args() transformer = pyemma.load(args.model) for i, trajfile in enumerate(args.trajfiles): data = pyemma.coordinates.load( trajfile, features=transformer.data_producer.featurizer) np.save(f'.{i}.npy', transformer.transform(data)) ptrajs = { trajfile: np.load(f'.{i}.npy') for i, trajfile in enumerate(args.trajfiles) } np.savez(args.output, **ptrajs) for i in range(len(args.trajfiles)): os.remove(f'.{i}.npy')
def main(argv=None):
    """Inspect files containing saved PyEMMA models and print a summary.

    Parameters
    ----------
    argv : list of str, optional
        Command line arguments; passed on to ``ArgumentParser.parse_args``.

    Returns
    -------
    int
        0 on success, 1 if one of the files could not be inspected.
        Under Python 2 the process exits with status 1 instead.
    """
    import six
    if six.PY2:
        print('This tool is only available for Python3.')
        sys.exit(1)

    import argparse
    from collections import defaultdict

    from pyemma import load
    from pyemma._base.serialization.h5file import H5File

    parser = argparse.ArgumentParser()
    parser.add_argument('--json', action='store_true', default=False)
    parser.add_argument('files', metavar='files', nargs='+',
                        help='files to inspect')
    # NOTE(review): --recursive is parsed but not consulted below; the input
    # chain is gathered whenever a model was saved with its streaming chain.
    parser.add_argument(
        '--recursive', action='store_true', default=False,
        help='If the pipeline of the stored estimator was stored, '
        'gather these information as well. This will require to load the model, '
        'so it could take a while, if the pipeline contains lots of data.')
    parser.add_argument('-v', '--verbose', action='store_true', default=False)
    args = parser.parse_args(argv)

    # store found models by filename
    models = defaultdict(dict)

    for f in args.files:
        try:
            with H5File(f) as fh:
                m = fh.models_descriptive
            for k in m:
                models[f][k] = m[k]
            for model_name, values in m.items():
                if values['saved_streaming_chain']:
                    # load the model being described, not the default one
                    restored = load(f, model_name)
                    models[f][model_name]['input_chain'] = [
                        repr(x) for x in restored._data_flow_chain()
                    ]
        except Exception as e:
            # Exception rather than BaseException, so that KeyboardInterrupt
            # and SystemExit are not reported as an invalid model file.
            print(
                '{} did not contain a valid PyEMMA model. Error was {err}. '
                'If you are sure, that it does, please post an issue on Github'
                .format(f, err=e))
            if args.verbose:
                import traceback
                traceback.print_exc()
            return 1

    if not args.json:
        from io import StringIO
        buff = StringIO()
        buff.write('PyEMMA models\n')
        # underline the headline (its length without the newline)
        buff.write('=' * (buff.tell() - 1))
        buff.write('\n' * 2)
        for f in models:
            buff.write('file: {}'.format(f))
            buff.write('\n')
            buff.write('-' * 80)
            buff.write('\n')
            model_file = models[f]
            for i, model_name in enumerate(model_file):
                attrs = model_file[model_name]
                buff.write('{index}. name: {key}\n'
                           'created: {created}\n'
                           '{repr}\n'.format(key=model_name,
                                             index=i + 1,
                                             created=attrs['created_readable'],
                                             repr=attrs['class_str']))
                if attrs['saved_streaming_chain']:
                    buff.write('\n---------Input chain---------\n')
                    for j, x in enumerate(attrs['input_chain']):
                        buff.write('{index}. {repr}\n'.format(index=j + 1,
                                                              repr=x))
            buff.write('-' * 80)
            buff.write('\n')
        buff.seek(0)
        print(buff.read())
    else:
        import json
        json.dump(models, fp=sys.stdout)
    return 0