def __init__(self, model, estimator, memberships, mlags=None, conf=0.95,
             err_est=False, n_jobs=1, show_progress=True):
    """
    Parameters
    ----------
    memberships : ndarray(n, m)
        Set memberships to calculate set probabilities. n must be equal to
        the number of active states in model. m is the number of sets.
        memberships must be a row-stochastic matrix (the rows must sum up to 1).
    """
    LaggedModelValidator.__init__(self, model, estimator, mlags=mlags, conf=conf,
                                  n_jobs=n_jobs, show_progress=show_progress)
    # check and store parameters
    self.memberships = types.ensure_ndarray(memberships, ndim=2, kind='numeric')
    self.nstates, self.nsets = memberships.shape
    assert np.allclose(memberships.sum(axis=1), np.ones(self.nstates))  # stochastic matrix?
    # active set
    self.active_set = types.ensure_ndarray(np.array(estimator.active_set), kind='i')  # create a copy
    # map from the full set (here defined by the largest state index in active set) to active
    self._full2active = np.zeros(np.max(self.active_set) + 1, dtype=int)
    self._full2active[self.active_set] = np.arange(self.nstates)
    # define starting distribution
    self.P0 = memberships * model.stationary_distribution[:, None]
    self.P0 /= self.P0.sum(axis=0)  # column-normalize
    self.err_est = err_est  # TODO: this is currently unused
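For orientation, a minimal standalone sketch (with made-up numbers, not part of the class) of how the starting distributions P0 are built from a row-stochastic membership matrix: each column is weighted by the stationary distribution and then column-normalized, so every set yields a probability vector concentrated on that set.

import numpy as np

# toy example: 3 active states grouped into 2 sets; rows sum to 1 (row-stochastic)
memberships = np.array([[1.0, 0.0],
                        [0.5, 0.5],
                        [0.0, 1.0]])
pi = np.array([0.2, 0.3, 0.5])           # assumed stationary distribution

P0 = memberships * pi[:, None]           # weight each state by its stationary probability
P0 /= P0.sum(axis=0)                     # column-normalize: one distribution per set
assert np.allclose(P0.sum(axis=0), 1.0)  # each column is now a probability vector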
def expectation(self, a):
    r"""Equilibrium expectation value of a given observable.

    Parameters
    ----------
    a : (n,) ndarray
        Observable vector on the MSM state space

    Returns
    -------
    val: float
        Equilibrium expectation value of the given observable

    Notes
    -----
    The equilibrium expectation value of an observable :math:`a` is defined as follows

    .. math::

        \mathbb{E}_{\mu}[a] = \sum_i \pi_i a_i

    :math:`\pi=(\pi_i)` is the stationary vector of the transition matrix :math:`P`.
    """
    # check input and go
    a = _types.ensure_ndarray(a, ndim=1, size=self.nstates, kind='numeric')
    return _np.dot(a, self.stationary_distribution)
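A quick standalone check of the formula E_mu[a] = sum_i pi_i a_i, with made-up values for pi and a:

import numpy as np

pi = np.array([0.25, 0.25, 0.5])  # assumed stationary distribution
a = np.array([1.0, 2.0, 4.0])     # observable values on the three states
# E[a] = 0.25*1 + 0.25*2 + 0.5*4 = 2.75
assert np.isclose(np.dot(a, pi), 2.75)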
def propagate(self, p0, k):
    r""" Propagates the initial distribution p0 k times

    Computes the product

    .. math::

        p_k = p_0^T P^k

    If the lag time of transition matrix :math:`P` is :math:`\tau`, this
    will provide the probability distribution at time :math:`k \tau`.

    :param p0: ndarray - initial distribution. Vector of size of the active set
    :param k: int - number of time steps
    :return: ndarray - distribution after k steps, vector of size of the active set
    """
    p0 = _types.ensure_ndarray(p0, ndim=1, kind='numeric')
    assert _types.is_int(k) and k >= 0, 'k must be a non-negative integer'
    if k == 0 or k == 1:
        return self.eval(0).propagate(p0, k).real
    else:
        pprop = self.eval(0).propagate(p0, 1).real
        for i in range(1, k):
            pprop = self.eval(i).propagate(pprop, 1).real
        return pprop
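Note that this variant chains single steps through a sequence of models (self.eval(i)), so the k-step result is a product of possibly different one-step transition matrices rather than a power of a single one. A standalone numpy sketch of that chaining, with two made-up matrices:

import numpy as np

# two assumed one-step transition matrices (row-stochastic)
Ps = [np.array([[0.9, 0.1], [0.2, 0.8]]),
      np.array([[0.8, 0.2], [0.3, 0.7]])]
p = np.array([1.0, 0.0])  # initial distribution
for P in Ps:              # p_k = p_0^T P_0 P_1 ... P_{k-1}
    p = p @ P
print(p)                  # distribution after two chained steps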
def __init__(self, bias_energies_full, lag, count_mode='sliding', connectivity='largest',
             maxiter=10000, maxerr=1E-15, dt_traj='1 step', save_convergence_info=0,
             init=None):
    # set all parameters
    self.bias_energies_full = _types.ensure_ndarray(bias_energies_full, ndim=2, kind='numeric')
    self.lag = lag
    assert count_mode == 'sliding', 'Currently the only implemented count_mode is \'sliding\''
    self.count_mode = count_mode
    assert connectivity == 'largest', 'Currently the only implemented connectivity is \'largest\''
    self.connectivity = connectivity
    assert init in (None, 'wham'), 'Currently only None and \'wham\' are supported'
    self.init = init
    self.dt_traj = dt_traj
    self.maxiter = maxiter
    self.maxerr = maxerr
    self.save_convergence_info = save_convergence_info
    # set derived quantities
    self.nthermo, self.nstates_full = bias_energies_full.shape
    self.timestep_traj = _TimeUnit(dt_traj)
    # set iteration variables
    self.therm_energies = None
    self.conf_energies = None
    self.log_lagrangian_mult = None
def _param_init(self):
    indim = self.data_producer.dimension()
    assert indim > 0, "zero dimension from data producer"
    assert self._dim <= indim, (
        "requested more output dimensions (%i) than dimension"
        " of input data (%i)" % (self._dim, indim))
    if self._force_eigenvalues_le_one and self._lag % self._param_with_stride != 0:
        raise RuntimeError(
            "When using TICA with force_eigenvalues_le_one, lag must be a multiple of stride.")

    if self.mu is not None:
        self.mu = types.ensure_ndarray(self.mu, shape=(indim,))
        self._given_mean = True
    else:
        self.mu = np.zeros(indim)
        self._given_mean = False

    self._N_mean = 0
    self._N_cov = 0
    self._N_cov_tau = 0
    # create covariance matrices
    self.cov = np.zeros((indim, indim))
    self.cov_tau = np.zeros_like(self.cov)

    self._logger.debug(
        "Running TICA with tau=%i; Estimating two covariance matrices"
        " with dimension (%i, %i)" % (self._lag, indim, indim))
    # amount of chunks
    denom = self._n_chunks(self._param_with_stride)
    self._progress_register(denom, "calculate mean", 0)
    self._progress_register(denom, "calculate covariances", 1)
    return 0  # in zeroth pass, don't request lagged data
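For context, a minimal standalone sketch of the two matrices this method allocates: the instantaneous covariance C(0) and the time-lagged covariance C(tau), computed here in one shot on a random mean-free signal (the actual estimator accumulates them chunk by chunk):

import numpy as np

rng = np.random.default_rng(0)
X = rng.standard_normal((1000, 3))  # fake trajectory: 1000 frames, 3 dimensions
lag = 10
X = X - X.mean(axis=0)              # remove the mean first
N = len(X) - lag
C0 = X[:-lag].T @ X[:-lag] / N      # instantaneous covariance C(0)
Ctau = X[:-lag].T @ X[lag:] / N     # time-lagged covariance C(tau)
# TICA then solves the generalized eigenvalue problem C(tau) v = lambda C(0) v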
def fingerprint_relaxation(self, p0, a, k=None, ncv=None):
    # basic checks for p0 and a
    p0 = _types.ensure_ndarray(p0, ndim=1, kind='numeric')
    a = _types.ensure_ndarray(a, ndim=1, kind='numeric', size=len(p0))
    # are we on microstate space?
    if len(a) == self.nstates_obs:
        p0 = _np.dot(self.observation_probabilities, p0)
        a = _np.dot(self.observation_probabilities, a)
    # now we are on macrostate space, or something is wrong
    if len(a) == self.nstates:
        return _MSM.fingerprint_relaxation(self, p0, a)
    else:
        raise ValueError(
            'observable vectors have size %s which is incompatible with both hidden (%s)'
            ' and observed states (%s)' % (len(a), self.nstates, self.nstates_obs))
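The projection step above maps observables defined on the observed (micro)states down to hidden states via the observation probability matrix. A standalone sketch of that contraction with made-up shapes (2 hidden, 4 observed states):

import numpy as np

B = np.array([[0.7, 0.2, 0.1, 0.0],     # assumed observation probabilities,
              [0.0, 0.1, 0.3, 0.6]])    # shape (n_hidden, n_observed); rows sum to 1
a_obs = np.array([1.0, 2.0, 3.0, 4.0])  # observable on the 4 observed states
a_hidden = B @ a_obs                    # expected observable value per hidden state
print(a_hidden)                         # shape (2,)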
def test_estimator(self, test_estimator):
    self._test_estimator = test_estimator
    self.active_set = types.ensure_ndarray(np.array(test_estimator.active_set), kind='i')  # create a copy
    # map from the full set (here defined by the largest state index in active set) to active
    self._full2active = np.zeros(np.max(self.active_set) + 1, dtype=int)
    self._full2active[self.active_set] = np.arange(self.nstates)
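How the full-to-active index map behaves, as a standalone sketch with toy values: indices in the active set map to 0..n-1, while (as constructed here) any state outside the active set silently maps to 0, so callers are expected to restrict themselves to active states first.

import numpy as np

active_set = np.array([1, 3, 4])  # assumed active states out of 0..4
full2active = np.zeros(np.max(active_set) + 1, dtype=int)
full2active[active_set] = np.arange(len(active_set))
print(full2active)     # [0 0 0 1 2]: full state 3 maps to active index 1
print(full2active[3])  # 1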
def __init__(self, test_model, test_estimator, mlags=None, conf=0.95,
             err_est=False, n_jobs=None, show_progress=True):
    # set model and estimator
    # copy the test model, since the estimation of cktest modifies the model.
    from copy import deepcopy
    self.test_model = deepcopy(test_model)
    self.test_estimator = test_estimator

    # set mlags
    try:
        maxlength = np.max([len(dtraj) for dtraj in test_estimator.discrete_trajectories_full])
    except AttributeError:
        maxlength = np.max(test_estimator.trajectory_lengths())
    maxmlag = int(math.floor(maxlength / test_estimator.lag))
    if mlags is None:
        mlags = maxmlag
    if types.is_int(mlags):
        mlags = np.arange(mlags)
    mlags = types.ensure_ndarray(mlags, ndim=1, kind='i')
    if np.any(mlags > maxmlag):
        mlags = mlags[np.where(mlags <= maxmlag)]
        self.logger.warning('Changed mlags as some mlags exceeded maximum trajectory length.')
    if np.any(mlags < 0):
        mlags = mlags[np.where(mlags >= 0)]
        self.logger.warning('Changed mlags as some mlags were negative.')
    self.mlags = mlags

    # set conf and error handling
    self.conf = conf
    self.has_errors = issubclass(self.test_model.__class__, SampledModel)
    if self.has_errors:
        self.test_model.set_model_params(conf=conf)
    self.err_est = err_est
    if err_est and not self.has_errors:
        raise ValueError('Requested errors on the estimated models, '
                         'but the model is not able to calculate errors at all')
    self.n_jobs = n_jobs
    self.show_progress = show_progress
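The mlags sanitization above clips the requested lag-time multiples to what the data can support. A standalone sketch with made-up numbers:

import math
import numpy as np

maxlength, lag = 1000, 150                  # assumed longest trajectory and estimation lag
maxmlag = int(math.floor(maxlength / lag))  # 6: largest usable lag-time multiple
mlags = np.arange(10)                       # requested multiples 0..9
mlags = mlags[(mlags >= 0) & (mlags <= maxmlag)]
print(mlags)                                # [0 1 2 3 4 5 6]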
def propagate(self, p0, k):
    r""" Propagates the initial distribution p0 k times

    Computes the product

    .. math::

        p_k = p_0^T P^k

    If the lag time of transition matrix :math:`P` is :math:`\tau`, this
    will provide the probability distribution at time :math:`k \tau`.

    Parameters
    ----------
    p0 : ndarray(n)
        Initial distribution. Vector of size of the active set.
    k : int
        Number of time steps

    Returns
    -------
    pk : ndarray(n)
        Distribution after k steps. Vector of size of the active set.
    """
    p0 = _types.ensure_ndarray(p0, ndim=1, kind='numeric')
    assert _types.is_int(k) and k >= 0, 'k must be a non-negative integer'
    if k == 0:  # simply return p0 normalized
        return p0 / p0.sum()

    micro = False
    # are we on microstate space?
    if len(p0) == self.nstates_obs:
        micro = True
        # project to hidden and compute
        p0 = _np.dot(self.observation_probabilities, p0)

    self._ensure_eigendecomposition(self.nstates)
    from pyemma.util.linalg import mdot
    pk = mdot(p0.T, self.eigenvectors_right(),
              _np.diag(_np.power(self.eigenvalues(), k)),
              self.eigenvectors_left())

    if micro:
        pk = _np.dot(pk, self.observation_probabilities)  # convert back to microstate space

    # normalize to 1.0 and return
    return pk / pk.sum()
def propagate(self, p0, k):
    r""" Propagates the initial distribution p0 k times

    Computes the product

    .. math::

        p_k = p_0^T P^k

    If the lag time of transition matrix :math:`P` is :math:`\tau`, this
    will provide the probability distribution at time :math:`k \tau`.

    Parameters
    ----------
    p0 : ndarray(n,)
        Initial distribution. Vector of size of the active set.
    k : int
        Number of time steps

    Returns
    -------
    pk : ndarray(n,)
        Distribution after k steps. Vector of size of the active set.
    """
    p0 = _types.ensure_ndarray(p0, ndim=1, size=self.nstates, kind='numeric')
    assert _types.is_int(k) and k >= 0, 'k must be a non-negative integer'
    if k == 0:  # simply return p0 normalized
        return p0 / p0.sum()

    if self.is_sparse:  # sparse: we don't have a full eigenvalue set, so just propagate
        pk = _np.array(p0)
        for i in range(k):
            pk = _np.dot(pk.T, self.transition_matrix)
    else:  # dense: employ eigenvalue decomposition
        self._ensure_eigendecomposition(self.nstates)
        from pyemma.util.linalg import mdot
        pk = mdot(p0.T, self.eigenvectors_right(),
                  _np.diag(_np.power(self.eigenvalues(), k)),
                  self.eigenvectors_left()).real

    # normalize to 1.0 and return
    return pk / pk.sum()
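The dense branch relies on the spectral identity P^k = R diag(lambda^k) L, with L the matrix of left eigenvectors (the inverse of R). A standalone numpy check that this matches direct matrix powers, on a small made-up transition matrix:

import numpy as np

P = np.array([[0.9, 0.1],
              [0.3, 0.7]])              # toy row-stochastic transition matrix
w, R = np.linalg.eig(P)                 # eigenvalues and right eigenvectors
L = np.linalg.inv(R)                    # rows are the matching left eigenvectors
k = 5
Pk_spectral = (R * w**k) @ L            # R diag(w^k) L
assert np.allclose(Pk_spectral, np.linalg.matrix_power(P, k))

p0 = np.array([1.0, 0.0])
pk = p0 @ np.linalg.matrix_power(P, k)  # p_k = p_0^T P^k
print(pk / pk.sum())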
def correlation(self, a, b=None, maxtime=None, k=None, ncv=None):
    # basic checks for a and b
    a = _types.ensure_ndarray(a, ndim=1, kind='numeric')
    b = _types.ensure_ndarray_or_None(b, ndim=1, kind='numeric', size=len(a))
    # are we on microstate space?
    if len(a) == self.nstates_obs:
        a = _np.dot(self.observation_probabilities, a)
        if b is not None:
            b = _np.dot(self.observation_probabilities, b)
    # now we are on macrostate space, or something is wrong
    if len(a) == self.nstates:
        return _MSM.correlation(self, a, b=b, maxtime=maxtime)
    else:
        raise ValueError(
            'observable vectors have size %s which is incompatible with both hidden (%s)'
            ' and observed states (%s)' % (len(a), self.nstates, self.nstates_obs))
def _param_init(self):
    self._N_mean = 0
    self._N_cov = 0
    # create mean array and covariance matrix
    indim = self.data_producer.dimension()
    self._logger.info("Running PCA on %i dimensional input" % indim)
    assert indim > 0, "Incoming data of PCA has 0 dimension!"

    if self.mu is not None:
        self.mu = types.ensure_ndarray(self.mu, shape=(indim,))
        self._given_mean = True
    else:
        self.mu = np.zeros(indim)
        self._given_mean = False

    self.cov = np.zeros((indim, indim))

    # amount of chunks
    denom = self._n_chunks(self._param_with_stride)
    self._progress_register(denom, description="calculate mean", stage=0)
    self._progress_register(denom, description="calculate covariances", stage=1)
def __init__(self, bias_energies_full, maxiter=10000, maxerr=1.0E-15,
             save_convergence_info=0, dt_traj='1 step', stride=1):
    self.bias_energies_full = _types.ensure_ndarray(bias_energies_full, ndim=2, kind='numeric')
    self.stride = stride
    self.dt_traj = dt_traj
    self.maxiter = maxiter
    self.maxerr = maxerr
    self.save_convergence_info = save_convergence_info
    # set derived quantities
    self.nthermo, self.nstates_full = bias_energies_full.shape
    self.timestep_traj = _TimeUnit(dt_traj)
    # set iteration variables
    self.therm_energies = None
    self.conf_energies = None
def memberships(self, value):
    self._memberships = types.ensure_ndarray(value, ndim=2, kind='numeric')
    self.nstates, self.nsets = self._memberships.shape
    assert np.allclose(self._memberships.sum(axis=1),
                       np.ones(self.nstates))  # stochastic matrix?
def plot_network(self, state_sizes=None, state_scale=1.0, state_colors='#ff5500',
                 state_labels='auto', arrow_scale=1.0, arrow_curvature=1.0,
                 arrow_labels='weights', arrow_label_format='%10.2f',
                 max_width=12, max_height=12, figpadding=0.2,
                 xticks=False, yticks=False, show_frame=False, **textkwargs):
    """
    Draws a network using discs and curved arrows.

    The thicknesses and labels of the arrows are taken from the off-diagonal
    matrix elements in A.
    """
    if self.pos is None:
        self.layout_automatic()
    # number of nodes
    n = len(self.pos)
    # get bounds and pad figure
    xmin = np.min(self.pos[:, 0])
    xmax = np.max(self.pos[:, 0])
    Dx = xmax - xmin
    xmin -= Dx * figpadding
    xmax += Dx * figpadding
    Dx *= 1 + figpadding
    ymin = np.min(self.pos[:, 1])
    ymax = np.max(self.pos[:, 1])
    Dy = ymax - ymin
    ymin -= Dy * figpadding
    ymax += Dy * figpadding
    Dy *= 1 + figpadding
    # sizes of nodes
    if state_sizes is None:
        state_sizes = 0.5 * state_scale * min(Dx, Dy)**2 * np.ones(n) / float(n)
    else:
        state_sizes = 0.5 * state_scale * min(Dx, Dy)**2 * state_sizes \
            / (np.max(state_sizes) * float(n))
    # automatic arrow rescaling
    arrow_scale *= 1.0 / (np.max(self.A - np.diag(np.diag(self.A))) * math.sqrt(n))
    # size figure
    if (Dx / max_width > Dy / max_height):
        figsize = (max_width, Dy * (max_width / Dx))
    else:
        figsize = (Dx / Dy * max_height, max_height)
    fig = plt.gcf()
    fig.set_size_inches(figsize, forward=True)
    # font sizes
    old_fontsize = rcParams['font.size']
    rcParams['font.size'] = 20
    # remove axis labels
    frame = plt.gca()
    if not xticks:
        frame.axes.get_xaxis().set_ticks([])
    if not yticks:
        frame.axes.get_yaxis().set_ticks([])
    # show or suppress frame
    frame.set_frame_on(show_frame)
    # set node labels
    if isinstance(state_labels, str) and state_labels == 'auto':
        state_labels = [str(i) for i in np.arange(n)]
    else:
        assert len(state_labels) == n, \
            "Mismatch between nstates and nr. state_labels (%u vs %u)" % (n, len(state_labels))
    # set node colors
    if state_colors is None:
        state_colors = '#ff5500'  # None is not acceptable
    if isinstance(state_colors, str):
        state_colors = [state_colors] * n
    if isinstance(state_colors, list):
        assert len(state_colors) == n, \
            "Mismatch between nstates and nr. state_colors (%u vs %u)" % (n, len(state_colors))
    try:
        colorscales = _types.ensure_ndarray(state_colors, ndim=1, kind='numeric')
        colorscales /= colorscales.max()
        state_colors = [plt.cm.binary(int(256.0 * colorscales[i])) for i in range(n)]
    except AssertionError:
        pass  # assume we have a list of strings now.

    # set arrow labels
    if isinstance(arrow_labels, np.ndarray):
        L = arrow_labels
    else:
        L = np.empty(np.shape(self.A), dtype=object)
        if arrow_labels is None:
            L[:, :] = ''
        elif arrow_labels.lower() == 'weights':
            for i in range(n):
                for j in range(n):
                    L[i, j] = arrow_label_format % self.A[i, j]
        else:
            rcParams['font.size'] = old_fontsize
            raise ValueError('invalid arrow label format')

    # Set the default values for the text dictionary
    textkwargs.setdefault('size', 14)
    textkwargs.setdefault('horizontalalignment', 'center')
    textkwargs.setdefault('verticalalignment', 'center')
    textkwargs.setdefault('color', 'black')
    # draw circles
    circles = []
    for i in range(n):
        fig = plt.gcf()
        # choose color
        c = plt.Circle(self.pos[i],
                       radius=math.sqrt(0.5 * state_sizes[i]) / 2.0,
                       color=state_colors[i], zorder=2)
        circles.append(c)
        fig.gca().add_artist(c)
        # add annotation
        plt.text(self.pos[i][0], self.pos[i][1], state_labels[i],
                 zorder=3, **textkwargs)
    assert len(circles) == n, "%i != %i" % (len(circles), n)
    # draw arrows
    for i in range(n):
        for j in range(i + 1, n):
            if (abs(self.A[i, j]) > 0):
                self._draw_arrow(self.pos[i, 0], self.pos[i, 1],
                                 self.pos[j, 0], self.pos[j, 1], Dx, Dy,
                                 label=str(L[i, j]),
                                 width=arrow_scale * self.A[i, j],
                                 arrow_curvature=arrow_curvature,
                                 patchA=circles[i], patchB=circles[j],
                                 shrinkA=3, shrinkB=0)
            if (abs(self.A[j, i]) > 0):
                self._draw_arrow(self.pos[j, 0], self.pos[j, 1],
                                 self.pos[i, 0], self.pos[i, 1], Dx, Dy,
                                 label=str(L[j, i]),
                                 width=arrow_scale * self.A[j, i],
                                 arrow_curvature=arrow_curvature,
                                 patchA=circles[j], patchB=circles[i],
                                 shrinkA=3, shrinkB=0)
    # plot
    plt.xlim(xmin, xmax)
    plt.ylim(ymin, ymax)
    rcParams['font.size'] = old_fontsize
    return fig
def __init__(self, bias_energies_full, lag, count_mode='sliding', connectivity='largest',
             maxiter=10000, maxerr=1.0E-15, save_convergence_info=0, dt_traj='1 step',
             init=None, init_maxiter=10000, init_maxerr=1.0E-8):
    r""" Discrete Transition(-based) Reweighting Analysis Method

    Parameters
    ----------
    bias_energies_full : numpy.ndarray(shape=(num_therm_states, num_conf_states)) object
        bias_energies_full[j, i] is the bias energy in units of kT for each discrete state i
        at thermodynamic state j.
    lag : int
        Integer lag time at which transitions are counted.
    count_mode : str, optional, default='sliding'
        Mode to obtain count matrices from discrete trajectories. Should be one of:

        * 'sliding' : a trajectory of length T will have :math:`T-\tau` counts at time indexes

          .. math::

              (0 \rightarrow \tau), (1 \rightarrow \tau+1), ..., (T-\tau-1 \rightarrow T-1)

        * 'sample' : a trajectory of length T will have :math:`T/\tau` counts at time indexes

          .. math::

              (0 \rightarrow \tau), (\tau \rightarrow 2 \tau), ..., ((T/\tau-1) \tau \rightarrow T)

        Currently only 'sliding' is supported.
    connectivity : str, optional, default='largest'
        Defines what should be considered a connected set in the joint space of conformations
        and thermodynamic ensembles. Currently only 'largest' is supported.
    maxiter : int, optional, default=10000
        The maximum number of self-consistent iterations before the estimator exits
        unsuccessfully.
    maxerr : float, optional, default=1.0E-15
        Convergence criterion based on the maximal free energy change in a self-consistent
        iteration step.
    save_convergence_info : int, optional, default=0
        Every save_convergence_info iteration steps, store the actual increment and the
        actual log-likelihood; 0 means no storage.
    dt_traj : str, optional, default='1 step'
        Description of the physical time corresponding to the lag. May be used by analysis
        algorithms such as plotting tools to pretty-print the axes. By default '1 step', i.e.
        there is no physical time unit. Specify by a number, whitespace and unit. Permitted
        units are (* is an arbitrary string):

        | 'fs',  'femtosecond*'
        | 'ps',  'picosecond*'
        | 'ns',  'nanosecond*'
        | 'us',  'microsecond*'
        | 'ms',  'millisecond*'
        | 's',   'second*'

    init : str, optional, default=None
        Use a specific initialization for self-consistent iteration:

        | None:   use a hard-coded guess for free energies and Lagrangian multipliers
        | 'wham': perform a short WHAM estimate to initialize the free energies

    init_maxiter : int, optional, default=10000
        The maximum number of self-consistent iterations during the initialization.
    init_maxerr : float, optional, default=1.0E-8
        Convergence criterion for the initialization.

    Example
    -------
    >>> from pyemma.thermo import DTRAM
    >>> import numpy as np
    >>> B = np.array([[0, 0],[0.5, 1.0]])
    >>> dtram = DTRAM(B, 1)
    >>> ttrajs = [np.array([0,0,0,0,0,0,0,0,0,0]),np.array([1,1,1,1,1,1,1,1,1,1])]
    >>> dtrajs = [np.array([0,0,0,0,1,1,1,0,0,0]),np.array([0,1,0,1,0,1,1,0,0,1])]
    >>> dtram = dtram.estimate((ttrajs, dtrajs))
    >>> dtram.log_likelihood() # doctest: +ELLIPSIS
    -9.805...
    >>> dtram.count_matrices # doctest: +SKIP
    array([[[5, 1],
            [1, 2]],
           [[1, 4],
            [3, 1]]], dtype=int32)
    >>> dtram.stationary_distribution # doctest: +ELLIPSIS
    array([ 0.38...,  0.61...])
    >>> dtram.meval('stationary_distribution') # doctest: +ELLIPSIS
    [array([ 0.38...,  0.61...]), array([ 0.50...,  0.49...])]

    References
    ----------
    .. [1] Wu, H. et al 2014
        Statistically optimal analysis of state-discretized trajectory data
        from multiple thermodynamic states
        J. Chem. Phys. 141, 214106

    """
    # set all parameters
    self.bias_energies_full = _types.ensure_ndarray(bias_energies_full, ndim=2, kind='numeric')
    self.lag = lag
    assert count_mode == 'sliding', 'Currently the only implemented count_mode is \'sliding\''
    self.count_mode = count_mode
    assert connectivity == 'largest', 'Currently the only implemented connectivity is \'largest\''
    self.connectivity = connectivity
    self.dt_traj = dt_traj
    self.maxiter = maxiter
    self.maxerr = maxerr
    self.save_convergence_info = save_convergence_info
    assert init in (None, 'wham'), 'Currently only None and \'wham\' are supported'
    self.init = init
    self.init_maxiter = init_maxiter
    self.init_maxerr = init_maxerr
    # set derived quantities
    self.nthermo, self.nstates_full = bias_energies_full.shape
    self.timestep_traj = _TimeUnit(dt_traj)
    # set iteration variables
    self.therm_energies = None
    self.conf_energies = None
    self.log_lagrangian_mult = None
def cktest(self, n_observables=None, observables='phi', statistics='psi',
           mlags=10, n_jobs=1, show_progress=True, iterable=None):
    r"""Do the Chapman-Kolmogorov test by computing predictions for higher lag times
    and by performing estimations at higher lag times.

    Notes
    -----
    This method computes two sets of time-lagged covariance matrices

    * estimates at higher lag times :

      .. math::

          \left\langle \mathbf{K}(n\tau)g_{i},f_{j}\right\rangle_{\rho_{0}}

      where :math:`\rho_{0}` is the empirical distribution implicitly defined
      by all data points from time steps 0 to T-tau in all trajectories,
      :math:`\mathbf{K}(n\tau)` is a rank-reduced Koopman matrix estimated
      at the lag-time n*tau and g and f are some functions of the data.
      Rank-reduction of the Koopman matrix is controlled by the `dim`
      parameter of :func:`vamp <pyemma.coordinates.vamp>`.

    * predictions at higher lag times :

      .. math::

          \left\langle \mathbf{K}^{n}(\tau)g_{i},f_{j}\right\rangle_{\rho_{0}}

      where :math:`\mathbf{K}^{n}` is the n'th power of the rank-reduced
      Koopman matrix contained in self.

    The Chapman-Kolmogorov test is to compare the predictions to the estimates.

    Parameters
    ----------
    n_observables : int, optional, default=None
        Limit the number of default observables (and of default statistics) to this number.
        Only used if `observables` are None or `statistics` are None.
    observables : np.ndarray((input_dimension, n_observables)) or 'phi'
        Coefficients that express one or multiple observables :math:`g` in the basis of the
        input features. This parameter can be 'phi'. In that case, the dominant right singular
        functions of the Koopman operator estimated at the smallest lag time are used as
        default observables.
    statistics : np.ndarray((input_dimension, n_statistics)) or 'psi'
        Coefficients that express one or multiple statistics :math:`f` in the basis of the
        input features. This parameter can be 'psi'. In that case, the dominant left singular
        functions of the Koopman operator estimated at the smallest lag time are used as
        default statistics.
    mlags : int or int-array, default=10
        multiples of lag times for testing the Model, e.g. range(10).
        A single int will trigger a range, i.e. mlags=10 maps to mlags=range(10).
        Note that you need to be able to do a model prediction for each of these lag time
        multiples, e.g. the value 0 only makes sense if model.expectation(lag_multiple=0)
        will work.
    n_jobs : int, default=1
        how many jobs to use during calculation
    show_progress : bool, default=True
        Show progressbars for calculation?
    iterable : any data format that `pyemma.coordinates.vamp()` accepts as input, optional
        If `iterable` is None, the same data source with which VAMP was initialized will be
        used for all estimations. Otherwise, all estimates (not predictions) from data will
        be computed from the data contained in `iterable`.

    Returns
    -------
    vckv : :class:`VAMPChapmanKolmogorovValidator <pyemma.coordinates.transform.VAMPChapmanKolmogorovValidator>`
        Contains the estimated and the predicted covariance matrices.
        The object can be plotted with :func:`plot_cktest <pyemma.plots.plot_cktest>`
        with the option `y01=False`.
    """
    if n_observables is not None:
        if n_observables > self.dimension():
            warnings.warn('Selected singular functions as observables but dimension '
                          'is lower than requested number of observables.')
            n_observables = self.dimension()
    else:
        n_observables = self.dimension()

    if isinstance(observables, str) and observables == 'phi':
        observables = self.singular_vectors_right[:, 0:n_observables]
        observables_mean_free = True
    else:
        ensure_ndarray(observables, ndim=2)
        observables_mean_free = False

    if isinstance(statistics, str) and statistics == 'psi':
        statistics = self.singular_vectors_left[:, 0:n_observables]
        statistics_mean_free = True
    else:
        ensure_ndarray_or_None(statistics, ndim=2)
        statistics_mean_free = False

    ck = VAMPChapmanKolmogorovValidator(self.model, self, observables, statistics,
                                        observables_mean_free, statistics_mean_free,
                                        mlags=mlags, n_jobs=n_jobs,
                                        show_progress=show_progress)
    if iterable is None:
        iterable = self.data_producer
    ck.estimate(iterable)
    return ck
def __init__(self, bias_energies_full, lag, count_mode='sliding',
             connectivity='reversible_pathways', maxiter=10000, maxerr=1.0E-15,
             save_convergence_info=0, dt_traj='1 step', init=None,
             init_maxiter=10000, init_maxerr=1.0E-8):
    r""" Discrete Transition(-based) Reweighting Analysis Method

    Parameters
    ----------
    bias_energies_full : numpy.ndarray(shape=(num_therm_states, num_conf_states)) object
        bias_energies_full[j, i] is the bias energy in units of kT for each discrete state i
        at thermodynamic state j.
    lag : int
        Integer lag time at which transitions are counted.
    count_mode : str, optional, default='sliding'
        Mode to obtain count matrices from discrete trajectories. Should be one of:

        * 'sliding' : a trajectory of length T will have :math:`T-\tau` counts at time indexes

          .. math::

              (0 \rightarrow \tau), (1 \rightarrow \tau+1), ..., (T-\tau-1 \rightarrow T-1)

        * 'sample' : a trajectory of length T will have :math:`T/\tau` counts at time indexes

          .. math::

              (0 \rightarrow \tau), (\tau \rightarrow 2 \tau), ..., ((T/\tau-1) \tau \rightarrow T)

        Currently only 'sliding' is supported.
    connectivity : str, optional, default='reversible_pathways'
        One of 'reversible_pathways', 'summed_count_matrix' or None.
        Defines what should be considered a connected set in the joint (product) space
        of conformations and thermodynamic ensembles.

        * 'reversible_pathways' : requires that every state in the connected set can be
          reached by following a pathway of reversible transitions. A reversible transition
          between two Markov states (within the same thermodynamic state k) is a pair of
          Markov states that belong to the same strongly connected component of the count
          matrix (from thermodynamic state k). A pathway of reversible transitions is a list
          of reversible transitions [(i_1, i_2), (i_2, i_3),..., (i_(N-2), i_(N-1)),
          (i_(N-1), i_N)]. The thermodynamic state where the reversible transitions happen
          is ignored in constructing the reversible pathways. This is equivalent to assuming
          that two ensembles overlap at some Markov state whenever there exist frames from
          both ensembles in that Markov state.
        * 'summed_count_matrix' : all thermodynamic states are assumed to overlap. The
          connected set is then computed by summing the count matrices over all thermodynamic
          states and taking its largest strongly connected set. Not recommended!
        * None : assume that everything is connected. For debugging.

        For more details see :func:`pyemma.thermo.extensions.cset.compute_csets_dTRAM`.
    maxiter : int, optional, default=10000
        The maximum number of self-consistent iterations before the estimator exits
        unsuccessfully.
    maxerr : float, optional, default=1.0E-15
        Convergence criterion based on the maximal free energy change in a self-consistent
        iteration step.
    save_convergence_info : int, optional, default=0
        Every save_convergence_info iteration steps, store the actual increment and the
        actual log-likelihood; 0 means no storage.
    dt_traj : str, optional, default='1 step'
        Description of the physical time corresponding to the lag. May be used by analysis
        algorithms such as plotting tools to pretty-print the axes. By default '1 step', i.e.
        there is no physical time unit. Specify by a number, whitespace and unit. Permitted
        units are (* is an arbitrary string):

        | 'fs',  'femtosecond*'
        | 'ps',  'picosecond*'
        | 'ns',  'nanosecond*'
        | 'us',  'microsecond*'
        | 'ms',  'millisecond*'
        | 's',   'second*'

    init : str, optional, default=None
        Use a specific initialization for self-consistent iteration:

        | None:   use a hard-coded guess for free energies and Lagrangian multipliers
        | 'wham': perform a short WHAM estimate to initialize the free energies

    init_maxiter : int, optional, default=10000
        The maximum number of self-consistent iterations during the initialization.
    init_maxerr : float, optional, default=1.0E-8
        Convergence criterion for the initialization.

    Example
    -------
    >>> from pyemma.thermo import DTRAM
    >>> import numpy as np
    >>> B = np.array([[0, 0],[0.5, 1.0]])
    >>> dtram = DTRAM(B, 1)
    >>> ttrajs = [np.array([0,0,0,0,0,0,0,0,0,0]),np.array([1,1,1,1,1,1,1,1,1,1])]
    >>> dtrajs = [np.array([0,0,0,0,1,1,1,0,0,0]),np.array([0,1,0,1,0,1,1,0,0,1])]
    >>> dtram = dtram.estimate((ttrajs, dtrajs))
    >>> dtram.log_likelihood() # doctest: +ELLIPSIS
    -9.805...
    >>> dtram.count_matrices # doctest: +SKIP
    array([[[5, 1],
            [1, 2]],
           [[1, 4],
            [3, 1]]], dtype=int32)
    >>> dtram.stationary_distribution # doctest: +ELLIPSIS
    array([ 0.38...,  0.61...])
    >>> dtram.meval('stationary_distribution') # doctest: +ELLIPSIS
    [array([ 0.38...,  0.61...]), array([ 0.50...,  0.49...])]

    References
    ----------
    .. [1] Wu, H. et al 2014
        Statistically optimal analysis of state-discretized trajectory data
        from multiple thermodynamic states
        J. Chem. Phys. 141, 214106

    """
    # set all parameters
    self.bias_energies_full = _types.ensure_ndarray(bias_energies_full, ndim=2, kind='numeric')
    self.lag = lag
    assert count_mode == 'sliding', 'Currently the only implemented count_mode is \'sliding\''
    self.count_mode = count_mode
    assert connectivity in (None, 'reversible_pathways', 'summed_count_matrix'), \
        'Currently the only implemented connectivity checks are \'reversible_pathways\', ' \
        '\'summed_count_matrix\' and None'
    self.connectivity = connectivity
    self.dt_traj = dt_traj
    self.maxiter = maxiter
    self.maxerr = maxerr
    self.save_convergence_info = save_convergence_info
    assert init in (None, 'wham'), 'Currently only None and \'wham\' are supported'
    self.init = init
    self.init_maxiter = init_maxiter
    self.init_maxerr = init_maxerr
    # set derived quantities
    self.nthermo, self.nstates_full = bias_energies_full.shape
    # set iteration variables
    self.therm_energies = None
    self.conf_energies = None
    self.log_lagrangian_mult = None
def __init__(self, bias_energies_full, maxiter=10000, maxerr=1.0E-15,
             save_convergence_info=0, dt_traj='1 step', stride=1):
    r"""Weighted Histogram Analysis Method

    Parameters
    ----------
    bias_energies_full : numpy.ndarray(shape=(num_therm_states, num_conf_states)) object
        bias_energies_full[j, i] is the bias energy in units of kT for each discrete state i
        at thermodynamic state j.
    maxiter : int, optional, default=10000
        The maximum number of self-consistent iterations before the estimator exits
        unsuccessfully.
    maxerr : float, optional, default=1.0E-15
        Convergence criterion based on the maximal free energy change in a self-consistent
        iteration step.
    save_convergence_info : int, optional, default=0
        Every save_convergence_info iteration steps, store the actual increment and the
        actual log-likelihood; 0 means no storage.
    dt_traj : str, optional, default='1 step'
        Description of the physical time corresponding to the lag. May be used by analysis
        algorithms such as plotting tools to pretty-print the axes. By default '1 step', i.e.
        there is no physical time unit. Specify by a number, whitespace and unit. Permitted
        units are (* is an arbitrary string):

        | 'fs',  'femtosecond*'
        | 'ps',  'picosecond*'
        | 'ns',  'nanosecond*'
        | 'us',  'microsecond*'
        | 'ms',  'millisecond*'
        | 's',   'second*'

    stride : int, optional, default=1
        not used

    Example
    -------
    >>> from pyemma.thermo import WHAM
    >>> import numpy as np
    >>> B = np.array([[0, 0],[0.5, 1.0]])
    >>> wham = WHAM(B)
    >>> ttrajs = [np.array([0,0,0,0,0,0,0,0,0,0]),np.array([1,1,1,1,1,1,1,1,1,1])]
    >>> dtrajs = [np.array([0,0,0,0,1,1,1,0,0,0]),np.array([0,1,0,1,0,1,1,0,0,1])]
    >>> wham = wham.estimate((ttrajs, dtrajs))
    >>> wham.log_likelihood() # doctest: +ELLIPSIS
    -6.6...
    >>> wham.state_counts # doctest: +SKIP
    array([[7, 3],
           [5, 5]])
    >>> wham.stationary_distribution # doctest: +ELLIPSIS +REPORT_NDIFF
    array([ 0.5...,  0.4...])
    >>> wham.meval('stationary_distribution') # doctest: +ELLIPSIS +REPORT_NDIFF
    [array([ 0.5...,  0.4...]), array([ 0.6...,  0.3...])]

    References
    ----------
    .. [1] Ferrenberg, A.M. and Swendsen, R.H. 1988.
        New Monte Carlo Technique for Studying Phase Transitions.
        Phys. Rev. Lett. 61, 2635--2638
    .. [2] Kumar, S. et al 1992.
        The Weighted Histogram Analysis Method for Free-Energy Calculations on Biomolecules.
        I. The Method. J. Comp. Chem. 13, 1011--1021

    """
    self.bias_energies_full = _types.ensure_ndarray(bias_energies_full, ndim=2, kind='numeric')
    self.stride = stride
    self.dt_traj = dt_traj
    self.maxiter = maxiter
    self.maxerr = maxerr
    self.save_convergence_info = save_convergence_info
    # set derived quantities
    self.nthermo, self.nstates_full = bias_energies_full.shape
    # set iteration variables
    self.therm_energies = None
    self.conf_energies = None
def plot_network(self, state_sizes=None, state_scale=1.0, state_colors='#ff5500',
                 state_labels='auto', arrow_scale=1.0, arrow_curvature=1.0,
                 arrow_labels='weights', arrow_label_format='%10.2f',
                 max_width=12, max_height=12, figpadding=0.2,
                 xticks=False, yticks=False, show_frame=False, **textkwargs):
    """
    Draws a network using discs and curved arrows.

    The thicknesses and labels of the arrows are taken from the off-diagonal
    matrix elements in A.
    """
    # Set the default values for the text dictionary
    from matplotlib import pyplot as _plt
    textkwargs.setdefault('size', None)
    textkwargs.setdefault('horizontalalignment', 'center')
    textkwargs.setdefault('verticalalignment', 'center')
    textkwargs.setdefault('color', 'black')
    # remove the temporary key 'arrow_label_size' as it cannot be parsed by plt.text!
    arrow_label_size = textkwargs.pop('arrow_label_size', textkwargs['size'])
    if self.pos is None:
        self.layout_automatic()
    # number of nodes
    n = len(self.pos)
    # get bounds and pad figure
    xmin = _np.min(self.pos[:, 0])
    xmax = _np.max(self.pos[:, 0])
    Dx = xmax - xmin
    xmin -= Dx * figpadding
    xmax += Dx * figpadding
    Dx *= 1 + figpadding
    ymin = _np.min(self.pos[:, 1])
    ymax = _np.max(self.pos[:, 1])
    Dy = ymax - ymin
    ymin -= Dy * figpadding
    ymax += Dy * figpadding
    Dy *= 1 + figpadding
    # sizes of nodes
    if state_sizes is None:
        state_sizes = 0.5 * state_scale * min(Dx, Dy)**2 * _np.ones(n) / float(n)
    else:
        state_sizes = 0.5 * state_scale * min(Dx, Dy)**2 * state_sizes \
            / (_np.max(state_sizes) * float(n))
    # automatic arrow rescaling
    arrow_scale *= 1.0 / (_np.max(self.A - _np.diag(_np.diag(self.A))) * _sqrt(n))
    # size figure
    if (Dx / max_width > Dy / max_height):
        figsize = (max_width, Dy * (max_width / Dx))
    else:
        figsize = (Dx / Dy * max_height, max_height)
    if self.ax is None:
        logger.debug("creating new figure")
        fig = _plt.figure(None, figsize=figsize)
        self.ax = fig.add_subplot(111)
    else:
        fig = self.ax.figure
        window_extend = self.ax.get_window_extent()
        axes_ratio = window_extend.height / window_extend.width
        data_ratio = (ymax - ymin) / (xmax - xmin)
        q = axes_ratio / data_ratio
        if q > 1.0:
            ymin *= q
            ymax *= q
        else:
            xmin /= q
            xmax /= q
    if not xticks:
        self.ax.get_xaxis().set_ticks([])
    if not yticks:
        self.ax.get_yaxis().set_ticks([])
    # show or suppress frame
    self.ax.set_frame_on(show_frame)
    # set node labels
    if state_labels is None:
        pass
    elif isinstance(state_labels, str) and state_labels == 'auto':
        state_labels = [str(i) for i in _np.arange(n)]
    else:
        if len(state_labels) != n:
            raise ValueError("length of state_labels({}) has to match length of states({})."
                             .format(len(state_labels), n))
    # set node colors
    if state_colors is None:
        state_colors = '#ff5500'  # None is not acceptable
    if isinstance(state_colors, str):
        state_colors = [state_colors] * n
    if isinstance(state_colors, list) and not len(state_colors) == n:
        raise ValueError("Mismatch between nstates and nr. state_colors (%u vs %u)"
                         % (n, len(state_colors)))
    try:
        colorscales = _types.ensure_ndarray(state_colors, ndim=1, kind='numeric')
        colorscales /= colorscales.max()
        state_colors = [_plt.cm.binary(int(256.0 * colorscales[i])) for i in range(n)]
    except AssertionError:
        # assume we have a list of strings now.
        logger.debug("could not cast 'state_colors' to numeric values.")

    # set arrow labels
    if isinstance(arrow_labels, _np.ndarray):
        L = arrow_labels
        if isinstance(arrow_labels[0, 0], str):
            arrow_label_format = '%s'
    elif isinstance(arrow_labels, str) and arrow_labels.lower() == 'weights':
        L = self.A[:, :]
    elif arrow_labels is None:
        L = _np.empty(_np.shape(self.A), dtype=object)
        L[:, :] = ''
        arrow_label_format = '%s'
    else:
        raise ValueError('invalid arrow labels')

    # draw circles
    circles = []
    for i in range(n):
        # choose color
        c = _plt.Circle(self.pos[i],
                        radius=_sqrt(0.5 * state_sizes[i]) / 2.0,
                        color=state_colors[i], zorder=2)
        circles.append(c)
        self.ax.add_artist(c)
        # add annotation
        if state_labels is not None:
            self.ax.text(self.pos[i][0], self.pos[i][1], state_labels[i],
                         zorder=3, **textkwargs)
    assert len(circles) == n, "%i != %i" % (len(circles), n)
    # draw arrows
    for i in range(n):
        for j in range(i + 1, n):
            if (abs(self.A[i, j]) > 0):
                self._draw_arrow(self.pos[i, 0], self.pos[i, 1],
                                 self.pos[j, 0], self.pos[j, 1], Dx, Dy,
                                 label=arrow_label_format % L[i, j],
                                 width=arrow_scale * self.A[i, j],
                                 arrow_curvature=arrow_curvature,
                                 patchA=circles[i], patchB=circles[j],
                                 shrinkA=3, shrinkB=0,
                                 arrow_label_size=arrow_label_size)
            if (abs(self.A[j, i]) > 0):
                self._draw_arrow(self.pos[j, 0], self.pos[j, 1],
                                 self.pos[i, 0], self.pos[i, 1], Dx, Dy,
                                 label=arrow_label_format % L[j, i],
                                 width=arrow_scale * self.A[j, i],
                                 arrow_curvature=arrow_curvature,
                                 patchA=circles[j], patchB=circles[i],
                                 shrinkA=3, shrinkB=0,
                                 arrow_label_size=arrow_label_size)
    # plot
    self.ax.set_xlim(xmin, xmax)
    self.ax.set_ylim(ymin, ymax)
    return fig
def tpt(msmobj, A, B):
    r""" A->B reactive flux from transition path theory (TPT)

    The returned :class:`ReactiveFlux <msmtools.flux.ReactiveFlux>` object can be used to
    extract various quantities of the flux, as well as to compute A -> B transition pathways,
    their weights, and to coarse-grain the flux onto sets of states.

    Parameters
    ----------
    msmobj : :class:`MSM <pyemma.msm.MSM>` object
        Markov state model (MSM) object
    A : array_like
        List of integer state labels for set A
    B : array_like
        List of integer state labels for set B

    Returns
    -------
    tptobj : :class:`ReactiveFlux <msmtools.flux.ReactiveFlux>` object
        An object containing the reactive A->B flux network and several additional
        quantities, such as the stationary probability, committors and set definitions.

    See also
    --------
    :class:`ReactiveFlux <msmtools.flux.ReactiveFlux>`
        Reactive Flux object

    .. autoclass:: msmtools.flux.reactive_flux.ReactiveFlux
        :members:
        :undoc-members:

        .. rubric:: Methods

        .. autoautosummary:: msmtools.flux.reactive_flux.ReactiveFlux
            :methods:

        .. rubric:: Attributes

        .. autoautosummary:: msmtools.flux.reactive_flux.ReactiveFlux
            :attributes:

    References
    ----------
    Transition path theory was introduced for space-continuous dynamical processes, such as
    Langevin dynamics, in [1]_. [2]_ introduces discrete transition path theory for Markov
    jump processes (Master equation models, rate matrices) and pathway decomposition
    algorithms. [3]_ introduces transition path theory for Markov state models (MSMs) and
    some analysis algorithms. In this function, the equations described in [3]_ are applied.

    .. [1] W. E and E. Vanden-Eijnden.
        Towards a theory of transition paths.
        J. Stat. Phys. 123: 503-523 (2006)
    .. [2] P. Metzner, C. Schuette and E. Vanden-Eijnden.
        Transition Path Theory for Markov Jump Processes.
        Multiscale Model Simul 7: 1192-1219 (2009)
    .. [3] F. Noe, Ch. Schuette, E. Vanden-Eijnden, L. Reich and T. Weikl:
        Constructing the Full Ensemble of Folding Pathways from Short Off-Equilibrium
        Simulations.
        Proc. Natl. Acad. Sci. USA, 106, 19011-19016 (2009)

    """
    T = msmobj.transition_matrix
    mu = msmobj.stationary_distribution
    A = _types.ensure_ndarray(A, kind='i')
    B = _types.ensure_ndarray(B, kind='i')
    tptobj = tpt_factory(T, A, B, mu=mu)
    return tptobj
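A minimal usage sketch, assuming pyemma is installed and using a made-up 3-state transition matrix with A = {0} and B = {2}; the attribute names follow the ReactiveFlux API referenced above:

import numpy as np
import pyemma.msm as msm

P = np.array([[0.80, 0.15, 0.05],
              [0.10, 0.75, 0.15],
              [0.05, 0.10, 0.85]])  # toy row-stochastic transition matrix
M = msm.markov_model(P)             # wrap as an MSM object
flux = msm.tpt(M, [0], [2])         # reactive A->B flux from state 0 to state 2
print(flux.committor)               # forward committor probabilities
print(flux.rate)                    # A->B transition rate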
def __init__(self, model, estimator, mlags=None, conf=0.95, err_est=False,
             n_jobs=1, show_progress=True):
    r"""
    Parameters
    ----------
    model : Model
        Model to be tested
    estimator : Estimator
        Parametrized Estimator that has produced the model
    mlags : int or int-array, default=10
        multiples of lag times for testing the Model, e.g. range(10).
        A single int will trigger a range, i.e. mlags=10 maps to mlags=range(10).
        The setting None will choose mlags automatically according to the longest
        available trajectory.
        Note that you need to be able to do a model prediction for each of these lag
        time multiples, e.g. the value 0 only makes sense if _predict_observables(0)
        will work.
    conf : float, default=0.95
        confidence interval for errors
    err_est : bool, default=False
        if the Estimator is capable of error calculation, will compute errors for
        each tau estimate. This option can be computationally expensive.
    n_jobs : int, default=1
        how many jobs to use during calculation
    show_progress : bool, default=True
        Show progressbars for calculation?
    """
    # set model and estimator
    self.test_model = model
    self.test_estimator = estimator

    # set mlags
    maxlength = np.max([len(dtraj) for dtraj in estimator.discrete_trajectories_full])
    maxmlag = int(math.floor(maxlength / estimator.lag))
    if mlags is None:
        mlags = maxmlag
    if types.is_int(mlags):
        mlags = np.arange(mlags)
    mlags = types.ensure_ndarray(mlags, ndim=1, kind='i')
    if np.any(mlags > maxmlag):
        mlags = mlags[np.where(mlags <= maxmlag)]
        self.logger.warning('Changed mlags as some mlags exceeded maximum trajectory length.')
    if np.any(mlags < 0):
        mlags = mlags[np.where(mlags >= 0)]
        self.logger.warning('Changed mlags as some mlags were negative.')
    self.mlags = mlags

    # set conf and error handling
    self.conf = conf
    self.has_errors = issubclass(self.test_model.__class__, SampledModel)
    if self.has_errors:
        self.test_model.set_model_params(conf=conf)
    self.err_est = err_est
    if err_est and not self.has_errors:
        raise ValueError('Requested errors on the estimated models, '
                         'but the model is not able to calculate errors at all')
    self.n_jobs = n_jobs
    self.show_progress = show_progress
def plot_network(self, state_sizes=None, state_scale=1.0, state_colors='#ff5500',
                 state_labels='auto', arrow_scale=1.0, arrow_curvature=1.0,
                 arrow_labels='weights', arrow_label_format='%10.2f',
                 max_width=12, max_height=12, max_flux=None, figpadding=0.2,
                 xticks=False, yticks=False, show_frame=False, **textkwargs):
    """
    Draws a network using discs and curved arrows.

    The thicknesses and labels of the arrows are taken from the off-diagonal
    matrix elements in A.
    """
    plt = self.plt
    if self.pos is None:
        self.layout_automatic()
    # number of nodes
    n = len(self.pos)
    # get bounds and pad figure
    xmin = np.min(self.pos[:, 0])
    xmax = np.max(self.pos[:, 0])
    Dx = xmax - xmin
    xmin -= Dx * figpadding
    xmax += Dx * figpadding
    Dx *= 1 + figpadding
    ymin = np.min(self.pos[:, 1])
    ymax = np.max(self.pos[:, 1])
    Dy = ymax - ymin
    ymin -= Dy * figpadding
    ymax += Dy * figpadding
    Dy *= 1 + figpadding
    # sizes of nodes
    if state_sizes is None:
        state_sizes = 0.5 * state_scale * min(Dx, Dy)**2 * np.ones(n) / float(n)
    else:
        state_sizes = 0.5 * state_scale * state_sizes / (np.max(state_sizes) * float(n))
        # min(Dx, Dy)**2 * state_sizes / (np.max(state_sizes) * float(n))  # JFR
    # automatic arrow rescaling  **JFR - Soooo confusing, don't do this!
    # arrow_scale *= 1.0 / \
    #     (np.max(self.A - np.diag(np.diag(self.A))) * math.sqrt(n))
    # size figure
    if (Dx / max_width > Dy / max_height):
        figsize = (max_width, Dy * (max_width / Dx))
    else:
        figsize = (Dx / Dy * max_height, max_height)
    fig = plt.gcf()
    fig.set_size_inches(figsize, forward=True)
    # font sizes
    from matplotlib import rcParams
    old_fontsize = rcParams['font.size']
    rcParams['font.size'] = 20
    # remove axis labels
    frame = plt.gca()
    if not xticks:
        frame.axes.get_xaxis().set_ticks([])
    if not yticks:
        frame.axes.get_yaxis().set_ticks([])
    # show or suppress frame
    frame.set_frame_on(show_frame)
    # set node labels
    if isinstance(state_labels, str) and state_labels == 'auto':
        state_labels = [str(i) for i in np.arange(n)]
    else:
        assert len(state_labels) == n, \
            "Mismatch between nstates and nr. state_labels (%u vs %u)" % (n, len(state_labels))
    # set node colors
    if state_colors is None:
        state_colors = '#ff5500'  # None is not acceptable
    if isinstance(state_colors, str):
        state_colors = [state_colors] * n
    if isinstance(state_colors, list):
        assert len(state_colors) == n, \
            "Mismatch between nstates and nr. state_colors (%u vs %u)" % (n, len(state_colors))
    try:
        colorscales = _types.ensure_ndarray(state_colors, ndim=1, kind='numeric')
        colorscales /= colorscales.max()
        state_colors = [plt.cm.binary(int(256.0 * colorscales[i])) for i in range(n)]
    except AssertionError:
        pass  # assume we have a list of strings now.

    # set arrow labels
    if isinstance(arrow_labels, np.ndarray):
        L = arrow_labels
    else:
        L = np.empty(np.shape(self.A), dtype=object)
        if arrow_labels is None:
            L[:, :] = ''
        elif arrow_labels.lower() == 'weights':
            for i in range(n):
                for j in range(n):
                    L[i, j] = arrow_label_format % self.A[i, j]
        else:
            rcParams['font.size'] = old_fontsize
            raise ValueError('invalid arrow label format')

    # Set the default values for the text dictionary
    textkwargs.setdefault('size', 14)
    textkwargs.setdefault('horizontalalignment', 'center')
    textkwargs.setdefault('verticalalignment', 'center')
    textkwargs.setdefault('color', 'black')
    # draw circles
    circles = []
    for i in range(n):
        fig = plt.gcf()
        # choose color
        c = plt.Circle(self.pos[i],
                       radius=math.sqrt(0.5 * state_sizes[i]) / 2.0,
                       color=state_colors[i], zorder=2)
        circles.append(c)
        fig.gca().add_artist(c)
        # add annotation
        plt.text(self.pos[i][0], self.pos[i][1], state_labels[i],
                 zorder=3, **textkwargs)
    assert len(circles) == n, "%i != %i" % (len(circles), n)

    # draw arrows
    # my own colormap
    import matplotlib
    from matplotlib import pyplot as plt
    # define the colormap
    # print(self.A)
    # print(np.all(self.A >= 0))
    if (np.all(self.A >= 0)):
        mycmap = plt.cm.Greys
        # mycmap = plt.cm.winter
        mycmap_max = np.max(np.abs(self.A))
        mycmap_min = -1. * mycmap_max  # 0.  # np.min(self.A[self.A != 0])
    else:
        mycmap = plt.cm.bwr
        # mycmap = plt.cm.jet
        if (max_flux is None):
            mycmap_max = np.max(np.abs(self.A))
            mycmap_min = -mycmap_max
        else:
            mycmap_max = max_flux
            mycmap_min = -mycmap_max
    # extract all colors from the map
    mycmaplist = [mycmap(i) for i in range(mycmap.N)]
    # create the new map
    mycmap = mycmap.from_list('Custom cmap', mycmaplist, mycmap.N)
    # define the bins and normalize
    bounds = np.linspace(mycmap_min, mycmap_max, mycmap.N)
    norm = matplotlib.colors.BoundaryNorm(bounds, mycmap.N)
    mycmaplist = [mycmap(i) for i in range(mycmap.N)]
    dx = bounds[1] - bounds[0]

    for i in range(n):
        for j in range(i + 1, n):
            if (abs(self.A[i, j]) > 0):
                # JFR - let's allow for neg delta-F
                grid = int((self.A[i, j] - mycmap_min) / dx + 0.5)
                color = mycmaplist[grid]
                self._draw_arrow(self.pos[i, 0], self.pos[i, 1],
                                 self.pos[j, 0], self.pos[j, 1], Dx, Dy,
                                 label=str(L[i, j]),
                                 width=arrow_scale * abs(self.A[i, j]),
                                 color=color,
                                 arrow_curvature=arrow_curvature,
                                 patchA=circles[i], patchB=circles[j],
                                 shrinkA=3, shrinkB=0)
            if (abs(self.A[j, i]) > 0):
                grid = int((self.A[j, i] - mycmap_min) / dx + 0.5)
                color = mycmaplist[grid]
                self._draw_arrow(self.pos[j, 0], self.pos[j, 1],
                                 self.pos[i, 0], self.pos[i, 1], Dx, Dy,
                                 label=str(L[j, i]),
                                 width=arrow_scale * abs(self.A[j, i]),
                                 color=color,
                                 arrow_curvature=arrow_curvature,
                                 patchA=circles[j], patchB=circles[i],
                                 shrinkA=3, shrinkB=0)
    # plot
    plt.xlim(xmin, xmax)
    plt.ylim(ymin, ymax)
    rcParams['font.size'] = old_fontsize
    return fig