def re_warp(array_in, lengths):
    """Split :py:obj:`array_in` into consecutive chunks of the given lengths.

    Parameters
    ----------
    array_in : any sliceable sequence
        Sequence to be re-warped. It is sliced, and its ``len`` is taken
        when :py:obj:`lengths` is a single int.

    lengths : int or iterable of integers
        Length of each chunk of the returned list. If a single int is
        passed, every chunk gets that length and as many chunks are
        produced as are needed to cover :py:obj:`array_in`.

        Special cases:

        * more lengths than needed: the trailing chunks are empty slices
        * fewer lengths than :py:obj:`array_in` could fill: only the
          requested chunks are returned, the remainder is not returned

    Returns
    -------
    warped : list
        The chunks, in the order given by :py:obj:`lengths`
    """
    if _is_int(lengths):
        n_chunks = int(_np.ceil(len(array_in) / lengths))
        lengths = [lengths] * n_chunks

    warped = []
    start = 0
    for chunk_len in lengths:
        stop = start + chunk_len
        warped.append(array_in[start:stop])
        start = stop
    return warped
def parse_atom_sel(atom_selection, top):
    r"""
    Translate an atom selection into atom indices of an `mdtraj.Topology`,
    e.g. for a subsequent superposition operation

    Parameters
    ----------

    atom_selection : boolean, str, or iterable of integers
        boolean : True selects all atoms of :obj:`top`, False selects nothing
        str  : anything :obj:`mdtraj.Topology.select` can understand
               (http://mdtraj.org/latest/atom_selection.html)
        iterable of integers : a list or ndarray of atom idxs, returned as is
               after checking that every element is an integer

    top : :obj:`mdtraj.Topology` object

    Returns
    -------
    sel : iterable of integers or None
        None is returned for False and for any input type not listed above
    """
    # Identity checks on purpose: only the actual booleans are special-cased
    if atom_selection is True:
        return _np.arange(top.n_atoms)
    if atom_selection is False:
        return None

    if isinstance(atom_selection, str):
        return top.select(atom_selection)

    if isinstance(atom_selection, (list, _np.ndarray)):
        assert _np.all([_is_int(ii) for ii in atom_selection])
        return atom_selection

    # Anything else is silently treated as "no selection"
    return None
def plot_implied_timescales(ITS, ax=None, outfile=None, show_mle=True, show_mean=True,
                            xlog=False, ylog=True, confidence=0.95, refs=None,
                            nits=-1, process=None, units='steps', dt=1., **kwargs):
    r"""Implied timescale plot

    Parameters
    ----------
    ITS : implied timescales object.
        Object whose data will be plotted. Should be of type
        :class:`ImpliedTimescales <pyemma.msm.ImpliedTimescales>`
        or a 3-tuple with: 1d array of lagtimes, (lagtimes, processes) array
        of timescales, optionally (lagtimes, processes, samples) array of
        timescales of samples.
    ax : matplotlib Axes object, optional, default = None
        the axes to plot to. When set to None the current pyplot Axes
        (``pyplot.gca()``) will be used.
    outfile : str, optional, default = None
        output file to write the figure to. When not given, nothing is
        written (this function never calls ``show``).
    show_mean : bool, default = True
        Line for mean value will be shown, if available
    show_mle : bool, default = True
        Line for maximum likelihood estimate will be shown
    xlog : bool, optional, default = False
        Iff true, the x-Axis is logarithmized
    ylog : bool, optional, default = True
        Iff true, the y-Axis is logarithmized
    confidence : float, optional, default = 0.95
        The confidence interval for plotting error bars (if available)
    refs : ndarray((m), dtype=float), optional, default = None
        Reference (exact solution or other reference) timescales if known,
        given in steps (they are scaled with :py:obj:`dt` like the
        estimated timescales). ``refs[i]`` is read for every plotted
        process index ``i``.
    nits : integer, default = -1
        Number of implied timescales to be shown. The default behaviour (-1)
        is to show all timescales available.
        :py:obj:`nits` != -1 and :py:obj:`process` != None are mutually
        exclusive
    process : iterable of integers, default is None
        list or ndarray((m), dtype=int) containing a list of the processes to
        be shown. The default behaviour is to show all timescales available.
        :py:obj:`process` != None and :py:obj:`nits` != -1 are mutually
        exclusive
    units : str or list (len=2) of strings, optional, default = 'steps'
        Affects the labeling of the axes. Used with :py:obj:`dt`, allows for
        changing the physical units of the axes.
        Accepts simple LaTeX math strings, eg. '$\mu$s'
        If this parameter is a list, it will be assumed that units[0] is for
        the x-axis and units[1] for the y-axis.
    dt : float or list(len=2) of floats, optional, default = 1.0
        Physical time between frames, expressed in the units given in
        :py:obj:`units`. E.g, if you know that each frame corresponds to
        .010 ns, you can use the combination of parameters
        :py:obj:`dt` =0.01, :py:obj:`units` ='ns' to display the implied
        timescales in ns (instead of frames)
        If this parameter is a list, it will be assumed that dt[0] is for the
        x-axis and dt[1] for the y-axis.
    **kwargs :
        Will be passed to pyplot.plot when plotting the MLE datapoints (not
        the bootstrapped means). See the doc of pyplot for more options.
        Most useful are line properties like `marker='o'` and/or
        `markersize=5`

    Returns
    -------
    ax : Axes object containing the plot

    Raises
    ------
    TypeError
        If both :py:obj:`nits` and :py:obj:`process` are set, if
        :py:obj:`nits` is not an integer, or if :py:obj:`units` /
        :py:obj:`dt` are lists of length other than 2.
    ValueError
        If :py:obj:`process` is not an iterable of integers or requests a
        process that the ITS object does not contain.
    """
    if isinstance(ITS, tuple):
        assert len(ITS) in (2, 3)
        lags = _np.asarray(ITS[0])
        timescales = _np.asarray(ITS[1])
        timescales_samples = _np.asarray(
            ITS[2]) if len(ITS) == 3 and ITS[2] is not None else None
        n_timescales = timescales.shape[1]
        samples_available = timescales_samples is not None
    else:
        lags = ITS.lagtimes
        n_timescales = ITS.number_of_timescales
        timescales = ITS.get_timescales()
        samples_available = ITS.samples_available
        timescales_samples = ITS.timescales_samples
        if timescales_samples is not None:
            # Bring the object's samples into the same axis order that the
            # tuple input uses
            timescales_samples = timescales_samples.transpose(1, 2, 0)

    import matplotlib.pyplot as _plt
    # check input
    if ax is None:
        ax = _plt.gca()
    colors = ['blue', 'red', 'green', 'cyan', 'purple', 'orange', 'violet']
    xmax = _np.max(lags)  # largest lag time, in steps
    srt = _np.argsort(lags)

    # Check the processes to be shown
    if process is not None:
        if nits != -1:
            raise TypeError(
                'optional arguments nits and process are mutually exclusive:',
                nits, process)
        if not _is_iterable_of_int(process):
            raise ValueError('process has to be an iterable of integers')
        if _np.max(process) + 1 > n_timescales:
            raise ValueError(
                'requested process %u, whereas ITS only contains %u timescales'
                % (_np.max(process), n_timescales))
        # Now that it's for sure that nits==-1,
        # process is iter_of_ints, and the requested processes exist in its object:
        its_idx = process
    else:
        if not _is_int(nits):
            raise TypeError('nits is not an integer, ', nits)
        if nits == -1:
            nits = n_timescales
        its_idx = _np.arange(n_timescales)[:nits]

    # Check units and dt for user error.
    if isinstance(units, list) and len(units) != 2:
        raise TypeError("If units is a list, len(units) has to be = 2")
    if isinstance(dt, list) and len(dt) != 2:
        raise TypeError("If dt is a list, len(dt) has to be = 2")

    # Create list of units and dts for different axis
    if isinstance(units, str):
        units = [units] * 2
    if isinstance(dt, (float, int)):
        dt = [dt] * 2

    for i in its_idx:
        line_color = colors[i % len(colors)]
        # plot estimate
        if show_mle:
            ax.plot(lags[srt] * dt[0], timescales[..., i][srt] * dt[1],
                    color=line_color, **kwargs)
        # sample available?
        if samples_available:
            # plot sample mean
            process_samples = timescales_samples[:, i, :].T
            if show_mean:
                sample_mean = _np.mean(process_samples, axis=0)
                ax.plot(lags[srt] * dt[0], sample_mean[srt] * dt[1],
                        marker='o', color=line_color, linestyle='dashed')
            lconf, rconf = _conf(process_samples, conf=confidence)
            ax.fill_between(lags[srt] * dt[0],
                            lconf[srt] * dt[1], rconf[srt] * dt[1],
                            alpha=0.2, color=line_color)
        # reference available?
        if refs is not None:
            # Keep tref in steps while comparing it to xmax (also in steps);
            # each axis is scaled to physical units only when plotting.
            tref = refs[i]
            ax.plot([0, min(tref, xmax) * dt[0]],
                    [tref * dt[1], tref * dt[1]],
                    color='black', linewidth=1)

    # cutoff
    ax.plot(lags[srt] * dt[0], lags[srt] * dt[1], linewidth=2, color='black')
    ax.set_xlim([1.0 * dt[0], xmax * dt[0]])
    ax.fill_between(lags[srt] * dt[0],
                    ax.get_ylim()[0] * _np.ones(len(lags)) * dt[1],
                    lags[srt] * dt[1],
                    alpha=0.5, color='grey')

    # formatting
    ax.set_xlabel('lag time / %s' % units[0])
    ax.set_ylabel('timescale / %s' % units[1])
    if xlog:
        ax.set_xscale('log')
    if ylog:
        ax.set_yscale('log')

    # save (pyplot.savefig writes pyplot's *current* figure)
    if outfile is not None:
        _plt.savefig(outfile)
    return ax
def link_ax_w_pos_2_nglwidget(
    ax,
    pos,
    ngl_wdg,
    crosshairs=True,
    dot_color='red',
    band_width=None,
    radius=False,
    directionality=None,
    exclude_coord=None,
):
    r"""
    Link a matplotlib axis to a widget, so that clicks on the axis and frame
    changes in the widget can update each other.

    Initial idea for this function comes from @arose, the rest is @gph82

    Parameters
    ----------
    ax : matplotlib axis object to be linked

    pos : ndarray of shape (N,2) with the positions of the geoms in the ngl_wdg

    ngl_wdg : widget to be linked (presumably an nglview NGLWidget -- confirm)
        If it has no attribute `_GeomsInWid`, its `trajectory_0.n_frames`
        has to be equal to `pos.shape[0]`. The method sets
        `ngl_wdg.isClick = False` and calls `ngl_wdg.center()`

    crosshairs : Boolean or str
        If True, a crosshair will show where the mouse-click occurred. If 'h' or 'v', only the horizontal or
        vertical line of the crosshair will be shown, respectively. If False, no crosshair will appear

    dot_color : Anything that yields matplotlib.colors.is_color_like(dot_color)==True
        Default is 'red'. dot_color='None' yields no dot

    band_width : None or iterable of len = 2
        If band_width is not None, the method tries to figure out on its own if there is an ascending coordinate and will
        include a moving band on :obj:ax of this width (in units of the axis along which the band is plotted)

        If the method cannot find an ascending coordinate, an exception is thrown

    radius : boolean, default is False
        Only has an effect if :obj:`band_width` is not None. If True, a
        translucent green round marker, sized from the mean of
        :obj:`band_width`, is drawn instead of the band, and no ascending
        coordinate is looked for

    directionality : str or None, default is None
        If not None, directionality can be either 'a2w' or 'w2a', meaning that connectivity between axis and widget
        will be only established as
        * 'a2w' : action in axis   triggers action in widget, but not the other way around
        * 'w2a' : action in widget triggers action in axis, but not the other way around

    exclude_coord : None or int , default is None
        The excluded coordinate will not be considered when computing the nearest-point-to-click.
        Typical use case is for visualize.traj to only compute distances horizontally along the time axis

    Returns
    -------

    axes_widget : :obj:`matplotlib.Axes.Axeswidget` that has been linked to the NGLWidget

    Raises
    ------
    AssertionError
        For invalid :obj:`directionality` or :obj:`crosshairs` values, or
        mismatching frame numbers between widget and :obj:`pos`
    TypeError
        If :obj:`dot_color` is not a matplotlib color
    ValueError
        If a band is requested but no ascending coordinate is found
    """

    assert directionality in [None, 'a2w', 'w2a'], "The directionality parameter has to be in [None, 'a2w', 'w2a'] " \
                                                   "not %s"%directionality

    assert crosshairs in [True, False, 'h', 'v'], "The crosshairs parameter has to be in [True, False, 'h','v'], " \
                                                   "not %s" % crosshairs
    # Work on a copy so that zeroing the excluded coordinate leaves pos intact
    # NOTE(review): ipos is not referenced again in this function; the
    # listeners below receive pos. Confirm that exclude_coord is still
    # expected to have an effect here.
    ipos = _np.copy(pos)
    if _is_int(exclude_coord):
        ipos[:, exclude_coord] = 0
    # Are we in a sticky situation?
    # (the widget is considered "sticky" iff it has a _GeomsInWid attribute;
    # only non-sticky widgets get the frame-number check)
    if hasattr(ngl_wdg, '_GeomsInWid'):
        sticky = True
    else:
        assert ngl_wdg.trajectory_0.n_frames == pos.shape[0], \
            ("Mismatching frame numbers %u vs %u" % (ngl_wdg.trajectory_0.n_frames, pos.shape[0]))
        sticky = False

    # Basic interactive objects
    # Every artist created here is tagged with a 'whatisthis' attribute
    # (presumably read by the listeners to decide how to move it -- confirm)
    showclick_objs = []
    if crosshairs in [True, 'h']:
        lineh = ax.axhline(ax.get_ybound()[0], c="black", ls='--')
        setattr(lineh, 'whatisthis', 'lineh')
        showclick_objs.append(lineh)
    if crosshairs in [True, 'v']:
        linev = ax.axvline(ax.get_xbound()[0], c="black", ls='--')
        setattr(linev, 'whatisthis', 'linev')
        showclick_objs.append(linev)

    if _is_color_like(dot_color):
        pass
    else:
        raise TypeError('dot_color should be a matplotlib color')

    # The dot starts at the first position and is always in the update list
    dot = ax.plot(pos[0, 0], pos[0, 1], 'o', c=dot_color, ms=7, zorder=100)[0]
    setattr(dot, 'whatisthis', 'dot')
    list_mpl_objects_to_update = [dot]

    # Other objects, related to smoothing options
    if band_width is not None:
        if radius:
            # Round marker: size is the mean points-per-axis-unit times the
            # mean of band_width
            band_width_in_pts = int(
                _np.round(pts_per_axis_unit(ax).mean() * _np.mean(band_width)))
            rad = ax.plot(pos[0, 0], pos[0, 1], 'o',
                          ms=_np.round(band_width_in_pts),
                          c='green', alpha=.25, markeredgecolor='None')[0]
            setattr(rad, 'whatisthis', 'dot')
            # In the sticky case the marker is drawn but not registered for updates
            if not sticky:
                list_mpl_objects_to_update.append(rad)
        else:
            # print("Band_width(x,y) is %s" % (band_width))
            coord_idx = get_ascending_coord_idx(pos)
            # An empty, non-scalar result means no ascending coordinate was found
            if _np.ndim(coord_idx) > 0 and len(coord_idx) == 0:
                raise ValueError(
                    "Must have an ascending coordinate for band_width usage")
            band_width_in_pts = int(
                _np.round(
                    pts_per_axis_unit(ax)[coord_idx] * band_width[coord_idx]))
            # print("Band_width in %s is %s pts"%('xy'[coord_idx], band_width_in_pts))

            # coord_idx 0 -> vertical band (axvline), 1 -> horizontal band (axhline)
            band_call = [ax.axvline, ax.axhline][coord_idx]
            band_init = [ax.get_xbound, ax.get_ybound][coord_idx]
            band_type = ['linev', 'lineh'][coord_idx]
            band = band_call(band_init()[0], lw=band_width_in_pts, c="green", ls='-', alpha=.25)
            setattr(band, 'whatisthis', band_type)
            list_mpl_objects_to_update.append(band)

    ngl_wdg.isClick = False
    # Both listeners are always built; directionality only decides which get connected
    CLA_listener = ClickOnAxisListener(ngl_wdg, crosshairs, showclick_objs, ax, pos, list_mpl_objects_to_update)
    NGL_listener = ChangeInNGLWidgetListener(ngl_wdg, list_mpl_objects_to_update, pos)

    # Connect axes to widget
    axes_widget = _AxesWidget(ax)
    if directionality in [None, 'a2w']:
        axes_widget.connect_event('button_release_event', CLA_listener)

    # Connect widget to axes
    if directionality in [None, 'w2a']:
        ngl_wdg.observe(NGL_listener, "frame", "change")

    ngl_wdg.center()
    return axes_widget
def visual_path(cat_idxs, cat_data, path_type='min_disp', start_pos='maxpop', start_frame=None, **path_kwargs):
    r"""
    Create a path that advances along the catalogues (i.e. along the
    coordinate of interest) while minimizing the distance between
    consecutive frames (minimal displacement path)

    Parameters
    ----------

    cat_idxs : list or np.ndarray of len(cat_data)
        Each element of this iterable is an ndarray (N,2) with
        (traj_idx, frame_idx) pairs pointing towards the trajectory frames.
        It usually has been generated using cl.sample_indexes_by_cluster.

    cat_data : iterable of length len(cat_idxs)
        Each element of this iterable contains the data corresponding to the
        frames contained in :py:obj:`cat_idxs`. At the moment, this data can
        be either an nd.array or an :py:obj:`mdtraj.Trajectory`

    path_type : str, default is 'min_disp'
        Either 'min_disp' or 'min_rmsd'; selects min_disp_path or
        min_rmsd_path as the path-choosing algorithm. Anything else raises
        a NotImplementedError

    start_pos : str or int, default is 'maxpop'
        Where to start the path. It refers to an index of :py:obj:`cat_idxs`
        and :py:obj:`cat_data`.

        'maxpop' : start at the most populated catalogue entry and build two
                   paths from there, one moving forward and one moving
                   backward
        int      : start at cat_idxs[start_pos] and cat_data[start_pos]

        Any other value (including 'left' and 'right') raises a
        NotImplementedError

    start_frame : int or None, default is None
        Frame, within the starting catalogue entry, to start from. If None,
        a random frame of that entry is drawn

    **path_kwargs :
        keyword arguments for the path-choosing algorithm. See min_disp_path
        or min_rmsd_path for details, but in the meantime, these are
        history_aware=True or False and exclude_coords=None or [0], or [0,1]
        etc...

    Returns
    -------
    path : ndarray with one row per entry of :py:obj:`cat_idxs`
        The rows are the :py:obj:`cat_idxs` entries picked along the path

    start_idx : int
        Index of the catalogue entry where the path was started
    """
    # Both catalogues need the same number of entries ...
    assert len(cat_data) == len(cat_idxs)
    # ... and each pair of entries needs the same number of frames
    assert _np.all(
        [len(icd) == len(ici) for icd, ici in zip(cat_data, cat_idxs)])

    if start_pos == 'maxpop':
        start_idx = _np.argmax([len(icat) for icat in cat_idxs])
    elif _is_int(start_pos):
        start_idx = start_pos
    else:
        raise NotImplementedError(start_pos)

    if start_frame is None:
        # Draw a random frame from the starting point's catalogue
        start_frame = _np.random.randint(0, high=len(cat_idxs[start_idx]))

    fwd_start = cat_data[start_idx][start_frame]
    bwd_start = cat_data[start_idx][start_frame]

    if path_type == 'min_disp':
        find_path = min_disp_path
    elif path_type == 'min_rmsd':
        find_path = min_rmsd_path
    else:
        raise NotImplementedError(path_type)

    # Forward: entries after start_idx. Backward: entries before it, reversed
    fwd_frames = [start_frame] + find_path(
        fwd_start, cat_data[start_idx + 1:], **path_kwargs)
    bwd_frames = [start_frame] + find_path(
        bwd_start, cat_data[:start_idx][::-1], **path_kwargs)

    fwd_cats = cat_idxs[start_idx:]
    path_fwd = _np.vstack(
        [fwd_cats[ii][idx] for ii, idx in enumerate(fwd_frames)])

    # Entries up to and including start_idx (hence the +1), reversed
    bwd_cats = cat_idxs[:start_idx + 1][::-1]
    path_bwd = _np.vstack(
        [bwd_cats[ii][idx] for ii, idx in enumerate(bwd_frames)])

    # Put the backward path in ascending order and drop its last frame,
    # which is the starting frame already heading the forward path
    path = _np.vstack((path_bwd[::-1][:-1], path_fwd))

    # Sanity check: exactly one frame per catalogue entry
    assert len(path) == len(cat_idxs)
    return path, start_idx
def catalogues(cl, data=None, sort_by=None):
    r""" Returns the frames in catalogues form by cluster index:
     one as list (len Ncl) of ndarrays each of shape (Ni, 2) containing pairs of (traj_idx, frame_idx) values
     and one as lists of ndarrays of the actual (continuous) data values at the (traj_idx, frame_idx)

    Parameters
    ----------

    cl : :obj:`pyemma.coordinates.cluster_regspace` object

    data : None or list, default is None
       The :obj:`cl` has its own :obj:`cl.data_producer.data` attribute from which it can
       retrieve the necessary information for the :obj:`cat_data` (default behaviour)
       However, any other data can be given here, **as long as the user is sure that it represents EXACTLY
       the data that was used to parametrize the :obj:`cl` object**.
       Internally, the only checks that are carried out are:

           len(data) == len(cl.data_producer.data)

           [len(idata) == len(jdata) for idata, jdata in zip(data, cl.data_producer.data)]

       (Note that by construction the same relations should hold for :obj:`cl.dtrajs`)

    sort_by : None or int, default is None
       Default behaviour is to return the catalogues in the same order of clustercenters as the input,
       but it is sometimes useful to have them sorted by ascending order of the n-th coordinate
       of the input space. Has to be smaller than the number of dimensions
       of :obj:`cl.clustercenters`

    Returns
    --------

    cat_idxs : list of 2D np.arrays
        The discrete catalogue. It is a list of len = :obj:`cl.n_clustercenters` containing a 2D vector
        with all the (file, frame)-pairs in which each clustercenter appears

    cat_data : list of ndarrays
        The actual value (assumed continuous) of the data at the (file-frame)-pairs of the :obj:`cat_idxs` list

    Raises
    ------
    AssertionError
        If :obj:`data` does not match the shape of the clustering's own data,
        or if :obj:`sort_by` is not an integer or does not address an
        existing coordinate of the clustercenters
    """

    idata = cl.data_producer.data
    if data is not None:
        assert len(data) == len(idata)
        assert _np.all(
            [len(jdata) == len(ddata) for jdata, ddata in zip(idata, data)])
        idata = data

    cat_idxs = _index_states(cl.dtrajs)
    # One stacked array per cluster, with the data of all its (traj, frame) pairs
    cat_cont = [_np.vstack([idata[ii][jj] for ii, jj in icat])
                for icat in cat_idxs]

    if sort_by is not None:
        assert _is_int(sort_by)
        n_dims = cl.clustercenters.shape[1]
        # Strictly smaller: valid column indices are 0 .. n_dims-1
        assert sort_by < n_dims, \
            "Want to sort by %u-th coord, but centers have only %u dims" % (
                sort_by, n_dims)
        sorts_coordinate = _np.argsort(cl.clustercenters[:, sort_by])
        cat_idxs = [cat_idxs[ii] for ii in sorts_coordinate]
        cat_cont = [cat_cont[ii] for ii in sorts_coordinate]

    return cat_idxs, cat_cont
def listify_if_int(inp):
    """Wrap :py:obj:`inp` in a one-element list if it is an integer.

    Parameters
    ----------
    inp : anything
        Input that is checked with ``_is_int``

    Returns
    -------
    inp : list or the unchanged input
        ``[inp]`` if :py:obj:`inp` is an integer, else :py:obj:`inp` itself
    """
    return [inp] if _is_int(inp) else inp
def add_atom_idxs_widget(atom_idxs, ngl_wdg, color_list=None, radius=1):
    r"""
    provided a list of atom_idxs and a ngl_wdg, try to represent them as well as possible in the ngl_wdg
    It is assumed that this method is called once per feature, ie. the number of atoms defines the
    feature. This way, the method decides how to best represent them. Currently, that means:
     * single atoms:  assume cartesian feature, represent with spacefill
     * pairs of atoms: assume distance feature, represent with distance
     * groups of 3 or 4 atoms: represent all of them with spacefill
     * everything else is not represented, a message is printed instead

    The representations are added to every component of the widget
    (one per entry of `ngl_wdg._ngl_component_ids`).

    Parameters
    ----------

    atom_idxs : list of iterables of integers. If [], the method won't do anything

    ngl_wdg : nglview ngl_wdg on which to represent stuff

    color_list: list, default is None
        list of colors to provide the representations with. The default None yields blue
        (so do [None] and []).
        In principle, len(atom_idxs) should be == len(color_list),
        but if your list is short it will just default to the last color. This way, color_list=['black'] will paint
        all black regardless len(atom_idxs). The list passed by the caller is never modified.

    radius : float, default is 1
        radius of the spacefill representation

    Returns
    -------
    ngl_wdg : Input ngl_wdg with the representations added

    """

    if color_list in [None, [None], []]:
        color_list = ['blue'] * len(atom_idxs)
    elif isinstance(color_list, list) and len(color_list) < len(atom_idxs):
        # Pad a new list instead of using += so that the caller's list is
        # left untouched
        color_list = color_list + [color_list[-1]] * (len(atom_idxs) - len(color_list))

    # An empty atom_idxs simply yields no iterations of the inner loop
    for cc in range(len(ngl_wdg._ngl_component_ids)):
        for iidxs, color in zip(atom_idxs, color_list):
            if _is_int(iidxs):
                ngl_wdg.add_spacefill(selection=[iidxs], radius=radius, color=color, component=cc)
            elif _np.ndim(iidxs) > 0 and len(iidxs) == 2:
                ngl_wdg.add_distance(atom_pair=[list(iidxs)],  # the pair has to be passed as a list of lists for now
                                     color=color,
                                     label_size=0,
                                     component=cc)
                # TODO add line thickness as **kwarg
            elif _np.ndim(iidxs) > 0 and len(iidxs) in [3, 4]:
                ngl_wdg.add_spacefill(selection=iidxs, radius=radius, color=color, component=cc)
            else:
                print("Cannot represent features involving more than 5 atoms per single feature")

    return ngl_wdg