Exemple #1
0
def re_warp(array_in, lengths):
    """Split :py:obj:`array_in` into consecutive chunks of the requested lengths.

    Parameters
    ----------

    array_in : sliceable sequence
        Sequence to be re-warped. It is consumed through slicing (and through
        ``len`` when :py:obj:`lengths` is a single int).

    lengths : int or iterable of integers
        Length of each chunk of the returned list. A single int is expanded to
        ``ceil(len(array_in) / lengths)`` chunks of that size. Special cases:
            * the lengths add up to more than ``len(array_in)``: one chunk is still
              returned per length, the trailing ones being whatever the slice yields
              past the end of the input (shorter or empty for standard sequences)
            * the lengths add up to less than ``len(array_in)``: only the requested
              chunks are returned, the remaining elements are left out

    Returns
    -------
    warped : list
        One slice of :py:obj:`array_in` per entry of :py:obj:`lengths`
    """
    if _is_int(lengths):
        # A single chunk size: use as many chunks as needed to cover the input
        n_chunks = int(_np.ceil(len(array_in) / lengths))
        lengths = [lengths] * n_chunks

    warped, start = [], 0
    for chunk_len in lengths:
        stop = start + chunk_len
        warped.append(array_in[start:stop])
        start = stop
    return warped
Exemple #2
0
def parse_atom_sel(atom_selection, top):
    r"""
    Translate :obj:`atom_selection` into the atom indices it refers to in :obj:`top`

    Parameters
    ----------

    atom_selection : boolean, str, or iterable of integers
        boolean : ``True`` selects all atoms of :obj:`top`, ``False`` selects nothing (returns None)
        str  : forwarded verbatim to :obj:`top.select`
               (see http://mdtraj.org/latest/atom_selection.html for the mdtraj syntax)
        list or ndarray of integers : returned as-is after checking that every entry is an integer

        Any other input (e.g. None or a tuple) yields None

    top : :obj:`mdtraj.Topology` object
        Only its :obj:`n_atoms` attribute and :obj:`select` method are used

    Returns
    -------

    sel : iterable of integers or None
    """
    # The booleans are matched by identity so that 0/1 are not mistaken for them
    if atom_selection is True:
        return _np.arange(top.n_atoms)
    if atom_selection is False:
        return None
    if isinstance(atom_selection, str):
        return top.select(atom_selection)
    if isinstance(atom_selection, (list, _np.ndarray)):
        assert _np.all([_is_int(ii) for ii in atom_selection])
        return atom_selection
    # Unrecognized input: no selection
    return None
Exemple #3
0
def plot_implied_timescales(ITS,
                            ax=None,
                            outfile=None,
                            show_mle=True,
                            show_mean=True,
                            xlog=False,
                            ylog=True,
                            confidence=0.95,
                            refs=None,
                            nits=-1,
                            process=None,
                            units='steps',
                            dt=1.,
                            **kwargs):
    r"""Implied timescale plot

    Plots the timescales against the lag times, optionally with sample means and
    confidence bands, and shades the region below the ``timescale = lag time`` line.

    Parameters
    ----------
    ITS : implied timescales object.
        Object whose data will be plotted. Should be of type :class:`ImpliedTimescales <pyemma.msm.ImpliedTimescales>`
        or a tuple of length 2 or 3 with: 1d array of lagtimes, (lagtimes, processes) array of timescales,
        optionally (lagtimes, processes, samples) array of timescales of samples (may be None).
    ax : matplotlib Axes object, optional, default = None
        the axes to plot to. When set to None, ``matplotlib.pyplot.gca()`` is used.
    outfile : str, optional, default = None
        output file to write the figure to (via ``matplotlib.pyplot.savefig``). When None, nothing is
        written; this function itself never calls ``show``.
    show_mle : bool, default = True
        Line for maximum likelihood estimate will be shown
    show_mean : bool, default = True
        Line for mean value will be shown, if samples are available
    xlog : bool, optional, default = False
        Iff true, the x-Axis is logarithmized
    ylog : bool, optional, default = True
        Iff true, the y-Axis is logarithmized
    confidence : float, optional, default = 0.95
        The confidence interval for plotting error bands (if samples are available)
    refs : ndarray((m), dtype=float), optional, default = None
        Reference (exact solution or other reference) timescales if known. It is indexed with the
        process index, so the number of timescales must match those in the ITS object
    nits: integer, default = -1
        Number of implied timescales to be shown. The default behaviour (-1) is to show all timescales available.
        :py:obj:`nits` != -1 and :py:obj:`process` != None are mutually exclusive
    process : iterable of integers, default is None
        list or ndarray((m), dtype=int) containing a list of the processes to be shown. The default behaviour is
        to show all timescales available.
        :py:obj:`process` != None and :py:obj:`nits` != -1 are mutually exclusive
    units: str or list (len=2) of strings, optional, default = 'steps'
        Affects the labeling of the axes. Used with :py:obj:`dt`, allows for changing the physical units of the axes.
        Accepts simple LaTeX math strings, eg. '$\mu$s'
        If this parameter is a list, it will be assumed that units[0] is for the x-axis and units[1] for the y-axis.
    dt: float or list(len=2) of floats, optional, default = 1.0
        Physical time between frames, expressed in the units given in :py:obj:`units`. E.g, if you know that each
        frame corresponds to .010 ns, you can use the combination of parameters :py:obj:`dt` =0.01,
        :py:obj:`units` ='ns' to display the implied timescales in ns (instead of frames)
        If this parameter is a list, it will be assumed that dt[0] is for the x-axis and dt[1] for the y-axis.

    **kwargs: Will be passed to pyplot.plot when plotting the MLE datapoints (not the bootstrapped means).
            See the doc of pyplot for more options. Most useful lineproperties like `marker='o'` and/or :markersize=5

    Returns
    -------
    ax : Axes object containing the plot

    Raises
    ------
    TypeError
        If both :py:obj:`nits` and :py:obj:`process` are given, if :py:obj:`nits` is not an integer,
        or if :py:obj:`units` / :py:obj:`dt` is a list whose length is not 2
    ValueError
        If :py:obj:`process` is not an iterable of integers or requests a process beyond the
        available number of timescales
    AssertionError
        If :py:obj:`ITS` is a tuple whose length is neither 2 nor 3

    """
    # --- Unpack the input into lags / timescales / (optional) samples ---
    if isinstance(ITS, tuple):
        assert len(ITS) in (2, 3)
        lags = _np.asarray(ITS[0])
        timescales = _np.asarray(ITS[1])
        timescales_samples = _np.asarray(
            ITS[2]) if len(ITS) == 3 and ITS[2] is not None else None
        n_timescales = timescales.shape[1]
        samples_available = timescales_samples is not None
    else:
        lags = ITS.lagtimes
        n_timescales = ITS.number_of_timescales
        timescales = ITS.get_timescales()
        samples_available = ITS.samples_available
        timescales_samples = ITS.timescales_samples
        if timescales_samples is not None:
            # Move the first axis to the end so the array is indexed like the tuple input below;
            # presumably the object stores (samples, lagtimes, processes) -- TODO confirm
            timescales_samples = timescales_samples.transpose(1, 2, 0)
    # Imported lazily, only when the function is actually called
    import matplotlib.pyplot as _plt
    # check input
    if ax is None:
        ax = _plt.gca()
    # Colors are cycled by process index (i % len(colors)) further down
    colors = ['blue', 'red', 'green', 'cyan', 'purple', 'orange', 'violet']
    xmax = _np.max(lags)
    # Everything is plotted in ascending order of lag time
    srt = _np.argsort(lags)
    # Check the processes to be shown
    if process is not None:
        if nits != -1:
            raise TypeError(
                'optional arguments nits and process are mutually exclusive:',
                nits, process)
        if not _is_iterable_of_int(process):
            raise ValueError('process has to be an iterable of integers')
        # Only the largest requested index is checked against the available timescales
        if _np.max(process) + 1 > n_timescales:
            raise ValueError(
                'requested process %u, whereas ITS only contains %u timescales'
                % (_np.max(process), n_timescales))
        # Now that it's for sure that nits==-1,
        # process is iter_of_ints, and the requested processes exist in its object:
        its_idx = process
    else:
        if not _is_int(nits):
            raise TypeError('nits is not an integer, ', nits)
        if nits == -1:
            nits = n_timescales
        # The first nits processes
        its_idx = _np.arange(n_timescales)[:nits]
    # Check units and dt for user error.
    if isinstance(units, list) and len(units) != 2:
        raise TypeError("If units is a list, len(units) has to be = 2")
    if isinstance(dt, list) and len(dt) != 2:
        raise TypeError("If dt is a list, len(dt) has to be = 2")
    # Create list of units and dts for different axis
    # (index 0 is used for the x-axis, index 1 for the y-axis)
    if isinstance(units, str):
        units = [units] * 2
    if isinstance(dt, (float, int)):
        dt = [dt] * 2
    for i in its_idx:
        # plot estimate
        if show_mle:
            ax.plot(lags[srt] * dt[0],
                    timescales[..., i][srt] * dt[1],
                    color=colors[i % len(colors)],
                    **kwargs)
        # sample available?
        if samples_available:
            # plot sample mean
            # After the transpose, axis 0 runs over samples (given the documented
            # (lagtimes, processes, samples) layout)
            process_samples = timescales_samples[:, i, :].T
            if show_mean:
                sample_mean = _np.mean(process_samples, axis=0)
                ax.plot(lags[srt] * dt[0],
                        sample_mean[srt] * dt[1],
                        marker='o',
                        color=colors[i % len(colors)],
                        linestyle='dashed')
            # The confidence band is drawn whenever samples exist, independently of show_mean.
            # _conf is defined elsewhere; presumably it returns the lower and upper bounds
            # per lag time -- TODO confirm
            lconf, rconf = _conf(process_samples, conf=confidence)
            ax.fill_between(lags[srt] * dt[0],
                            lconf[srt] * dt[1],
                            rconf[srt] * dt[1],
                            alpha=0.2,
                            color=colors[i % len(colors)])
        # reference available?
        if refs is not None:
            # NOTE(review): tref is already scaled by dt[1], yet it is compared against the
            # unscaled xmax and then multiplied by dt[0]; this looks like mixed units
            # whenever dt != 1 -- confirm the intended x-extent of the reference line
            tref = refs[i] * dt[1]
            ax.plot([0, min(tref, xmax) * dt[0]], [tref, tref],
                    color='black',
                    linewidth=1)
    # cutoff
    # Diagonal "timescale = lag time" line; the area below it is shaded grey
    ax.plot(lags[srt] * dt[0], lags[srt] * dt[1], linewidth=2, color='black')
    ax.set_xlim([1.0 * dt[0], xmax * dt[0]])
    # NOTE(review): get_ylim() is read after plotting, so it is already in the plotted
    # (dt-scaled) units; the extra * dt[1] scales the lower edge again -- confirm
    ax.fill_between(lags[srt] * dt[0],
                    ax.get_ylim()[0] * _np.ones(len(lags)) * dt[1],
                    lags[srt] * dt[1],
                    alpha=0.5,
                    color='grey')
    # formatting
    ax.set_xlabel('lag time / %s' % units[0])
    ax.set_ylabel('timescale / %s' % units[1])
    if xlog:
        ax.set_xscale('log')
    if ylog:
        ax.set_yscale('log')
    # show or save
    # NOTE(review): pyplot.savefig writes pyplot's *current* figure, which need not be the
    # figure owning a user-supplied ax -- confirm this is intended
    if outfile is not None:
        _plt.savefig(outfile)
    return ax
Exemple #4
0
def link_ax_w_pos_2_nglwidget(
    ax,
    pos,
    ngl_wdg,
    crosshairs=True,
    dot_color='red',
    band_width=None,
    radius=False,
    directionality=None,
    exclude_coord=None,
):
    r"""
    Link a matplotlib axis and an NGL widget so that clicks / frame changes in one
    are reflected in the other.

    Initial idea for this function comes from @arose, the rest is @gph82

    Parameters
    ----------
    ax : matplotlib axis object to be linked

    pos : ndarray of shape (N,2) with the positions of the geoms in the ngl_wdg

    ngl_wdg : NGL widget to be linked
        Unless it carries a :obj:`_GeomsInWid` attribute ("sticky" widget), its
        :obj:`trajectory_0.n_frames` has to match ``pos.shape[0]``

    crosshairs : Boolean or str
        If True, a crosshair will show where the mouse-click ocurred. If 'h' or 'v', only the horizontal or
        vertical line of the crosshair will be shown, respectively. If False, no crosshair will appear

    dot_color : Anything that yields matplotlib.colors.is_color_like(dot_color)==True
        Default is 'red'. dot_color='None' yields no dot

    band_width : None or iterable of len = 2
        If band_width is not None, the method tries to figure out on its own if
        there is an ascending coordinate and will include a moving band on :obj:ax
        of this width (in units of the axis along which the band is plotted)

        If the method cannot find an ascending coordinate, an exception is thrown

    radius : boolean, default is False
        Only used when :obj:`band_width` is not None. If True, a translucent green disc
        (sized from the mean of :obj:`band_width`) is drawn at the dot's position instead
        of the band, and no ascending coordinate is needed

    directionality : str or None, default is None
        If not None, directionality can be either 'a2w' or 'w2a', meaning that connectivity
         between axis and widget will be only established as
         * 'a2w' : action in axis   triggers action in widget, but not the other way around
         * 'w2a' : action in widget triggers action in axis, but not the other way around

    exclude_coord : None or int , default is None
        The excluded coordinate will not be considered when computing the nearest-point-to-click.
        Typical use case is for visualize.traj to only compute distances horizontally along the time axis

    Returns
    -------

    axes_widget : :obj:`matplotlib.Axes.Axeswidget` that has been linked to the NGLWidget
    """

    assert directionality in [None, 'a2w', 'w2a'], \
        "The directionality parameter has to be in [None, 'a2w', 'w2a'] not %s" % directionality

    assert crosshairs in [True, False, 'h', 'v'], \
        "The crosshairs parameter has to be in [True, False, 'h','v'], not %s" % crosshairs

    # Copy of the positions with the excluded coordinate flattened to zero.
    # NOTE(review): this copy is not used anywhere further down in this function, so
    # exclude_coord has no visible effect here -- confirm whether a listener should get it
    masked_pos = _np.copy(pos)
    if _is_int(exclude_coord):
        masked_pos[:, exclude_coord] = 0

    # Are we in a sticky situation?
    sticky = hasattr(ngl_wdg, '_GeomsInWid')
    if not sticky:
        n_frames = ngl_wdg.trajectory_0.n_frames
        assert n_frames == pos.shape[0], \
            ("Mismatching frame numbers %u vs %u" % (ngl_wdg.trajectory_0.n_frames, pos.shape[0]))

    # Crosshair lines that mark where a click happened; each artist is tagged through
    # its `whatisthis` attribute
    showclick_objs = []
    if crosshairs in [True, 'h']:
        horizontal = ax.axhline(ax.get_ybound()[0], c="black", ls='--')
        horizontal.whatisthis = 'lineh'
        showclick_objs.append(horizontal)
    if crosshairs in [True, 'v']:
        vertical = ax.axvline(ax.get_xbound()[0], c="black", ls='--')
        vertical.whatisthis = 'linev'
        showclick_objs.append(vertical)

    if not _is_color_like(dot_color):
        raise TypeError('dot_color should be a matplotlib color')

    # The dot, initially placed on the first position
    dot_artists = ax.plot(pos[0, 0], pos[0, 1], 'o', c=dot_color, ms=7, zorder=100)
    dot = dot_artists[0]
    dot.whatisthis = 'dot'
    list_mpl_objects_to_update = [dot]

    # Other objects, related to smoothing options
    if band_width is not None:
        if radius:
            # Disc around the dot, sized with the average points-per-unit of both axes
            band_width_in_pts = int(
                _np.round(pts_per_axis_unit(ax).mean() * _np.mean(band_width)))
            rad = ax.plot(pos[0, 0],
                          pos[0, 1],
                          'o',
                          ms=_np.round(band_width_in_pts),
                          c='green',
                          alpha=.25,
                          markeredgecolor='None')[0]
            rad.whatisthis = 'dot'
            # For sticky widgets the disc is drawn but not registered for updates
            if not sticky:
                list_mpl_objects_to_update.append(rad)
        else:
            # Band along the ascending coordinate (index 0 -> vertical band, 1 -> horizontal)
            coord_idx = get_ascending_coord_idx(pos)
            if _np.ndim(coord_idx) > 0 and len(coord_idx) == 0:
                raise ValueError(
                    "Must have an ascending coordinate for band_width usage")
            band_width_in_pts = int(
                _np.round(
                    pts_per_axis_unit(ax)[coord_idx] * band_width[coord_idx]))

            draw_band = [ax.axvline, ax.axhline][coord_idx]
            get_bound = [ax.get_xbound, ax.get_ybound][coord_idx]
            band_tag = ['linev', 'lineh'][coord_idx]
            band = draw_band(get_bound()[0],
                             lw=band_width_in_pts,
                             c="green",
                             ls='-',
                             alpha=.25)
            band.whatisthis = band_tag
            list_mpl_objects_to_update.append(band)

    ngl_wdg.isClick = False

    # Both listeners are always created; which ones get connected depends on directionality
    CLA_listener = ClickOnAxisListener(ngl_wdg, crosshairs, showclick_objs, ax,
                                       pos, list_mpl_objects_to_update)
    NGL_listener = ChangeInNGLWidgetListener(ngl_wdg,
                                             list_mpl_objects_to_update, pos)

    axes_widget = _AxesWidget(ax)
    # Connect axes to widget
    if directionality in [None, 'a2w']:
        axes_widget.connect_event('button_release_event', CLA_listener)

    # Connect widget to axes
    if directionality in [None, 'w2a']:
        ngl_wdg.observe(NGL_listener, "frame", "change")

    ngl_wdg.center()
    return axes_widget
Exemple #5
0
def visual_path(cat_idxs,
                cat_data,
                path_type='min_disp',
                start_pos='maxpop',
                start_frame=None,
                **path_kwargs):
    r""" Create a path that visits every catalogue once, advancing along the coordinate of
    interest while (per the chosen path algorithm) staying close in the other coordinates.

    Parameters
    ----------

    cat_idxs : list or np.ndarray of len(cat_data)
        Each element of this iterable is an ndarray (N,2) with (traj_idx, frame_idx)
        pairs pointing towards the trajectory frames. It usually has been generated
        using cl.sample_indexes_by_cluster.

    cat_data : iterable of length len(cat_idxs)
        Each element of this iterable contains the data corresponding to the frames contained
        in :py:obj:`cat_idxs` (same lengths, element by element). At the moment, this data can
        be either an nd.array or an :py:obj:`mdtraj.Trajectory`

    path_type : str, default is 'min_disp'
        Path-choosing algorithm: 'min_disp' uses min_disp_path, 'min_rmsd' uses min_rmsd_path.
        Anything else raises a NotImplementedError

    start_pos : str or int, default is 'maxpop'
        Index of :py:obj:`cat_idxs` and :py:obj:`cat_data` at which the path starts.
        'maxpop' : start at the most populated catalogue; one path is then built moving
                   forward from it and another one moving backward
        int :      start at cat_idxs[start_pos] and cat_data[start_pos]
        Any other value raises a NotImplementedError

    start_frame : int or None, default is None
        Frame, within the starting catalogue, from which the path starts. If None, a random
        frame of that catalogue is drawn

    **path_kwargs : keyword arguments for the path-choosing algorithm. See min_disp_path or
        min_rmsd_path for details

    Returns
    -------

    path : ndarray
        The (traj_idx, frame_idx) pairs of the path, stacked in the order of :py:obj:`cat_idxs`

    start_idx : int
        Index of the catalogue where the path was started
    """
    # Both catalogues must have the same number of entries ...
    assert len(cat_data) == len(cat_idxs)
    # ... and every pair of entries the same number of frames
    assert _np.all(
        [len(icd) == len(ici) for icd, ici in zip(cat_data, cat_idxs)])

    if start_pos == 'maxpop':
        populations = [len(icat) for icat in cat_idxs]
        start_idx = _np.argmax(populations)
    elif _is_int(start_pos):
        start_idx = start_pos
    else:
        raise NotImplementedError(start_pos)

    if start_frame is None:
        # Draw a random frame from the starting point's catalogue
        start_frame = _np.random.randint(0, high=len(cat_idxs[start_idx]))

    # Each direction gets its own lookup of the starting geometry
    start_fwd = cat_data[start_idx][start_frame]
    start_bwd = cat_data[start_idx][start_frame]

    if path_type == 'min_disp':
        find_path = min_disp_path
    elif path_type == 'min_rmsd':
        find_path = min_rmsd_path
    else:
        raise NotImplementedError(path_type)

    # Forward: catalogues after the start. Backward: catalogues before it, nearest first
    path_fwd = [start_frame] + find_path(
        start_fwd, cat_data[start_idx + 1:], **path_kwargs)
    path_bwd = [start_frame] + find_path(
        start_bwd, cat_data[:start_idx][::-1], **path_kwargs)

    # Translate the chosen frames into (traj_idx, frame_idx) pairs
    cats_fwd = cat_idxs[start_idx:]
    path_fwd = _np.vstack(
        [cats_fwd[ii][idx] for ii, idx in enumerate(path_fwd)])
    # Slice up to and including start_idx (hence the plus one), then invert
    cats_bwd = cat_idxs[:start_idx + 1][::-1]
    path_bwd = _np.vstack(
        [cats_bwd[ii][idx] for ii, idx in enumerate(path_bwd)])

    # Put the backward part in ascending order and drop its last frame, which is the
    # starting frame already contained in the forward part
    path = _np.vstack((path_bwd[::-1][:-1], path_fwd))

    # One frame per catalogue
    assert len(path) == len(cat_idxs)
    return path, start_idx
Exemple #6
0
def catalogues(cl, data=None, sort_by=None):
    r""" Returns the frames in catalogues form by cluster index:
     one as list (len Ncl) of ndarrays each of shape (Ni, 2) containing pairs of (traj_idx, frame_idx) values
     and one as lists of ndarrays of the actual (continuous) data values at the (traj_idx, frame_idx)

    Parameters
    ----------

    cl : :obj:`pyemma.coordinates.cluster_regspace` object
        The attributes used are :obj:`cl.data_producer.data`, :obj:`cl.dtrajs` and,
        when sorting, :obj:`cl.clustercenters`

    data : None or list, default is None
       The :obj:`cl` has its own  :obj:`cl.data_producer.data` attribute from which it can
       retrieve the necessary information for  the :obj:`cat_data` (default behaviour)
       However, any other data can be given here, **as long as the user is sure that it represents EXACTLY
       the data that was used to parametrize the :obj:`cl` object.
       Internally, the only checks that are carried out are:

           len(data) == len(cl.data_producer.data)

           [len(idata) == len(jdata) for idata, jdata in zip(data, cl.data_producer.data)]

       (Note that by construction the same relations should hold for :obj:`cl.dtrajs`)

    sort_by : None or int, default is None
       Default behaviour is to return the catalogues in the same order of clustercenters as the input,
       but it is sometimes useful have them sorted by ascending order of the n-th coordinate
       of the input space. Has to be smaller than the number of dimensions of the clustercenters

    Returns
    --------

    cat_idxs : list of 2D np.arrays
        The discrete catalogue. It is a list of len = :obj:`cl.n_clustercenters` containing a 2D vector
        with all the (file, frame)-pairs in which each clustercenter appears

    cat_data : list of ndarrays
        The actual value (assumed continuous) of the data at the (file-frame)-pairs of the :obj:`cat_idxs` list

    Raises
    ------
    AssertionError
        If :obj:`data` does not match the shape checks above, or if :obj:`sort_by` is
        not an integer or is not smaller than the number of clustercenter dimensions

    tested: True
    """

    idata = cl.data_producer.data
    if data is not None:
        assert len(data) == len(idata)
        assert _np.all(
            [len(jdata) == len(ddata) for jdata, ddata in zip(idata, data)])
        idata = data

    cat_idxs = _index_states(cl.dtrajs)
    # For every cluster, stack the data values found at its (traj_idx, frame_idx) pairs
    cat_cont = [
        _np.vstack([idata[ii][jj] for ii, jj in icat]) for icat in cat_idxs
    ]

    if sort_by is not None:
        assert _is_int(sort_by)
        n_dims = cl.clustercenters.shape[1]
        # Strict inequality: valid coordinate indices run from 0 to n_dims - 1, so
        # sort_by == n_dims has to be rejected here instead of failing on the indexing below
        assert sort_by < n_dims, "Want to sort by %u-th coord, but centers have only %u dims" % (
            sort_by, n_dims)
        sorts_coordinate = _np.argsort(cl.clustercenters[:, sort_by])
        cat_idxs = [cat_idxs[ii] for ii in sorts_coordinate]
        cat_cont = [cat_cont[ii] for ii in sorts_coordinate]

    return cat_idxs, cat_cont
Exemple #7
0
def listify_if_int(inp):
    """Wrap :py:obj:`inp` in a one-element list if it is an integer.

    Parameters
    ----------
    inp : anything
        Value to be checked with :py:obj:`_is_int`

    Returns
    -------
    inp : list or the unchanged input
        ``[inp]`` if :py:obj:`inp` is an integer, :py:obj:`inp` itself otherwise
    """
    return [inp] if _is_int(inp) else inp
Exemple #8
0
def add_atom_idxs_widget(atom_idxs, ngl_wdg, color_list=None, radius=1):
    r"""
    provided a list of atom_idxs and a ngl_wdg, try to represent them as well as possible in the ngl_wdg
    It is assumed that this method is called once per feature, ie. the number of atoms defines the
    feature. This way, the method decides how best to represent them. Currently, that means:
     * single atoms (an integer):  assume cartesian feature, represent with spacefill
     * pairs of atoms:             assume distance feature, represent with distance
     * triplets or quadruplets:    represent all involved atoms with spacefill
     * everything else is skipped and a message is printed

    The representations are added to every component of the ngl_wdg.

    Parameters
    ----------

    atom_idxs : list of integers and/or iterables of integers. If empty, the method won't do anything

    ngl_wdg : nglview ngl_wdg on which to represent stuff

    color_list: list, default is None
        list of colors to provide the representations with. The default None (as well as [None] and
        an empty list) yields blue.
        In principle, len(atom_idxs) should be == len(color_list),
        but if your list is short it will just default to the last color. This way, color_list=['black'] will paint
        all black regardless len(atom_idxs). The input list itself is never modified.

    radius : float, default is 1
        radius of the spacefill representation

    Returns
    -------
    ngl_wdg : Input ngl_wdg with the representations added

    """
    n_feats = len(atom_idxs)
    # Emptiness has to be checked by length: `atom_idxs is not []` compares against a
    # brand-new list object and is therefore always True
    if n_feats == 0:
        return ngl_wdg

    is_empty_list = isinstance(color_list, list) and len(color_list) == 0
    if color_list in [None, [None]] or is_empty_list:
        colors = ['blue'] * n_feats
    elif isinstance(color_list, list) and len(color_list) < n_feats:
        # Pad with the last color on a new list, so that the caller's list is left untouched
        colors = color_list + [color_list[-1]] * (n_feats - len(color_list))
    else:
        colors = color_list

    for cc in range(len(ngl_wdg._ngl_component_ids)):
        for iidxs, color in zip(atom_idxs, colors):
            if _is_int(iidxs):
                ngl_wdg.add_spacefill(selection=[iidxs],
                                      radius=radius,
                                      color=color,
                                      component=cc)
            elif _np.ndim(iidxs) > 0 and len(iidxs) == 2:
                ngl_wdg.add_distance(
                    atom_pair=[list(iidxs)],  # a list containing one pair
                    color=color,
                    label_size=0,
                    component=cc)
                # TODO add line thickness as **kwarg
            elif _np.ndim(iidxs) > 0 and len(iidxs) in [3, 4]:
                ngl_wdg.add_spacefill(selection=iidxs,
                                      radius=radius,
                                      color=color,
                                      component=cc)
            else:
                # Reached by anything that is neither an integer nor an iterable of 2, 3 or 4 atoms
                print(
                    "Cannot represent features involving more than 5 atoms per single feature"
                )

    return ngl_wdg