Beispiel #1
0
def test_dCOV():
    # Few simple tests to verify that the measure seems to be ok
    for N in xrange(1, 10): # sweep through size of the first data
        # We will compare to R implementation
        M, T = 4, 100
        x = np.random.normal(size=(N, T)) + np.random.normal()*10
        R = np.random.normal(size=(N, M))

        # linearly dependent variable after rotation
        dCov, dCor, _, _ = dCOV(x, 10 * np.dot(R.T, x))
        ok_(dCor > 0.7)           # should be really high but might fluctuate

        # completely independent variable
        dCov, dCor, _, _ = dCOV(x, np.random.normal(size=x.shape))
        # more dimension in x -- more uncertainty that they are
        # independent below is a heuristic (for T=100) and we should
        # just implement proper bootstrap significance estimation for
        # dCor
        ok_(dCor < 0.2+N/2.0)           # should be really high but might fluctuate

        # the same variable -- things should match for dCov and dVar's
        dCov, dCor, dVarx, dVary = dCOV(x, x)
        assert_equal(dCov, dVarx)
        assert_equal(dCov, dVary)
        assert_equal(dCor, 1.)
        assert_equal(dcorcoef(x, x), 1)
        #+ np.random.normal(size=(M, T)) \
        #    + np.random.normal(size=(M,))[:, None] # offset

        # Test that would work on vectors
        dCov, dCor, dVarx, dVary = dCOV(np.arange(N), np.sin(np.arange(N)/3.))
        if N>1:
            ok_(dCor > 0.6)           # should be really high but might fluctuate
        assert_equal(dcorcoef(np.arange(N), np.sin(np.arange(N)/3.)),  dCor)
Beispiel #2
0
def plot_scatter(dataXd, mask=None, masked_opacity=0.,
                 labels=None, colors=True,
                 dimcolor=1, title=None, limits='auto',
                 thresholds=None, hint_opacity=0.9,
                 x_jitter=None, y_jitter=None,
                 fig=None,
                 ax_scatter=None, ax_hist_x=None, ax_hist_y=None,
                 bp_location='scatter',
                 xlim=None, ylim=None,
                 rasterized=None,
                 uniq=False,
                 include_stats=False,
                 ):
    """
    Parameters
    ----------
    dataXd: array
      The volumetric (or not) data to plot where first dimension
      should only have 2 items
    mask: array, optional
      Additional mask to specify which values do not consider to plot.
      By default values with 0s in both dimensions are not plotted.
    masked_opacity: float, optional
      By default masked out values are not plotted at all.  Value in
      (0,1] will make them visible with this specified opacity
    labels: list of str, optional
      Labels to place for x and y axes
    colors: bool or string or colormap, optional
      Either to use colors to associate with physical location and
      what colormap to use (jet by default if colors=True)
    dimcolor: int
      If `colors`, then which dimension (within given 3D volume) to
      "track"
    limits: 'auto', 'same', 'per-axis' or (min, max)
      Limits for axes: when 'auto' if data ranges overlap is more than
      50% of the union range, 'same' is considered.  When 'same' --
      the same limits on both axes as determined by data.  If
      two-element tuple or list is provided, then that range is
      applied to both axes.
    hint_opacity: float, optional
      If `colors` is True, to then a "slice" of the volumetric data
      is plotted in the specified opacity to hint about the location
      of points in the original Xd data in `dimcolor` dimension
    x_jitter: float, optional
      Half-width of uniform noise added to x values.  Might be useful if data
      is quantized so it is valuable to jitter points a bit.
    y_jitter: float, optional
      Half-width of uniform noise added to y values.  Might be useful if data
      is quantized so it is valuable to jitter points a bit
    fig : Figure, optional
      Figure to plot on, otherwise new one created
    ax_*: axes, optional
      Axes for the scatter plot and histograms. If none of them is specified
      (which is the default) then 'classical' plot is rendered with histograms
      above and to the right
    bp_location: ('scatter', 'hist', None), optional
      Where to place boxplots depicting data range
    xlim: tuple, optional
    ylim: tuple, optional
      To fix plotted range
    rasterized: bool, optional
      Passed to scatter call, to allow rasterization of heavy scatter plots
    uniq: bool, optional
      Plot uniq values (those present in one but not in the other) along
      each axis with crosses
    include_stats: bool, optional
      Whether to report additional statistics on the data. Stats are also
      reported via verbose at level 2
    """
    if len(dataXd) != 2:
        raise ValueError("First axis of dataXd can only have two dimensions, "
                         "got {0}".format(len(dataXd)))
    dataXd = np.asanyarray(dataXd)      # TODO: allow to operate on list of arrays to not waste RAM/cycles
    data = dataXd.reshape((2, -1))
    if dataXd.ndim < 5:
        ntimepoints = 1
    elif dataXd.ndim == 5:
        ntimepoints = dataXd.shape[-1]
    else:
        raise ValueError("Do not know how to handle data with %d dimensions" % (dataXd.ndim - 1))
    if x_jitter or y_jitter:
        data = data.copy()              # lazy and wasteful
        def jitter_me(x, w):
            x += np.random.uniform(-w, w, size=data.shape[-1])
        if x_jitter:
            jitter_me(data[0, :], x_jitter)
        if y_jitter:
            jitter_me(data[1, :], y_jitter)

    finites = np.isfinite(data)
    nz = np.logical_and(data != 0, finites)
    # TODO : avoid doing data !=0 and just use provided utter mask
    #nz[:, 80000:] = False # for quick testing

    nzsum = np.sum(nz, axis=0)

    intersection = nzsum == 2
    # for coloring we would need to know all the indices
    union = nzsum > 0
    x, y = datainter = data[:, intersection]

    if mask is not None:
        if mask.size * ntimepoints == intersection.size:
            # we have got a single mask applicable to both x and y
            pass
        elif mask.size * ntimepoints == 2 * intersection.size:
            # we have got a mask per each, let's get an intersection
            assert mask.shape[0] == 2, "had to get 1 for x, 1 for y"
            mask = np.logical_and(mask[0], mask[1])
        else:
            raise ValueError(
                "mask of shape %s. data of shape %s. ntimepoints=%d.  "
                "Teach me how to apply it" % (mask.shape, data.shape, ntimepoints)
            )
        # replicate mask ntimepoints times
        mask = np.repeat(mask.ravel(), ntimepoints)[intersection] != 0
        x_masked = x[mask]
        y_masked = y[mask]


    xnoty = (nz[0].astype(int) - nz[1].astype(int))>0
    ynotx = (nz[1].astype(int) - nz[0].astype(int))>0

    msg = ''
    if not np.all(finites):
        msg = " non-finite x: %d, y: %d" % (np.sum(~finites[0]), np.sum(~finites[1]))

    verbose(1, "total: %d union: %d%s intersection: %d x_only: %d y_only: %d%s"
            % (len(nzsum),
               np.sum(union),
               mask is not None and ' masked: %d' % np.sum(mask) or '',
               np.sum(intersection),
               np.sum(xnoty), np.sum(ynotx),
               msg))

    if include_stats:
        # report some statistics as well
        import scipy.stats as ss
        r, p = ss.pearsonr(x, y)
        d = np.linalg.norm(x-y)
        statsline = "r=%.2f  p=%.4g  ||x-y||=%.4g" % (r, p, d)
        try:
            from mvpa2.misc.dcov import dcorcoef
            nmax = min(1000, len(x))
            idx = np.random.permutation(np.arange(len(x)))[:nmax]
            dcor = dcorcoef(x[idx], y[idx])
            dcor_s = '' if len(x) == nmax else '[%d random]' % nmax
            statsline += '  dcorr%s=%.4g' % (dcor_s, dcor)
        except ImportError:
            pass
        verbose(2, statsline)
    else:
        statsline = ''


    #fig=pl.figure()
    #pl.plot(datainter[0], datainter[1], '.')
    #fig.show()

    nullfmt  = pl.NullFormatter()         # no labels

    # definitions for the axes
    left, width = 0.1, 0.65
    bottom, height = 0.1, 0.65
    bottom_h = left_h = left+width+0.02

    if not (bool(ax_scatter) or bool(ax_hist_x) or bool(ax_hist_y)): # no custom axes specified
        # our default setup
        rect_scatter = [left, bottom, width, height]
        rect_histx = [left, bottom_h, width, 0.2]
        rect_histy = [left_h, bottom, 0.2, height]

        # start with a rectangular Figure
        if fig is None:
            fig = pl.figure(figsize=(10,10))

        ax_scatter = pl.axes(rect_scatter)
        ax_hist_x = pl.axes(rect_histx)
        ax_hist_y = pl.axes(rect_histy)

    else:
        # check if all not None?
        # assert(len(axes) == 3)

        ax_bp_x, ax_bp_y = None, None
        if ax_scatter is None:
            raise ValueError("Makes no sense to do not have scatter plot")

    ax_bp_x = ax_bp_y = None
    if bp_location is not None:
        ax_bp_x_parent = ax_bp_y_parent = None
        if bp_location == 'scatter':
            # place boxplots into histogram plots
            ax_bp_x_parent = ax_scatter
            ax_bp_y_parent = ax_scatter
        elif bp_location == 'hist':
            ax_bp_x_parent = ax_hist_x
            ax_bp_y_parent = ax_hist_y
        else:
            raise ValueError("bp_location needs to be from (None, 'scatter', 'hist')")

        if ax_bp_x_parent:
            hist_x_pos = ax_bp_x_parent.get_position()
            ax_bp_x = pl_axes( [hist_x_pos.x0,    hist_x_pos.y0 + hist_x_pos.height * 0.9,
                                hist_x_pos.width, hist_x_pos.height * 0.1],  facecolor='y' )

        if ax_bp_y_parent:
            hist_y_pos = ax_bp_y_parent.get_position()
            ax_bp_y = pl_axes( [hist_y_pos.x0 + hist_y_pos.width*0.9,  hist_y_pos.y0,
                                hist_y_pos.width * 0.1, hist_y_pos.height],  facecolor='y' )

        # ax_bp_y = pl_axes( [left + width * 0.9, bottom, width/10, height], facecolor='y' ) if ax_hist_y else None


    sc_kwargs = dict(facecolors='none', s=1, rasterized=rasterized) # common kwargs

    # let's use colormap to get non-boring colors
    cm = colors                     # e.g. if it is None
    if colors is True:
        cm = pl.matplotlib.cm.get_cmap('jet')
    elif isinstance(colors, str):
        cm = pl.matplotlib.cm.get_cmap(colors)
    if cm and len(dataXd.shape) > dimcolor+1:
        cm.set_under((1, 1, 1, 0.1))             # transparent what is not in range
        # we need to get our indices back for those we are going to plot.  probably this is the least efficient way:
        ndindices_all = np.array(list(np.ndindex(dataXd.shape[1:])))
        ndindices_nz = ndindices_all[intersection]
        # choose color based on dimcolor
        dimcolor_len = float(dataXd.shape[1+dimcolor])
        edgecolors = cm(((cm.N-1) * ndindices_nz[:, dimcolor] / dimcolor_len).astype(int))
        if mask is not None:
            # Plot first those which might be masked out
            if masked_opacity:
                mask_inv = np.logical_not(mask)
                mask_edgecolors = edgecolors[mask_inv].copy()
                # Adjust alpha value
                mask_edgecolors[:, -1] *= masked_opacity
                ax_scatter.scatter(x[mask_inv], y[mask_inv],
                                  edgecolors=mask_edgecolors,
                                  alpha=masked_opacity,
                                  **sc_kwargs)

            # Plot (on top) those which are not masked-out
            if mask.size:
                x_plot, y_plot, edgecolors_plot = x[mask], y[mask], edgecolors[mask]
            else:
                # older numpys blow here
                x_plot, y_plot, edgecolors_plot = (np.array([]),) * 3
        else:
            # Just plot all of them at once
            x_plot, y_plot, edgecolors_plot = x, y, edgecolors

        if len(x_plot):
            ax_scatter.scatter(x_plot, y_plot, edgecolors=edgecolors_plot,
                             **sc_kwargs)

        # for orientation we need to plot 1 slice... assume that the last dimension is z -- figure out a slice with max # of non-zeros
        zdim_entries = ndindices_nz[:, -1]
        if np.size(zdim_entries):
            zdim_counts, _ = np.histogram(zdim_entries, bins=np.arange(0, np.max(zdim_entries)+1))
            zdim_max = np.argmax(zdim_counts)

            if hint_opacity:
                # now we need to plot that zdim_max slice taking into account our colormap
                # create new axes
                axslice = pl_axes([left, bottom+height * 0.72, width/4., height/5.],
                                  facecolor='y')
                axslice.axis('off')
                sslice = np.zeros(dataXd.shape[1:3]) # XXX hardcoded assumption on dimcolor =1
                sslice[:, : ] = np.arange(dimcolor_len)[None, :]
                # if there is time dimension -- choose minimal value across all values
                dataXd_mint = np.min(dataXd, axis=-1) if dataXd.ndim == 5 else dataXd
                sslice[dataXd_mint[0, ..., zdim_max] == 0] = -1 # reset those not in the picture to be "under" range
                axslice.imshow(sslice, alpha=hint_opacity, cmap=cm)
    else:
        # the scatter plot without colors to distinguish location
        ax_scatter.scatter(x, y, **sc_kwargs)

    if labels:
        ax_scatter.set_xlabel(labels[0])
        ax_scatter.set_ylabel(labels[1])

    # "unique" points on each of the axes
    if uniq:
        if np.sum(xnoty):
            ax_scatter.scatter(fill_nonfinites(data[0, np.where(xnoty)[0]]),
                              fill_nonfinites(data[1, np.where(xnoty)[0]]),
                              edgecolor='b', **sc_kwargs)
        if np.sum(ynotx):
            ax_scatter.scatter(fill_nonfinites(data[0, np.where(ynotx)[0]]),
                              fill_nonfinites(data[1, np.where(ynotx)[0]]),
                              edgecolor='g', **sc_kwargs)


    # Axes
    if np.size(x):
        ax_scatter.plot((np.min(x), np.max(x)), (0, 0), 'r', alpha=0.5)
    else:
        warning("There is nothing to plot, returning early")
        return pl.gcf()

    ax_scatter.plot((0, 0), (np.min(y), np.max(y)), 'r', alpha=0.5)

    if (mask is not None and not masked_opacity and np.sum(mask)):
        # if there is a non-degenerate mask which was not intended to be plotted,
        # take those values away while estimating min/max range
        _ = x[mask]; minx, maxx = np.min(_), np.max(_)
        _ = y[mask]; miny, maxy = np.min(_), np.max(_)
        del _                           # no need to consume RAM
        # print "Here y range", miny, maxy
    else:
        minx, maxx = np.min(x), np.max(x)
        miny, maxy = np.min(y), np.max(y)

    # Process 'limits' option
    if isinstance(limits, str):
        limits = limits.lower()
        if limits == 'auto':
            overlap = min(maxx, maxy) - max(minx, miny)
            range_ = max(maxx, maxy) - min(minx, miny)
            limits = {True: 'same', False: 'per-axis'}[not range_ or overlap/float(range_) > 0.5]

        if limits == 'per-axis':
            same_range = False
            if xlim is None:
                # add some white border
                dx = (maxx - minx)/20.
                xlim = (minx-dx, maxx+dx)
            if ylim is None:
                dy = (maxy - miny)/20.
                ylim = (miny-dy, maxy+dy)

        elif limits == 'same':
            same_range = True
            # assign limits the numerical range
            limits = (np.min( [minx, miny] ),  np.max( [maxx, maxy] ))
        else:
            raise ValueError("Do not know how to handle same_range=%r" % (limits,))
    else:
        same_range = True

    # Let's now plot threshold lines if provided
    if thresholds is not None:
        stylekwargs = dict(colors='k', linestyles='dotted')
        if len(thresholds):
            ax_scatter.vlines(thresholds[0], ax_scatter.get_xlim()[0]*0.9,
                             ax_scatter.get_xlim()[1]*0.9, **stylekwargs)
        if len(thresholds)>1:
            ax_scatter.hlines(thresholds[1], ax_scatter.get_ylim()[0]*0.9,
                             ax_scatter.get_ylim()[1]*0.9, **stylekwargs)

    if same_range:
        # now determine nice limits by hand:
        binwidthx = binwidthy = binwidth = np.max(datainter)/51. # 0.25

        minxy, maxxy = limits
        sgn = np.sign(minxy)
        xyrange = maxxy - minxy
        xyamax = np.max( [np.max(np.fabs(x)), np.max(np.fabs(y))] )
        limn = sgn*( int(sgn*minxy/binwidth) - sgn) * binwidth
        limp = ( int(maxxy/binwidth) + 1) * binwidth

        ax_scatter.plot((limn*0.9, limp*0.9), (limn*0.9, limp*0.9), 'y--')
        if xlim is None:
            xlim = (limn, limp)
        if ylim is None:
            ylim = (limn, limp)

        binsx = binsy = bins = np.arange(limn, limp + binwidth, binwidth)
    else:
        binwidthx = (maxx - minx)/51.
        binwidthy = (maxy - miny)/51.

        try:
            binsx = np.arange(minx, maxx + binwidthx, binwidthx)
            binsy = np.arange(miny, maxy + binwidthy, binwidthy)
        except Exception as exc:
            warning(
                "Received following exception while trying to get bins for "
                "minx=%(minx)f maxx=%(maxx)f binwidthx=%(binwidthx)s "
                "miny=%(miny)f maxy=%(maxy)f binwidthy=%(binwidthy)s: %(exc)s. "
                "Returning early"
                % locals()
            )
            return pl.gcf()

    if xlim is not None:
        ax_scatter.set_xlim( xlim )
    if ylim is not None:
        ax_scatter.set_ylim( ylim )

    # get values to plot for histogram and boxplot
    x_hist, y_hist = (x, y) if (mask is None or not np.sum(mask)) else (x_masked, y_masked)

    if np.any(binsx) and ax_hist_x is not None:
        ax_hist_x.xaxis.set_major_formatter(nullfmt)
        histx = ax_hist_x.hist(x_hist, bins=binsx, facecolor='b')
        ax_hist_x.set_xlim( ax_scatter.get_xlim() )
        ax_hist_x.vlines(0, 0, 0.9*np.max(histx[0]), 'r')

    if np.any(binsy) and ax_hist_y is not None:
        ax_hist_y.yaxis.set_major_formatter(nullfmt)
        histy = ax_hist_y.hist(y_hist, bins=binsy,
                               orientation='horizontal', facecolor='g')
        ax_hist_y.set_ylim( ax_scatter.get_ylim() )
        ax_hist_y.hlines(0, 0, 0.9*np.max(histy[0]), 'r')

    rect_scatter = [left, bottom, width, height]

    # Box plots
    if ax_bp_x is not None:
        ax_bp_x.axis('off')
        bpx = ax_bp_x.boxplot(x_hist, vert=0) #'r', 0)
        ax_bp_x.set_xlim(ax_scatter.get_xlim())

    if ax_bp_y is not None:
        ax_bp_y.axis('off')
        bpy = ax_bp_y.boxplot(y_hist, sym='g+')
        ax_bp_y.set_ylim(ax_scatter.get_ylim())

    if statsline:
        # draw the text based on gca
        y1, y2 = ax_scatter.get_ylim(); x1, x2 = ax_scatter.get_xlim();
        ax_scatter.text(0.5*(x1+x2),            # center
                       y2 - 0.02*(y2-y1),
                       statsline,
                       verticalalignment = "top", horizontalalignment="center")

    if title:
        pl.title(title)

    return pl.gcf()
Beispiel #3
0
def plot_scatter(
    dataXd,
    mask=None,
    masked_opacity=0.,
    labels=None,
    colors=True,
    dimcolor=1,
    title=None,
    limits='auto',
    thresholds=None,
    hint_opacity=0.9,
    x_jitter=None,
    y_jitter=None,
    fig=None,
    ax_scatter=None,
    ax_hist_x=None,
    ax_hist_y=None,
    bp_location='scatter',
    xlim=None,
    ylim=None,
    rasterized=None,
    uniq=False,
    include_stats=False,
):
    """
    Parameters
    ----------
    dataXd: array
      The volumetric (or not) data to plot where first dimension
      should only have 2 items
    mask: array, optional
      Additional mask to specify which values do not consider to plot.
      By default values with 0s in both dimensions are not plotted.
    masked_opacity: float, optional
      By default masked out values are not plotted at all.  Value in
      (0,1] will make them visible with this specified opacity
    labels: list of str, optional
      Labels to place for x and y axes
    colors: bool or string or colormap, optional
      Either to use colors to associate with physical location and
      what colormap to use (jet by default if colors=True)
    dimcolor: int
      If `colors`, then which dimension (within given 3D volume) to
      "track"
    limits: 'auto', 'same', 'per-axis' or (min, max)
      Limits for axes: when 'auto' if data ranges overlap is more than
      50% of the union range, 'same' is considered.  When 'same' --
      the same limits on both axes as determined by data.  If
      two-element tuple or list is provided, then that range is
      applied to both axes.
    hint_opacity: float, optional
      If `colors` is True, to then a "slice" of the volumetric data
      is plotted in the specified opacity to hint about the location
      of points in the original Xd data in `dimcolor` dimension
    x_jitter: float, optional
      Half-width of uniform noise added to x values.  Might be useful if data
      is quantized so it is valuable to jitter points a bit.
    y_jitter: float, optional
      Half-width of uniform noise added to y values.  Might be useful if data
      is quantized so it is valuable to jitter points a bit
    fig : Figure, optional
      Figure to plot on, otherwise new one created
    ax_*: axes, optional
      Axes for the scatter plot and histograms. If none of them is specified
      (which is the default) then 'classical' plot is rendered with histograms
      above and to the right
    bp_location: ('scatter', 'hist', None), optional
      Where to place boxplots depicting data range
    xlim: tuple, optional
    ylim: tuple, optional
      To fix plotted range
    rasterized: bool, optional
      Passed to scatter call, to allow rasterization of heavy scatter plots
    uniq: bool, optional
      Plot uniq values (those present in one but not in the other) along
      each axis with crosses
    include_stats: bool, optional
      Whether to report additional statistics on the data. Stats are also
      reported via verbose at level 2
    """
    if len(dataXd) != 2:
        raise ValueError("First axis of dataXd can only have two dimensions, "
                         "got {0}".format(len(dataXd)))
    dataXd = np.asanyarray(
        dataXd
    )  # TODO: allow to operate on list of arrays to not waste RAM/cycles
    data = dataXd.reshape((2, -1))
    if dataXd.ndim < 5:
        ntimepoints = 1
    elif dataXd.ndim == 5:
        ntimepoints = dataXd.shape[-1]
    else:
        raise ValueError("Do not know how to handle data with %d dimensions" %
                         (dataXd.ndim - 1))
    if x_jitter or y_jitter:
        data = data.copy()  # lazy and wasteful

        def jitter_me(x, w):
            x += np.random.uniform(-w, w, size=data.shape[-1])

        if x_jitter:
            jitter_me(data[0, :], x_jitter)
        if y_jitter:
            jitter_me(data[1, :], y_jitter)

    finites = np.isfinite(data)
    nz = np.logical_and(data != 0, finites)
    # TODO : avoid doing data !=0 and just use provided utter mask
    #nz[:, 80000:] = False # for quick testing

    nzsum = np.sum(nz, axis=0)

    intersection = nzsum == 2
    # for coloring we would need to know all the indices
    union = nzsum > 0
    x, y = datainter = data[:, intersection]

    if mask is not None:
        if mask.size * ntimepoints == intersection.size:
            # we have got a single mask applicable to both x and y
            pass
        elif mask.size * ntimepoints == 2 * intersection.size:
            # we have got a mask per each, let's get an intersection
            assert mask.shape[0] == 2, "had to get 1 for x, 1 for y"
            mask = np.logical_and(mask[0], mask[1])
        else:
            raise ValueError(
                "mask of shape %s. data of shape %s. ntimepoints=%d.  "
                "Teach me how to apply it" %
                (mask.shape, data.shape, ntimepoints))
        # replicate mask ntimepoints times
        mask = np.repeat(mask.ravel(), ntimepoints)[intersection] != 0
        x_masked = x[mask]
        y_masked = y[mask]

    xnoty = (nz[0].astype(int) - nz[1].astype(int)) > 0
    ynotx = (nz[1].astype(int) - nz[0].astype(int)) > 0

    msg = ''
    if not np.all(finites):
        msg = " non-finite x: %d, y: %d" % (np.sum(~finites[0]),
                                            np.sum(~finites[1]))

    verbose(
        1, "total: %d union: %d%s intersection: %d x_only: %d y_only: %d%s" %
        (len(nzsum), np.sum(union),
         mask is not None and ' masked: %d' % np.sum(mask)
         or '', np.sum(intersection), np.sum(xnoty), np.sum(ynotx), msg))

    if include_stats:
        # report some statistics as well
        import scipy.stats as ss
        r, p = ss.pearsonr(x, y)
        d = np.linalg.norm(x - y)
        statsline = "r=%.2f  p=%.4g  ||x-y||=%.4g" % (r, p, d)
        try:
            from mvpa2.misc.dcov import dcorcoef
            nmax = min(1000, len(x))
            idx = np.random.permutation(np.arange(len(x)))[:nmax]
            dcor = dcorcoef(x[idx], y[idx])
            dcor_s = '' if len(x) == nmax else '[%d random]' % nmax
            statsline += '  dcorr%s=%.4g' % (dcor_s, dcor)
        except ImportError:
            pass
        verbose(2, statsline)
    else:
        statsline = ''

    #fig=pl.figure()
    #pl.plot(datainter[0], datainter[1], '.')
    #fig.show()

    nullfmt = pl.NullFormatter()  # no labels

    # definitions for the axes
    left, width = 0.1, 0.65
    bottom, height = 0.1, 0.65
    bottom_h = left_h = left + width + 0.02

    if not (bool(ax_scatter) or bool(ax_hist_x)
            or bool(ax_hist_y)):  # no custom axes specified
        # our default setup
        rect_scatter = [left, bottom, width, height]
        rect_histx = [left, bottom_h, width, 0.2]
        rect_histy = [left_h, bottom, 0.2, height]

        # start with a rectangular Figure
        if fig is None:
            fig = pl.figure(figsize=(10, 10))

        ax_scatter = pl.axes(rect_scatter)
        ax_hist_x = pl.axes(rect_histx)
        ax_hist_y = pl.axes(rect_histy)

    else:
        # check if all not None?
        # assert(len(axes) == 3)

        ax_bp_x, ax_bp_y = None, None
        if ax_scatter is None:
            raise ValueError("Makes no sense to do not have scatter plot")

    ax_bp_x = ax_bp_y = None
    if bp_location is not None:
        ax_bp_x_parent = ax_bp_y_parent = None
        if bp_location == 'scatter':
            # place boxplots into histogram plots
            ax_bp_x_parent = ax_scatter
            ax_bp_y_parent = ax_scatter
        elif bp_location == 'hist':
            ax_bp_x_parent = ax_hist_x
            ax_bp_y_parent = ax_hist_y
        else:
            raise ValueError(
                "bp_location needs to be from (None, 'scatter', 'hist')")

        if ax_bp_x_parent:
            hist_x_pos = ax_bp_x_parent.get_position()
            ax_bp_x = pl_axes([
                hist_x_pos.x0, hist_x_pos.y0 + hist_x_pos.height * 0.9,
                hist_x_pos.width, hist_x_pos.height * 0.1
            ],
                              facecolor='y')

        if ax_bp_y_parent:
            hist_y_pos = ax_bp_y_parent.get_position()
            ax_bp_y = pl_axes([
                hist_y_pos.x0 + hist_y_pos.width * 0.9, hist_y_pos.y0,
                hist_y_pos.width * 0.1, hist_y_pos.height
            ],
                              facecolor='y')

        # ax_bp_y = pl_axes( [left + width * 0.9, bottom, width/10, height], facecolor='y' ) if ax_hist_y else None

    sc_kwargs = dict(facecolors='none', s=1,
                     rasterized=rasterized)  # common kwargs

    # let's use colormap to get non-boring colors
    cm = colors  # e.g. if it is None
    if colors is True:
        cm = pl.matplotlib.cm.get_cmap('jet')
    elif isinstance(colors, str):
        cm = pl.matplotlib.cm.get_cmap(colors)
    if cm and len(dataXd.shape) > dimcolor + 1:
        cm.set_under((1, 1, 1, 0.1))  # transparent what is not in range
        # we need to get our indices back for those we are going to plot.  probably this is the least efficient way:
        ndindices_all = np.array(list(np.ndindex(dataXd.shape[1:])))
        ndindices_nz = ndindices_all[intersection]
        # choose color based on dimcolor
        dimcolor_len = float(dataXd.shape[1 + dimcolor])
        edgecolors = cm(((cm.N - 1) * ndindices_nz[:, dimcolor] /
                         dimcolor_len).astype(int))
        if mask is not None:
            # Plot first those which might be masked out
            if masked_opacity:
                mask_inv = np.logical_not(mask)
                mask_edgecolors = edgecolors[mask_inv].copy()
                # Adjust alpha value
                mask_edgecolors[:, -1] *= masked_opacity
                ax_scatter.scatter(x[mask_inv],
                                   y[mask_inv],
                                   edgecolors=mask_edgecolors,
                                   alpha=masked_opacity,
                                   **sc_kwargs)

            # Plot (on top) those which are not masked-out
            if mask.size:
                x_plot, y_plot, edgecolors_plot = x[mask], y[mask], edgecolors[
                    mask]
            else:
                # older numpys blow here
                x_plot, y_plot, edgecolors_plot = (np.array([]), ) * 3
        else:
            # Just plot all of them at once
            x_plot, y_plot, edgecolors_plot = x, y, edgecolors

        if len(x_plot):
            ax_scatter.scatter(x_plot,
                               y_plot,
                               edgecolors=edgecolors_plot,
                               **sc_kwargs)

        # for orientation we need to plot 1 slice... assume that the last dimension is z -- figure out a slice with max # of non-zeros
        zdim_entries = ndindices_nz[:, -1]
        if np.size(zdim_entries):
            zdim_counts, _ = np.histogram(zdim_entries,
                                          bins=np.arange(
                                              0,
                                              np.max(zdim_entries) + 1))
            zdim_max = np.argmax(zdim_counts)

            if hint_opacity:
                # now we need to plot that zdim_max slice taking into account our colormap
                # create new axes
                axslice = pl_axes(
                    [left, bottom + height * 0.72, width / 4., height / 5.],
                    facecolor='y')
                axslice.axis('off')
                sslice = np.zeros(dataXd.shape[1:3]
                                  )  # XXX hardcoded assumption on dimcolor =1
                sslice[:, :] = np.arange(dimcolor_len)[None, :]
                # if there is time dimension -- choose minimal value across all values
                dataXd_mint = np.min(dataXd,
                                     axis=-1) if dataXd.ndim == 5 else dataXd
                sslice[
                    dataXd_mint[0, ..., zdim_max] ==
                    0] = -1  # reset those not in the picture to be "under" range
                axslice.imshow(sslice, alpha=hint_opacity, cmap=cm)
    else:
        # the scatter plot without colors to distinguish location
        ax_scatter.scatter(x, y, **sc_kwargs)

    if labels:
        ax_scatter.set_xlabel(labels[0])
        ax_scatter.set_ylabel(labels[1])

    # "unique" points on each of the axes
    if uniq:
        if np.sum(xnoty):
            ax_scatter.scatter(fill_nonfinites(data[0, np.where(xnoty)[0]]),
                               fill_nonfinites(data[1, np.where(xnoty)[0]]),
                               edgecolor='b',
                               **sc_kwargs)
        if np.sum(ynotx):
            ax_scatter.scatter(fill_nonfinites(data[0, np.where(ynotx)[0]]),
                               fill_nonfinites(data[1, np.where(ynotx)[0]]),
                               edgecolor='g',
                               **sc_kwargs)

    # Axes
    if np.size(x):
        ax_scatter.plot((np.min(x), np.max(x)), (0, 0), 'r', alpha=0.5)
    else:
        warning("There is nothing to plot, returning early")
        return pl.gcf()

    ax_scatter.plot((0, 0), (np.min(y), np.max(y)), 'r', alpha=0.5)

    if (mask is not None and not masked_opacity and np.sum(mask)):
        # if there is a non-degenerate mask which was not intended to be plotted,
        # take those values away while estimating min/max range
        _ = x[mask]
        minx, maxx = np.min(_), np.max(_)
        _ = y[mask]
        miny, maxy = np.min(_), np.max(_)
        del _  # no need to consume RAM
        # print "Here y range", miny, maxy
    else:
        minx, maxx = np.min(x), np.max(x)
        miny, maxy = np.min(y), np.max(y)

    # Process 'limits' option
    if isinstance(limits, str):
        limits = limits.lower()
        if limits == 'auto':
            overlap = min(maxx, maxy) - max(minx, miny)
            range_ = max(maxx, maxy) - min(minx, miny)
            limits = {
                True: 'same',
                False: 'per-axis'
            }[not range_ or overlap / float(range_) > 0.5]

        if limits == 'per-axis':
            same_range = False
            if xlim is None:
                # add some white border
                dx = (maxx - minx) / 20.
                xlim = (minx - dx, maxx + dx)
            if ylim is None:
                dy = (maxy - miny) / 20.
                ylim = (miny - dy, maxy + dy)

        elif limits == 'same':
            same_range = True
            # assign limits the numerical range
            limits = (np.min([minx, miny]), np.max([maxx, maxy]))
        else:
            raise ValueError("Do not know how to handle same_range=%r" %
                             (limits, ))
    else:
        same_range = True

    # Let's now plot threshold lines if provided
    if thresholds is not None:
        stylekwargs = dict(colors='k', linestyles='dotted')
        if len(thresholds):
            ax_scatter.vlines(thresholds[0],
                              ax_scatter.get_xlim()[0] * 0.9,
                              ax_scatter.get_xlim()[1] * 0.9, **stylekwargs)
        if len(thresholds) > 1:
            ax_scatter.hlines(thresholds[1],
                              ax_scatter.get_ylim()[0] * 0.9,
                              ax_scatter.get_ylim()[1] * 0.9, **stylekwargs)

    if same_range:
        # now determine nice limits by hand:
        binwidthx = binwidthy = binwidth = np.max(datainter) / 51.  # 0.25

        minxy, maxxy = limits
        sgn = np.sign(minxy)
        xyrange = maxxy - minxy
        xyamax = np.max([np.max(np.fabs(x)), np.max(np.fabs(y))])
        limn = sgn * (int(sgn * minxy / binwidth) - sgn) * binwidth
        limp = (int(maxxy / binwidth) + 1) * binwidth

        ax_scatter.plot((limn * 0.9, limp * 0.9), (limn * 0.9, limp * 0.9),
                        'y--')
        if xlim is None:
            xlim = (limn, limp)
        if ylim is None:
            ylim = (limn, limp)

        binsx = binsy = bins = np.arange(limn, limp + binwidth, binwidth)
    else:
        binwidthx = (maxx - minx) / 51.
        binwidthy = (maxy - miny) / 51.

        try:
            binsx = np.arange(minx, maxx + binwidthx, binwidthx)
            binsy = np.arange(miny, maxy + binwidthy, binwidthy)
        except Exception as exc:
            warning(
                "Received following exception while trying to get bins for "
                "minx=%(minx)f maxx=%(maxx)f binwidthx=%(binwidthx)s "
                "miny=%(miny)f maxy=%(maxy)f binwidthy=%(binwidthy)s: %(exc)s. "
                "Returning early" % locals())
            return pl.gcf()

    if xlim is not None:
        ax_scatter.set_xlim(xlim)
    if ylim is not None:
        ax_scatter.set_ylim(ylim)

    # get values to plot for histogram and boxplot
    x_hist, y_hist = (x,
                      y) if (mask is None or not np.sum(mask)) else (x_masked,
                                                                     y_masked)

    if np.any(binsx) and ax_hist_x is not None:
        ax_hist_x.xaxis.set_major_formatter(nullfmt)
        histx = ax_hist_x.hist(x_hist, bins=binsx, facecolor='b')
        ax_hist_x.set_xlim(ax_scatter.get_xlim())
        ax_hist_x.vlines(0, 0, 0.9 * np.max(histx[0]), 'r')

    if np.any(binsy) and ax_hist_y is not None:
        ax_hist_y.yaxis.set_major_formatter(nullfmt)
        histy = ax_hist_y.hist(y_hist,
                               bins=binsy,
                               orientation='horizontal',
                               facecolor='g')
        ax_hist_y.set_ylim(ax_scatter.get_ylim())
        ax_hist_y.hlines(0, 0, 0.9 * np.max(histy[0]), 'r')

    rect_scatter = [left, bottom, width, height]

    # Box plots
    if ax_bp_x is not None:
        ax_bp_x.axis('off')
        bpx = ax_bp_x.boxplot(x_hist, vert=0)  #'r', 0)
        ax_bp_x.set_xlim(ax_scatter.get_xlim())

    if ax_bp_y is not None:
        ax_bp_y.axis('off')
        bpy = ax_bp_y.boxplot(y_hist, sym='g+')
        ax_bp_y.set_ylim(ax_scatter.get_ylim())

    if statsline:
        # draw the text based on gca
        y1, y2 = ax_scatter.get_ylim()
        x1, x2 = ax_scatter.get_xlim()
        ax_scatter.text(
            0.5 * (x1 + x2),  # center
            y2 - 0.02 * (y2 - y1),
            statsline,
            verticalalignment="top",
            horizontalalignment="center")

    if title:
        pl.title(title)

    return pl.gcf()