Example #1
0
def load(filename):
    """Build a :py:class:`DataSetList` from a file or a folder.

    :param filename: a single :file:`info` file name, a single pickle
                     file name, or a folder name; a folder is browsed
                     recursively for :file:`info` or :file:`pickle`
                     files.
    :returns: the object returned by ``pproc.DataSetList(filename)``

    """
    data_sets = pproc.DataSetList(filename)
    return data_sets
Example #2
0
def plot(dsList, targets=single_target_values, **plotArgs):
    """Draw a two-panel demonstration figure of empirical distributions.

    The left panel (subplot 121) gets one run length distribution per
    target index, a vertical black line at the largest value of
    ``mMaxEvals() / dim`` over the data sets, and a text label built
    from the function ids.  The right panel (subplot 122) gets the
    function value distribution for the last target index at that
    largest budget ratio, followed by distributions at budgets
    ``10**0, 10**1, ...`` strictly below ``10**floor(log10(ratio))``,
    and the same text label.

    :param dsList: data sets, passed through ``pproc.DataSetList``;
                   they must all share a single dimension
    :param targets: object providing ``len()`` and callable with a
                    ``fun_dim`` argument to obtain the target values
    :param plotArgs: keyword arguments merged into every plot call

    :returns: list of the handles created

    """
    # TODO: the handling of ``targets`` still needs to be rectified
    dsList = pproc.DataSetList(dsList)
    assert len(dsList.dictByDim()) == 1, ('Cannot display different '
                                          'dimensionalities together')
    handles = []

    # --- left panel: run length distributions, one per target index
    plt.subplot(121)
    max_ratio = max(ds.mMaxEvals() / ds.dim for ds in dsList)
    for idx in range(len(targets)):
        style = dict(plotArgs, **rldStyles[idx % len(rldStyles)])
        handles.extend(
            plotRLDistr(dsList, lambda fun_dim: targets(fun_dim)[idx],
                        **style))
    handles.append(plt.axvline(x=max_ratio, color='k', **plotArgs))
    text = 'f%s' % (consecutiveNumbers(sorted(ds.funcId for ds in dsList)))
    handles.append(
        plt.text(0.5, 0.98, text,
                 horizontalalignment="center",
                 verticalalignment="top",
                 transform=plt.gca().transAxes))

    # --- right panel: function value distributions
    plt.subplot(122)
    last = range(len(targets))[-1]
    style = dict(plotArgs, **rldStyles[last % len(rldStyles)])
    handles.extend(
        plotFVDistr(dsList, max_ratio,
                    lambda fun_dim: targets(fun_dim)[last], **style))
    # budgets at the powers of ten below the largest budget ratio
    exponents = np.arange(0, np.floor(np.log10(max_ratio)))
    for k, budget in enumerate(np.power(10, exponents)):
        style = dict(plotArgs, **rldUnsuccStyles[k % len(rldUnsuccStyles)])
        handles.extend(
            plotFVDistr(dsList, budget,
                        lambda fun_dim: targets(fun_dim)[-1], **style))
    handles.append(
        plt.text(0.98, 0.02, text,
                 horizontalalignment="right",
                 transform=plt.gca().transAxes))
    return handles
Example #3
0
def plot(dsList, targets=None, craftingeffort=0., **kwargs):
    """This function is obsolete?
    Generates a graph of the run length distribution of an algorithm.

    We display the empirical cumulative distribution function ECDF of
    the bootstrapped distribution of the runlength for an algorithm
    (in number of function evaluations) to reach the target functions
    value :py:data:`targets`.

    :param DataSetList dsList: data set for one algorithm
    :param seq targets: target function values; when ``None`` the
                        module-level ``target_values`` is used
    :param float craftingeffort: the data will be multiplied by the
                                 exponential of this value
    :param dict kwargs: additional parameters provided to plot function.

    :returns: handles
    :raises ValueError: if a raw (non-callable) sequence of targets has
                        a smallest value that is not smaller than one

    """
    if targets is None:
        targets = target_values  # set above or in config.py
    try:
        # The comparison is evaluated first, exactly as before, so that a
        # TypeError from ``np.min`` or ``>=`` still skips the conversion.
        # Only raw sequences are rejected: targets are called further
        # down, so a callable is taken to be a ready-made targets object.
        if np.min(targets) >= 1 and not callable(targets):
            raise ValueError(
                'smallest target f-value is not smaller than one, use ``pproc.TargetValues(targets)`` to prevent this error'
            )
        targets = pp.TargetValues(targets)
    except TypeError:
        pass
    assert len(pp.DataSetList(
        dsList).dictByDim()) == 1  # We never integrate over dimensions...
    data = []
    maxevals = []
    for entry in dsList:
        divisor = entry.dim if divide_by_dimension else 1
        for t in targets((entry.funcId, entry.dim)):
            # default sample when no run reached the target
            x = [np.inf] * perfprofsamplesize
            evals = entry.detEvals([t])[0]
            failed = np.isnan(evals)  # NaN marks a run that missed the target
            runlengthsucc = evals[~failed] / divisor
            runlengthunsucc = entry.maxevals[failed] / divisor
            if len(runlengthsucc) > 0:
                x = toolsstats.drawSP(runlengthsucc,
                                      runlengthunsucc,
                                      percentiles=[50],
                                      samplesize=perfprofsamplesize)[1]
            data.extend(x)
            maxevals.extend(runlengthunsucc)

    # Display data
    data = np.array(data)
    data = data[~np.isnan(data)]  # Take away the nans
    n = len(data)  # infinite entries still count in the ECDF denominator
    data = data[~np.isinf(data)]  # Take away the infs
    data = np.exp(craftingeffort) * data  # correction by crafting effort CrE
    if len(data) == 0:  # data is empty.
        res = pprldistr.plotECDF(np.array((1., )), n=np.inf, **kwargs)
    else:
        res = pprldistr.plotECDF(np.array(data), n=n, **kwargs)
    if maxevals:  # Should cover the case where maxevals is None or empty
        x3 = np.median(maxevals)
        if np.any(data > x3):
            # cross on the curve at the median budget of unsuccessful runs
            y3 = float(np.sum(data <= x3)) / n
            h = plt_plot((x3, ), (y3, ),
                         marker='x',
                         markersize=24,
                         markeredgewidth=3,
                         markeredgecolor=plt.getp(res[0], 'color'),
                         ls='',
                         color=plt.getp(res[0], 'color'))
            h.extend(res)
            res = h  # so the last element in res still has the label.
    return res
Example #4
0
    :param dict dictAlg: dictionary of data sets with algorithm name for
                         keys
    :param seq sortedAlg: sequence for sorting the entries of
                          :py:data:`dictAlg`; if not provided,
                          dictAlg.keys() will be used instead
    :returns: an instance of :py:class:`DataSetList` with the portfolio
              data sets

    """
    if not sortedAlg:
        sortedAlg = dictAlg.keys()
    tmpres = []
    for f, i in pp.dictAlgByFun(dictAlg).iteritems():
        for d, j in pp.dictAlgByDim(i).iteritems():
            tmp = []
            if sortedAlg:
                tmplist = list(j[k] for k in sortedAlg)
            else:
                tmplist = j.values()
            for k in tmplist:
                assert len(k) == 1 # one element list
                tmp.append(k[0])
            try:
                tmpres.append(DataSet(tmp))
            except Usage, err:
                print >>sys.stderr, err.msg
    res = pp.DataSetList()
    res.extend(tmpres)
    return res
    if len(dims) == 0:
        raise ValueError, ('No dimension(s) specified!')
    if len(funcs) == 0:
        raise ValueError, ('No function(s) specified!')

    # partition data since not all functions can be displayed in
    # one table
    partition = [1]
    half = len(funcs)
    if half > 12:
        partition.append(2)
        half = int(round(len(funcs) / 2))

    # create dataset
    datasetfull = pproc.DataSetList(directory, verbose=verboseflag)

    # loop over dimension and functions
    for dim in dims:

        # use partition
        for p in partition:

            # create list which contains min and median values across all
            # algorithms for all functions
            ftarget = list()

            for fun in funcs[0 + int((p - 1) * half):int(p * half)]:

                # create list which only contains entries with dimension = dim
                # for function = fun
Example #6
0
def plot(dsList, valuesOfInterest=values_of_interest, styles=styles):
    """From a DataSetList, plot a figure of ERT/dim vs dim.

    There will be one set of graphs per function represented in the
    input data sets. Most usually the data sets of different functions
    will be represented separately.

    :param DataSetList dsList: data sets
    :param seq valuesOfInterest:
        target precisions via class TargetValues, there might
        be as many graphs as there are elements in
        this input. Can be different for each
        function (a dictionary indexed by ifun).
    :param seq styles:
        sequence of dictionaries of plot keyword arguments; the first
        ``len(valuesOfInterest)`` entries are used, in reversed order.
        The dictionaries are only read, never modified.

    :returns: handles

    """
    valuesOfInterest = pproc.TargetValues.cast(valuesOfInterest)
    styles = list(reversed(styles[:len(valuesOfInterest)]))
    dsList = pproc.DataSetList(dsList)
    dictFunc = dsList.dictByFunc()
    res = []

    for func in dictFunc:
        dictFunc[func] = dictFunc[func].dictByDim()
        dimensions = sorted(dictFunc[func])

        mediandata = {}
        displaynumber = {}
        for i_target in range(len(valuesOfInterest)):
            succ = []
            unsucc = []
            maxevals = np.ones(len(dimensions))
            # Collect data that have the same function and different dimension.
            for idim, dim in enumerate(dimensions):
                assert len(dictFunc[func][dim]) == 1
                # (ert, success rate, number of success, total number of
                #        function evaluations, median of successful runs)
                tmp = generateData(dictFunc[func][dim][0], valuesOfInterest((func, dim))[i_target])
                maxevals[idim] = max(dictFunc[func][dim][0].maxevals)
                if tmp[2] > 0:  # Number of success is larger than 0
                    succ.append(np.append(dim, tmp))
                    if tmp[2] < dictFunc[func][dim][0].nbRuns():
                        displaynumber[dim] = ((dim, tmp[0], tmp[2]))
                    mediandata[dim] = (i_target, tmp[-1])
                    unsucc.append(np.append(dim, np.nan))
                else:
                    unsucc.append(np.append(dim, tmp[-2]))  # total number of fevals

            if len(succ) > 0:
                tmp = np.vstack(succ)
                # ERT
                if genericsettings.scaling_figures_with_boxes:
                    for dim in dimensions:
                        # to find finite simulated runlengths we need to have at least one successful run
                        if dictFunc[func][dim][0].detSuccesses([valuesOfInterest((func, dim))[i_target]])[0]:
                            # make a box-plot
                            y = toolsstats.drawSP_from_dataset(
                                                dictFunc[func][dim][0],
                                                valuesOfInterest((func, dim))[i_target],
                                                [25, 50, 75],
                                                genericsettings.simulated_runlength_bootstrap_sample_size)[0]
                            rec_width = 1.1  # box ("rectangle") width
                            rec_taille_fac = 0.3  # notch width parameter
                            r = rec_width ** ((1. + i_target / 3.) / 4)  # more difficult targets get a wider box
                            # work on a copy so the shared style is left alone
                            styles2 = dict(styles[i_target])
                            styles2['linewidth'] = 1
                            styles2['markeredgecolor'] = styles2['color']
                            x = [dim / r, r * dim]
                            xm = [dim / (r**rec_taille_fac), dim * (r**rec_taille_fac)]
                            y = np.array(y) / dim
                            plt.plot([x[0], xm[0], x[0], x[1], xm[1], x[1], x[0]],
                                     [y[0], y[1],  y[2], y[2], y[1],  y[0], y[0]],
                                     markersize=0, **styles2)
                            styles2['linewidth'] = 0
                            plt.plot([x[0], x[1], x[1], x[0], x[0]],
                                     [y[0], y[0], y[2], y[2], y[0]],
                                     **styles2)
                            styles2['linewidth'] = 2  # median
                            plt.plot([x[0], x[1]], [y[1], y[1]],
                                     markersize=0, **styles2)
                # plot lines, we have to be smart to connect only adjacent dimensions
                for i, n in enumerate(tmp[:, 0]):
                    j = dimensions.index(n)
                    if i == len(tmp[:, 0]) - 1 or j == len(dimensions) - 1:
                        break
                    if dimensions[j+1] == tmp[i+1, 0]:
                        res.extend(plt.plot(tmp[i:i+2, 0], tmp[i:i+2, 1] / tmp[i:i+2, 0]**ynormalize_by_dimension,
                                            markersize=0, clip_on=True, **styles[i_target]))
                # plot only marker: override the line width on a copy, so
                # that the caller's style dictionaries are never modified
                # and an explicit ``linewidth`` of 0 cannot get lost
                marker_style = dict(styles[i_target], linewidth=0)
                res.extend(plt.plot(tmp[:, 0], tmp[:, 1] / tmp[:, 0]**ynormalize_by_dimension,
                           markersize=20, clip_on=True, **marker_style))

        # To have the legend displayed whatever happens with the data.
        for i in reversed(range(len(valuesOfInterest))):
            res.extend(plt.plot([], [], markersize=10,
                                label=valuesOfInterest.label(i) if isinstance(valuesOfInterest, pproc.RunlengthBasedTargetValues) else valuesOfInterest.loglabel(i),
                                **styles[i]))
        # Budget markers, based on the data of the last target only:
        # ``unsucc`` has one row per dimension, its first column gives
        # the x-positions for ``maxevals``.
        if unsucc:
            tmp = np.vstack(unsucc)  # tmp[:, 0] needs to be sorted!
            ylim = plt.ylim()  # keep the y-limits unaffected by the markers
            res.extend(plt.plot(tmp[:, 0], maxevals / tmp[:, 0]**ynormalize_by_dimension,
                       color=styles[len(valuesOfInterest) - 1]['color'],
                       ls='', marker='x', markersize=20))
            plt.ylim(ylim)
        # median
        if mediandata:
            for i in displaynumber:  # display median where success prob is smaller than one
                tm = mediandata[i]
                plt.plot((i,), (tm[1] / i**ynormalize_by_dimension,),
                         color=styles[tm[0]]['color'],
                         linestyle='', marker='+', markersize=30,
                         markeredgewidth=5, zorder= -1)

        a = plt.gca()
        # displaynumber is reset for each function (not for each target):
        # the entry of a dimension is overwritten whenever a later target
        # has some but not all runs successful in that dimension.
        if displaynumber:
            # ``values()`` works with Python 2 and 3, the keys are not needed
            for j in displaynumber.values():
                # the 1.5 factor is a shift up for the digits
                plt.text(j[0], 1.5 * j[1] / j[0]**ynormalize_by_dimension,
                         "%.0f" % j[2], axes=a,
                         horizontalalignment="center",
                         verticalalignment="bottom", fontsize=plt.rcParams['font.size'] * 0.85)
        # if later the ylim[0] becomes >> 1, this might be a problem
    return res