def load(filename):
    """Create a :py:class:`DataSetList` instance from a file or folder.

    Input argument filename can be a single :file:`info` file name, a
    single pickle filename or a folder name. In the latter case, the
    folder is browsed recursively for :file:`info` or :file:`pickle`
    files.

    """
    return pproc.DataSetList(filename)
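# Hypothetical usage sketch (added for illustration, not part of the original
# module). The folder name below is a placeholder for a COCO/BBOB output
# folder; the grouping methods are the ones used further down in this file:
#
#     dsl = load('exdata/my_algorithm')   # .info/.pickle files found recursively
#     print(len(dsl), 'data sets')
#     print(sorted(dsl.dictByDim()))      # dimensions present in the data
#     print(sorted(dsl.dictByFunc()))     # function ids present in the data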
def plot(dsList, targets=single_target_values, **plotArgs):
    """Plot ECDF of evaluations and final function values
    in a single figure for demonstration purposes."""
    # targets = targets()  # TODO: this needs to be rectified
    # targets = targets.target_values
    dsList = pproc.DataSetList(dsList)
    assert len(dsList.dictByDim()) == 1, ('Cannot display different '
                                          'dimensionalities together')
    res = []

    plt.subplot(121)
    maxEvalsFactor = max(i.mMaxEvals() / i.dim for i in dsList)
    evalfmax = maxEvalsFactor
    for j in range(len(targets)):
        tmpplotArgs = dict(plotArgs, **rldStyles[j % len(rldStyles)])
        tmp = plotRLDistr(dsList, lambda fun_dim: targets(fun_dim)[j],
                          **tmpplotArgs)
        res.extend(tmp)
    res.append(plt.axvline(x=maxEvalsFactor, color='k', **plotArgs))
    funcs = list(i.funcId for i in dsList)
    text = 'f%s' % (consecutiveNumbers(sorted(funcs)))
    res.append(plt.text(0.5, 0.98, text,
                        horizontalalignment="center",
                        verticalalignment="top",
                        transform=plt.gca().transAxes))

    plt.subplot(122)
    for j in [range(len(targets))[-1]]:
        tmpplotArgs = dict(plotArgs, **rldStyles[j % len(rldStyles)])
        tmp = plotFVDistr(dsList, evalfmax,
                          lambda fun_dim: targets(fun_dim)[j],
                          **tmpplotArgs)
        res.extend(tmp)
    tmp = np.floor(np.log10(evalfmax))
    # coloring right to left:
    maxEvalsF = np.power(10, np.arange(0, tmp))
    for j in range(len(maxEvalsF)):
        tmpplotArgs = dict(plotArgs, **rldUnsuccStyles[j % len(rldUnsuccStyles)])
        tmp = plotFVDistr(dsList, maxEvalsF[j],
                          lambda fun_dim: targets(fun_dim)[-1],
                          **tmpplotArgs)
        res.extend(tmp)
    res.append(plt.text(0.98, 0.02, text,
                        horizontalalignment="right",
                        transform=plt.gca().transAxes))
    return res
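# Self-contained sketch (added for illustration; not part of the original
# module) of the two-panel idea above: the left panel shows an ECDF of
# runlengths (evaluations divided by dimension), the right panel an ECDF of
# final function values. All data below are synthetic placeholders.
def _ecdf_demo_sketch():
    import numpy as np
    import matplotlib.pyplot as plt
    rng = np.random.default_rng(0)
    runlengths = rng.lognormal(mean=5., sigma=1., size=15)    # evals / dim
    final_fvals = rng.lognormal(mean=-3., sigma=2., size=15)  # best f - f_opt

    def ecdf(a):
        x = np.sort(a)
        return x, np.arange(1, len(x) + 1) / float(len(x))

    plt.subplot(121)
    x, y = ecdf(runlengths)
    plt.step(x, y, where='post')
    plt.xscale('log')
    plt.xlabel('evaluations / dimension')
    plt.ylabel('proportion of runs')

    plt.subplot(122)
    x, y = ecdf(final_fvals)
    plt.step(x, y, where='post')
    plt.xscale('log')
    plt.xlabel('final function value')
    plt.show()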
def plot(dsList, targets=None, craftingeffort=0., **kwargs):
    """This function is obsolete?

    Generates a graph of the run length distribution of an algorithm.

    We display the empirical cumulative distribution function ECDF of
    the bootstrapped distribution of the runlength for an algorithm
    (in number of function evaluations) to reach the target function
    value :py:data:`targets`.

    :param DataSetList dsList: data set for one algorithm
    :param seq targets: target function values
    :param float craftingeffort: the data will be multiplied by the
                                 exponential of this value
    :param dict kwargs: additional parameters provided to the plot function.

    :returns: handles

    """
    if targets is None:
        targets = target_values  # set above or in config.py
    try:
        if np.min(targets) >= 1:
            raise ValueError(
                'smallest target f-value is not smaller than one, '
                'use ``pproc.TargetValues(targets)`` to prevent this error')
        targets = pp.TargetValues(targets)
    except TypeError:
        pass
    res = []
    assert len(pp.DataSetList(dsList).dictByDim()) == 1
    # We never integrate over dimensions...
    data = []
    maxevals = []
    for entry in dsList:
        for t in targets((entry.funcId, entry.dim)):
            divisor = entry.dim if divide_by_dimension else 1
            x = [np.inf] * perfprofsamplesize
            runlengthunsucc = []
            evals = entry.detEvals([t])[0]
            runlengthsucc = evals[np.isnan(evals) == False] / divisor
            runlengthunsucc = entry.maxevals[np.isnan(evals)] / divisor
            if len(runlengthsucc) > 0:
                x = toolsstats.drawSP(runlengthsucc, runlengthunsucc,
                                      percentiles=[50],
                                      samplesize=perfprofsamplesize)[1]
            data.extend(x)
            maxevals.extend(runlengthunsucc)

    # Display data
    data = np.array(data)
    data = data[np.isnan(data) == False]  # Take away the nans
    n = len(data)
    data = data[np.isinf(data) == False]  # Take away the infs
    # data = data[data <= maxval]  # Take away rightmost data
    data = np.exp(craftingeffort) * data  # correction by crafting effort CrE
    if len(data) == 0:  # data is empty.
        res = pprldistr.plotECDF(np.array((1., )), n=np.inf, **kwargs)
    else:
        res = pprldistr.plotECDF(np.array(data), n=n, **kwargs)
        # plotdata(np.array(data), x_limit, maxevals,
        #          CrE=0., **kwargs)
    if maxevals:  # Should cover the case where maxevals is None or empty
        x3 = np.median(maxevals)
        if np.any(data > x3):
            y3 = float(np.sum(data <= x3)) / n
            h = plt_plot((x3, ), (y3, ),
                         marker='x', markersize=24, markeredgewidth=3,
                         markeredgecolor=plt.getp(res[0], 'color'),
                         ls='', color=plt.getp(res[0], 'color'))
            h.extend(res)
            res = h  # so the last element in res still has the label.
    return res
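# Sketch (added for illustration; this is not the actual toolsstats.drawSP
# implementation) of the simulated-restart bootstrap idea used above: one
# runlength sample is obtained by summing the evaluation counts of randomly
# picked unsuccessful runs until a successful run is picked, whose evaluation
# count is then added. drawSP is assumed to report percentiles of such samples.
def _simulated_restart_runlengths_sketch(runlengths_succ, runlengths_unsucc,
                                         samplesize=100, seed=None):
    import numpy as np
    rng = np.random.default_rng(seed)
    n_succ, n_unsucc = len(runlengths_succ), len(runlengths_unsucc)
    if n_succ == 0:
        # without any successful run the simulated runlength is infinite
        return np.full(samplesize, np.inf)
    out = np.empty(samplesize)
    for k in range(samplesize):
        total = 0.
        while True:
            i = rng.integers(n_succ + n_unsucc)  # pick a run uniformly at random
            if i < n_succ:
                out[k] = total + runlengths_succ[i]
                break
            total += runlengths_unsucc[i - n_succ]
    return out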
    :param dict dictAlg: dictionary of data sets with algorithm name
                         for keys
    :param seq sortedAlg: sequence for sorting the entries of
                          :py:data:`dictAlg`; if not provided,
                          dictAlg.keys() will be used instead
    :returns: an instance of :py:class:`DataSetList` with the portfolio
              data sets

    """
    if not sortedAlg:
        sortedAlg = dictAlg.keys()
    tmpres = []
    for f, i in pp.dictAlgByFun(dictAlg).items():
        for d, j in pp.dictAlgByDim(i).items():
            tmp = []
            if sortedAlg:
                tmplist = list(j[k] for k in sortedAlg)
            else:
                tmplist = j.values()
            for k in tmplist:
                assert len(k) == 1  # one-element list
                tmp.append(k[0])
            try:
                tmpres.append(DataSet(tmp))
            except Usage as err:
                print(err.msg, file=sys.stderr)
    res = pp.DataSetList()
    res.extend(tmpres)
    return res
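# Self-contained sketch (added; not part of the original module) of the
# grouping idea used above: entries from several algorithms are grouped by
# function and dimension so that each (function, dimension) cell holds one
# list of entries per algorithm. The attribute names mirror the data sets
# used in this file (.funcId, .dim); everything else is illustrative.
def _group_by_fun_and_dim_sketch(dict_alg):
    """dict_alg maps an algorithm name to a list of objects with
    .funcId and .dim attributes; return {(funcId, dim): {alg: [entries]}}."""
    grouped = {}
    for alg, entries in dict_alg.items():
        for entry in entries:
            cell = grouped.setdefault((entry.funcId, entry.dim), {})
            cell.setdefault(alg, []).append(entry)
    return grouped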
    if len(dims) == 0:
        raise ValueError('No dimension(s) specified!')
    if len(funcs) == 0:
        raise ValueError('No function(s) specified!')

    # partition data since not all functions can be displayed in
    # one table
    partition = [1]
    half = len(funcs)
    if half > 12:
        partition.append(2)
        half = int(round(len(funcs) / 2))

    # create dataset
    datasetfull = pproc.DataSetList(directory, verbose=verboseflag)

    # loop over dimension and functions
    for dim in dims:
        # use partition
        for p in partition:
            # create list which contains min and median values across all
            # algorithms for all functions
            ftarget = list()
            for fun in funcs[0 + int((p - 1) * half):int(p * half)]:
                # create list which only contains entries with dimension = dim
                # for function = fun
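# Added, standalone illustration (not part of the original module) of the
# partitioning rule above: with more than 12 functions the table is split
# into two halves, otherwise a single part contains all functions.
def _partition_funcs_sketch(funcs):
    """Return the per-table function lists, mirroring the partition/half
    logic above; e.g. 24 functions give two parts of 12 functions each."""
    partition = [1]
    half = len(funcs)
    if half > 12:
        partition.append(2)
        half = int(round(len(funcs) / 2))
    return [funcs[int((p - 1) * half):int(p * half)] for p in partition]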
def plot(dsList, valuesOfInterest=values_of_interest, styles=styles):
    """From a DataSetList, plot a figure of ERT/dim vs dim.

    There will be one set of graphs per function represented in the
    input data sets. Most usually the data sets of different functions
    will be represented separately.

    :param DataSetList dsList: data sets
    :param seq valuesOfInterest: target precisions via class TargetValues,
                                 there might be as many graphs as there are
                                 elements in this input. Can be different
                                 for each function (a dictionary indexed by
                                 ifun).

    :returns: handles

    """
    valuesOfInterest = pproc.TargetValues.cast(valuesOfInterest)
    styles = list(reversed(styles[:len(valuesOfInterest)]))
    dsList = pproc.DataSetList(dsList)
    dictFunc = dsList.dictByFunc()
    res = []

    for func in dictFunc:
        dictFunc[func] = dictFunc[func].dictByDim()
        dimensions = sorted(dictFunc[func])

        # legend = []
        line = []
        mediandata = {}
        displaynumber = {}
        for i_target in range(len(valuesOfInterest)):
            succ = []
            unsucc = []
            # data = []
            maxevals = np.ones(len(dimensions))
            maxevals_succ = np.ones(len(dimensions))
            # Collect data that have the same function and different dimension.
            for idim, dim in enumerate(dimensions):
                assert len(dictFunc[func][dim]) == 1
                # (ert, success rate, number of success, total number of
                #  function evaluations, median of successful runs)
                tmp = generateData(dictFunc[func][dim][0],
                                   valuesOfInterest((func, dim))[i_target])
                maxevals[idim] = max(dictFunc[func][dim][0].maxevals)
                # data.append(np.append(dim, tmp))
                if tmp[2] > 0:  # Number of success is larger than 0
                    succ.append(np.append(dim, tmp))
                    if tmp[2] < dictFunc[func][dim][0].nbRuns():
                        displaynumber[dim] = ((dim, tmp[0], tmp[2]))
                    mediandata[dim] = (i_target, tmp[-1])
                    unsucc.append(np.append(dim, np.nan))
                else:
                    unsucc.append(np.append(dim, tmp[-2]))  # total number of fevals

            if len(succ) > 0:
                tmp = np.vstack(succ)
                # ERT
                if genericsettings.scaling_figures_with_boxes:
                    for dim in dimensions:
                        # to find finite simulated runlengths we need to have
                        # at least one successful run
                        if dictFunc[func][dim][0].detSuccesses(
                                [valuesOfInterest((func, dim))[i_target]])[0]:
                            # make a box-plot
                            y = toolsstats.drawSP_from_dataset(
                                dictFunc[func][dim][0],
                                valuesOfInterest((func, dim))[i_target],
                                [25, 50, 75],
                                genericsettings.simulated_runlength_bootstrap_sample_size)[0]
                            rec_width = 1.1  # box ("rectangle") width
                            rec_taille_fac = 0.3  # notch width parameter
                            # more difficult targets get a wider box
                            r = rec_width ** ((1. + i_target / 3.) / 4)
                            styles2 = {}
                            for s in styles[i_target]:
                                styles2[s] = styles[i_target][s]
                            styles2['linewidth'] = 1
                            styles2['markeredgecolor'] = styles2['color']
                            x = [dim / r, r * dim]
                            xm = [dim / (r**rec_taille_fac),
                                  dim * (r**rec_taille_fac)]
                            y = np.array(y) / dim
                            plt.plot([x[0], xm[0], x[0], x[1], xm[1], x[1], x[0]],
                                     [y[0], y[1], y[2], y[2], y[1], y[0], y[0]],
                                     markersize=0, **styles2)
                            styles2['linewidth'] = 0
                            plt.plot([x[0], x[1], x[1], x[0], x[0]],
                                     [y[0], y[0], y[2], y[2], y[0]],
                                     **styles2)
                            styles2['linewidth'] = 2  # median
                            plt.plot([x[0], x[1]], [y[1], y[1]],
                                     markersize=0, **styles2)

                # plot lines, we have to be smart to connect only adjacent dimensions
                for i, n in enumerate(tmp[:, 0]):
                    j = list(dimensions).index(n)
                    if i == len(tmp[:, 0]) - 1 or j == len(dimensions) - 1:
                        break
                    if dimensions[j + 1] == tmp[i + 1, 0]:
                        res.extend(plt.plot(tmp[i:i + 2, 0],
                                            tmp[i:i + 2, 1] / tmp[i:i + 2, 0]**ynormalize_by_dimension,
                                            markersize=0, clip_on=True,
                                            **styles[i_target]))
                # plot only marker
                lw = styles[i_target].get('linewidth', None)
                styles[i_target]['linewidth'] = 0
                res.extend(plt.plot(tmp[:, 0],
                                    tmp[:, 1] / tmp[:, 0]**ynormalize_by_dimension,
                                    markersize=20, clip_on=True,
                                    **styles[i_target]))
                # restore linewidth
                if lw:
                    styles[i_target]['linewidth'] = lw
                else:
                    del styles[i_target]['linewidth']

        # To have the legend displayed whatever happens with the data.
        for i in reversed(range(len(valuesOfInterest))):
            res.extend(plt.plot([], [], markersize=10,
                                label=valuesOfInterest.label(i)
                                      if isinstance(valuesOfInterest,
                                                    pproc.RunlengthBasedTargetValues)
                                      else valuesOfInterest.loglabel(i),
                                **styles[i]))

        # Only for the last target function value
        if unsucc:  # obsolete
            tmp = np.vstack(unsucc)  # tmp[:, 0] needs to be sorted!
            # res.extend(plt.plot(tmp[:, 0], tmp[:, 1]/tmp[:, 0],
            #            color=styles[len(valuesOfInterest)-1]['color'],
            #            marker='x', markersize=20))
        if 1 < 3:  # maxevals
            ylim = plt.ylim()
            res.extend(plt.plot(tmp[:, 0],
                                maxevals / tmp[:, 0]**ynormalize_by_dimension,
                                color=styles[len(valuesOfInterest) - 1]['color'],
                                ls='', marker='x', markersize=20))
            plt.ylim(ylim)

        # median
        if mediandata:
            # for i, tm in mediandata.items():
            for i in displaynumber:  # display median where success prob is smaller than one
                tm = mediandata[i]
                plt.plot((i,), (tm[1] / i**ynormalize_by_dimension,),
                         color=styles[tm[0]]['color'],
                         linestyle='', marker='+', markersize=30,
                         markeredgewidth=5, zorder=-1)

        a = plt.gca()
        # the displaynumber is emptied for each new target precision,
        # therefore the displaynumber displayed below corresponds to the
        # last target (must be the hardest)
        if displaynumber:  # displayed only for the smallest valuesOfInterest
            for _k, j in displaynumber.items():
                # the 1.5 factor is a shift up for the digits
                plt.text(j[0], 1.5 * j[1] / j[0]**ynormalize_by_dimension,
                         "%.0f" % j[2], axes=a,
                         horizontalalignment="center",
                         verticalalignment="bottom",
                         fontsize=plt.rcParams['font.size'] * 0.85)
        # if later the ylim[0] becomes >> 1, this might be a problem
    return res
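# Self-contained sketch (added; synthetic data, not the module's plotting
# code) of the normalization used above: ERT is divided by the dimension
# (cf. ynormalize_by_dimension) and plotted against the dimension on log-log
# axes, so linear scaling with the dimension appears as a horizontal line.
def _ert_scaling_sketch():
    import numpy as np
    import matplotlib.pyplot as plt
    dims = np.array([2, 3, 5, 10, 20, 40])
    ert = 50. * dims * np.log(dims)           # made-up ERT values
    plt.loglog(dims, ert / dims, marker='o')  # ERT / dimension vs dimension
    plt.xticks(dims, [str(d) for d in dims])
    plt.xlabel('dimension')
    plt.ylabel('ERT / dimension')
    plt.show()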