Example no. 1
def computeERT(fevals, maxevals):
    data = fevals.copy()
    # runs that reached the target have a finite number of evaluations
    success = (numpy.isnan(data) == False)
    # unsuccessful runs are accounted for with their full budget (maxevals)
    if any(numpy.isnan(data)):
        data[numpy.isnan(data)] = maxevals[numpy.isnan(data)]
    # toolsstats.sp returns (ERT, success rate, number of successes)
    res = toolsstats.sp(data, issuccessful=success)
    return res[0]
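The helper above delegates the actual computation to toolsstats.sp. For reference, here is a minimal self-contained sketch of the standard ERT estimate (total number of function evaluations over all runs, with unsuccessful runs counted at their full budget, divided by the number of successful runs). The (ERT, success rate, #successes) return convention of sp is inferred from the other examples on this page; the helper below is only an illustration, not the library routine.

import numpy as np

def ert_sketch(fevals, maxevals):
    """Illustrative stand-in for toolsstats.sp(...)[0]: the ERT estimate."""
    fevals = np.asarray(fevals, dtype=float)
    maxevals = np.asarray(maxevals, dtype=float)
    success = ~np.isnan(fevals)                    # runs that reached the target
    budget = np.where(success, fevals, maxevals)   # unsuccessful runs count in full
    nsucc = success.sum()
    return np.inf if nsucc == 0 else budget.sum() / nsucc

# 3 runs with a budget of 1000 evaluations each; the second run never reached the target
print(ert_sketch([120., np.nan, 300.], [1000., 1000., 1000.]))  # -> 710.0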
Example no. 2
def computeERT(hdata, maxevals):
    res = []
    for i in hdata:
        # each row of hdata: [target_value, evals_run_1, ..., evals_run_n]
        data = i.copy()
        data = data[1:]  # keep only the evaluation counts
        succ = (np.isnan(data) == False)
        if any(np.isnan(data)):
            data[np.isnan(data)] = maxevals[np.isnan(data)]
        # prepend the target value to (ERT, success rate, #successes)
        tmp = [i[0]]
        tmp.extend(toolsstats.sp(data, issuccessful=succ))
        res.append(tmp)
    return np.vstack(res)
Example no. 3
def generateData(dataSet, target):
    """Returns an array of results to be plotted.

    0th column is the ERT, 1st is the success rate, 2nd the number of
    successes, 3rd the mean of the number of function evaluations, and
    4th the median of the number of function evaluations of successful
    runs, or numpy.nan if there was no successful run.

    """
    res = []

    data = dataSet.detEvals([target])[0]
    succ = (numpy.isnan(data) == False)
    data[numpy.isnan(data)] = dataSet.maxevals[numpy.isnan(data)]
    res.extend(toolsstats.sp(data, issuccessful=succ, allowinf=False))
    res.append(numpy.mean(data))
    if res[2] > 0:
        res.append(toolsstats.prctile(data[succ], 50)[0])
    else:
        res.append(numpy.nan)
    res[3] = numpy.max(dataSet.maxevals)  # note: overwrites the mean appended above
    return res
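For readers without the bbob_pproc helpers, the quantities collected by generateData can be reproduced with plain NumPy roughly as below. This is a sketch under the assumption that toolsstats.sp returns (ERT, success rate, number of successes); the res[3] overwrite with the maximum of maxevals done above is deliberately left out.

import numpy as np

def generate_data_sketch(fevals, maxevals):
    """fevals: evaluations per run at the target; NaN means the target was not reached."""
    fevals = np.asarray(fevals, dtype=float)
    maxevals = np.asarray(maxevals, dtype=float)
    succ = ~np.isnan(fevals)
    budget = np.where(succ, fevals, maxevals)
    nsucc = int(succ.sum())
    ert = budget.sum() / nsucc if nsucc else np.inf
    success_rate = nsucc / float(len(fevals))
    med = np.median(fevals[succ]) if nsucc else np.nan
    return [ert, success_rate, nsucc, budget.mean(), med]

print(generate_data_sketch([120., np.nan, 300.], [1000., 1000., 1000.]))
# -> [710.0, 0.666..., 2, 473.33..., 210.0]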
Example no. 4
def generateData(dataSet, targetFuncValue):
    """Returns an array of results to be plotted.

    1st column is the ERT, 2nd the success rate, 3rd the number of
    successes, 4th the mean of the number of function evaluations, and
    finally the median number of function evaluations of successful runs.

    """
    res = []
    data = []

    it = iter(reversed(dataSet.evals))
    i = next(it)
    prev = np.array([np.nan] * len(i))

    while i[0] <= targetFuncValue:
        prev = i
        try:
            i = next(it)
        except StopIteration:
            break

    data = prev[1:].copy()  # keep only the number of function evaluations.
    succ = (np.isnan(data) == False)
    if succ.any():
        med = toolsstats.prctile(data[succ], 50)[0]
        #Line above was modified at rev 3050 to make sure that we consider only
        #successful trials in the median
    else:
        med = np.nan

    data[np.isnan(data)] = dataSet.maxevals[np.isnan(data)]

    res = []
    res.extend(toolsstats.sp(data, issuccessful=succ, allowinf=False))
    res.append(np.mean(data))  #mean(FE)
    res.append(med)

    return np.array(res)
Example no. 5
def main(dsList0, dsList1, dimsOfInterest, outputdir, info='', verbose=True):
    """One table per dimension, modified to fit in 1 page per table."""

    #TODO: method is long, split if possible

    dictDim0 = dsList0.dictByDim()
    dictDim1 = dsList1.dictByDim()

    alg0 = set(i[0] for i in dsList0.dictByAlg().keys()).pop()[0:3]
    alg1 = set(i[0] for i in dsList1.dictByAlg().keys()).pop()[0:3]

    open(os.path.join(outputdir, 'bbob_pproc_commands.tex'), 'a').write(
        r'\providecommand{\algorithmAshort}{%s}' % writeLabels(alg0) + '\n' +
        r'\providecommand{\algorithmBshort}{%s}' % writeLabels(alg1) + '\n')

    if info:
        info = '_' + info

    dims = set.intersection(set(dictDim0.keys()), set(dictDim1.keys()))
    if not bestalg.bestalgentries2009:
        bestalg.loadBBOB2009()

    header = []
    if isinstance(targetsOfInterest, pproc.RunlengthBasedTargetValues):
        header = [r'\#FEs/D']
        for label in targetsOfInterest.labels():
            header.append(r'\multicolumn{2}{@{}c@{}}{%s}' % label)
    else:
        header = [r'$\Delta f_\mathrm{opt}$']
        for label in targetsOfInterest.labels():
            header.append(r'\multicolumn{2}{@{\,}c@{\,}}{%s}' % label)
    header.append(r'\multicolumn{2}{@{}l@{}}{\#succ}')

    for d in dimsOfInterest:  # TODO set as input arguments
        table = [header]
        extraeol = [r'\hline']
        try:
            dictFunc0 = dictDim0[d].dictByFunc()
            dictFunc1 = dictDim1[d].dictByFunc()
        except KeyError:
            continue
        funcs = set.union(set(dictFunc0.keys()), set(dictFunc1.keys()))

        nbtests = len(funcs) * 2.  #len(dimsOfInterest)

        for f in sorted(funcs):
            targets = targetsOfInterest((f, d))
            targetf = targets[-1]

            bestalgentry = bestalg.bestalgentries2009[(d, f)]
            curline = [r'${\bf f_{%d}}$' % f]
            bestalgdata = bestalgentry.detERT(targets)
            bestalgevals, bestalgalgs = bestalgentry.detEvals(targets)

            if isinstance(targetsOfInterest, pproc.RunlengthBasedTargetValues):
                # write ftarget:fevals
                for i in range(len(bestalgdata[:-1])):
                    temp = "%.1e" % targetsOfInterest((f, d))[i]
                    if temp[-2] == "0":
                        temp = temp[:-2] + temp[-1]
                    curline.append(
                        r'\multicolumn{2}{@{}c@{}}{\textit{%s}:%s \quad}' %
                        (temp, writeFEvalsMaxPrec(bestalgdata[i], 2)))
                temp = "%.1e" % targetsOfInterest((f, d))[-1]
                if temp[-2] == "0":
                    temp = temp[:-2] + temp[-1]
                curline.append(r'\multicolumn{2}{@{}c@{}|}{\textit{%s}:%s }' %
                               (temp, writeFEvalsMaxPrec(bestalgdata[-1], 2)))
            else:
                # write #fevals of the reference alg
                for i in bestalgdata[:-1]:
                    curline.append(r'\multicolumn{2}{@{}c@{}}{%s \quad}' %
                                   writeFEvalsMaxPrec(i, 2))
                curline.append(r'\multicolumn{2}{@{}c@{}|}{%s}' %
                               writeFEvalsMaxPrec(bestalgdata[-1], 2))

            tmp = bestalgentry.detEvals([targetf])[0][0]
            tmp2 = numpy.sum(numpy.isnan(tmp) == False)
            curline.append('%d' % (tmp2))
            if tmp2 > 0:
                curline.append('/%d' % len(tmp))

            table.append(curline[:])
            extraeol.append('')

            rankdata0 = []  # never used

            # generate all data from ranksum test
            entries = []
            ertdata = {}
            for nb, dsList in enumerate((dictFunc0, dictFunc1)):
                try:
                    # take the first DataSet, there should be only one?
                    entry = dsList[f][0]
                except KeyError:
                    warnings.warn('data missing for data set ' + str(nb) +
                                  ' and function ' + str(f))
                    print('*** Warning: data missing for data set ' + str(nb) +
                          ' and function ' + str(f) + '***')
                    continue  # TODO: problem here!
                ertdata[nb] = entry.detERT(targets)
                entries.append(entry)

            for _t in ertdata.values():
                for _tt in _t:
                    if _tt is None:
                        raise ValueError

            if len(entries) < 2:  # function not available for *both* algorithms
                continue  # TODO: check which one is missing and make sure that what is there is displayed properly in the following

            testres0vs1 = significancetest(entries[0], entries[1], targets)
            testresbestvs1 = significancetest(bestalgentry, entries[1],
                                              targets)
            testresbestvs0 = significancetest(bestalgentry, entries[0],
                                              targets)

            for nb, entry in enumerate(entries):
                if nb == 0:
                    curline = [r'1:\:\algorithmAshort\hspace*{\fill}']
                else:
                    curline = [r'2:\:\algorithmBshort\hspace*{\fill}']

                #data = entry.detERT(targetsOfInterest)
                dispersion = []
                data = []
                evals = entry.detEvals(targets)
                for i in evals:
                    succ = (numpy.isnan(i) == False)
                    tmp = i.copy()
                    tmp[succ == False] = entry.maxevals[numpy.isnan(i)]
                    #set_trace()
                    data.append(toolsstats.sp(tmp, issuccessful=succ)[0])
                    #if not any(succ):
                    #set_trace()
                    if any(succ):
                        tmp2 = toolsstats.drawSP(tmp[succ], tmp[succ == False],
                                                 (10, 50, 90), samplesize)[0]
                        dispersion.append((tmp2[-1] - tmp2[0]) / 2.)
                    else:
                        dispersion.append(None)

                if nb == 0:
                    assert not isinstance(data, numpy.ndarray)
                    data0 = data[:]  # TODO: check if it is not an array, it's never used anyway?

                for i, dati in enumerate(data):

                    # TODO: there is something with the sign that I don't get
                    z, p = testres0vs1[i]
                    # assign significance flag, which is the -log10(p)
                    significance0vs1 = 0
                    if nb != 0:
                        z = -z  # the test is symmetric
                    if nbtests * p < 0.05 and z > 0:
                        significance0vs1 = -int(
                            numpy.ceil(numpy.log10(min([
                                1.0, nbtests * p
                            ]))))  # this is the larger the more significant

                    isBold = significance0vs1 > 0
                    alignment = 'c'
                    if i == len(data) - 1:  # last element
                        alignment = 'c|'

                    # if the 2009 best did not solve the problem
                    if numpy.isinf(bestalgdata[i]):

                        tmp = writeFEvalsMaxPrec(float(dati), 2)
                        if not numpy.isinf(dati):
                            tmp = r'\textit{%s}' % (tmp)
                            if isBold:
                                tmp = r'\textbf{%s}' % tmp

                        if dispersion[i] and numpy.isfinite(dispersion[i]):
                            tmp += r'${\scriptscriptstyle (%s)}$' % writeFEvalsMaxPrec(
                                dispersion[i], 1)
                        tableentry = (r'\multicolumn{2}{@{}%s@{}}{%s}' %
                                      (alignment, tmp))
                    else:
                        # Formatting
                        tmp = float(dati) / bestalgdata[i]
                        assert not numpy.isnan(tmp)
                        isscientific = False
                        if tmp >= 1000:
                            isscientific = True
                        tableentry = writeFEvals2(tmp,
                                                  2,
                                                  isscientific=isscientific)
                        tableentry = writeFEvalsMaxPrec(tmp, 2)

                        if numpy.isinf(tmp) and i == len(data) - 1:
                            tableentry = (
                                tableentry + r'\textit{%s}' %
                                writeFEvals2(numpy.median(entry.maxevals), 2))
                            if isBold:
                                tableentry = r'\textbf{%s}' % tableentry
                            elif 11 < 3 and significance0vs1 < 0:  # cave: negative significance has no meaning anymore
                                tableentry = r'\textit{%s}' % tableentry
                            if dispersion[i] and numpy.isfinite(
                                    dispersion[i] / bestalgdata[i]):
                                tableentry += r'${\scriptscriptstyle (%s)}$' % writeFEvalsMaxPrec(
                                    dispersion[i] / bestalgdata[i], 1)
                            tableentry = (r'\multicolumn{2}{@{}%s@{}}{%s}' %
                                          (alignment, tableentry))

                        elif tableentry.find('e') > -1 or (numpy.isinf(tmp) and
                                                           i != len(data) - 1):
                            if isBold:
                                tableentry = r'\textbf{%s}' % tableentry
                            elif 11 < 3 and significance0vs1 < 0:
                                tableentry = r'\textit{%s}' % tableentry
                            if dispersion[i] and numpy.isfinite(
                                    dispersion[i] / bestalgdata[i]):
                                tableentry += r'${\scriptscriptstyle (%s)}$' % writeFEvalsMaxPrec(
                                    dispersion[i] / bestalgdata[i], 1)
                            tableentry = (r'\multicolumn{2}{@{}%s@{}}{%s}' %
                                          (alignment, tableentry))
                        else:
                            tmp = tableentry.split('.', 1)
                            if isBold:
                                tmp = list(r'\textbf{%s}' % i for i in tmp)
                            elif 11 < 3 and significance0vs1 < 0:
                                tmp = list(r'\textit{%s}' % i for i in tmp)
                            tableentry = ' & .'.join(tmp)
                            if len(tmp) == 1:
                                tableentry += '&'
                            if dispersion[i] and numpy.isfinite(
                                    dispersion[i] / bestalgdata[i]):
                                tableentry += r'${\scriptscriptstyle (%s)}$' % writeFEvalsMaxPrec(
                                    dispersion[i] / bestalgdata[i], 1)

                    superscript = ''

                    if nb == 0:
                        z, p = testresbestvs0[i]
                    else:
                        z, p = testresbestvs1[i]

                    #The conditions are now that ERT < ERT_best
                    if ((nbtests * p) < 0.05 and dati - bestalgdata[i] < 0.
                            and z < 0.):
                        nbstars = -numpy.ceil(numpy.log10(nbtests * p))
                        #tmp = '\hspace{-.5ex}'.join(nbstars * [r'\star'])
                        if z > 0:
                            superscript = r'\uparrow'  #* nbstars
                        else:
                            superscript = r'\downarrow'  #* nbstars
                            # print z, linebest[i], line1
                        if nbstars > 1:
                            superscript += str(int(nbstars))

                    if superscript or significance0vs1:
                        s = ''
                        if significance0vs1 > 0:
                            s = r'\star'
                        if significance0vs1 > 1:
                            s += str(significance0vs1)
                        s = r'$^{' + s + superscript + r'}$'

                        if tableentry.endswith('}'):
                            tableentry = tableentry[:-1] + s + r'}'
                        else:
                            tableentry += s

                    curline.append(tableentry)

                    #curline.append(tableentry)
                    #if dispersion[i] is None or numpy.isinf(bestalgdata[i]):
                    #curline.append('')
                    #else:
                    #tmp = writeFEvalsMaxPrec(dispersion[i]/bestalgdata[i], 2)
                    #curline.append('(%s)' % tmp)

                tmp = entry.evals[entry.evals[:, 0] <= targetf, 1:]
                try:
                    tmp = tmp[0]
                    curline.append('%d' % numpy.sum(numpy.isnan(tmp) == False))
                except IndexError:
                    curline.append('%d' % 0)
                curline.append('/%d' % entry.nbRuns())

                table.append(curline[:])
                extraeol.append('')

            extraeol[-1] = r'\hline'
        extraeol[-1] = ''

        outputfile = os.path.join(outputdir,
                                  'pptable2_%02dD%s.tex' % (d, info))
        spec = r'@{}c@{}|' + '*{%d}{@{}r@{}@{}l@{}}' % len(
            targetsOfInterest) + '|@{}r@{}@{}l@{}'
        res = r'\providecommand{\algorithmAshort}{%s}' % writeLabels(
            alg0) + '\n'
        res += r'\providecommand{\algorithmBshort}{%s}' % writeLabels(
            alg1) + '\n'
        # open(os.path.join(outputdir, 'bbob_pproc_commands.tex'), 'a').write(res)

        #res += tableLaTeXStar(table, width=r'0.45\textwidth', spec=spec,
        #extraeol=extraeol)
        res += tableLaTeX(table, spec=spec, extraeol=extraeol)
        with open(outputfile, 'w') as f:
            f.write(res)
        if verbose:
            print("Table written in %s" % outputfile)
Example no. 6
def main(dictAlg,
         sortedAlgs,
         outputdir='.',
         verbose=True,
         function_targets_line=True):  # [1, 13, 101]
    """Generate one table per func with results of multiple algorithms."""
    """Difference with the first version:

    * numbers aligned using the decimal separator
    * premices for dispersion measure
    * significance test against best algorithm
    * table width...

    Takes ``targetsOfInterest`` from this file as "input argument" to compute
    the desired target values. ``targetsOfInterest`` might be configured via
    config.

    """

    # TODO: method is long, terrible to read, split if possible

    if not bestalg.bestalgentries2009:
        bestalg.loadBBOB2009()

    # Sort data per dimension and function
    dictData = {}
    dsListperAlg = list(dictAlg[i] for i in sortedAlgs)
    for n, entries in enumerate(dsListperAlg):
        tmpdictdim = entries.dictByDim()
        for d in tmpdictdim:
            tmpdictfun = tmpdictdim[d].dictByFunc()
            for f in tmpdictfun:
                dictData.setdefault((d, f), {})[n] = tmpdictfun[f]

    nbtests = len(dictData)

    for df in dictData:
        # Generate one table per df
        # first update targets for each dimension-function pair if needed:
        targets = targetsOfInterest((df[1], df[0]))
        targetf = targets[-1]

        # best 2009
        refalgentry = bestalg.bestalgentries2009[df]
        refalgert = refalgentry.detERT(targets)
        refalgevals = (refalgentry.detEvals((targetf, ))[0][0])
        refalgnbruns = len(refalgevals)
        refalgnbsucc = numpy.sum(numpy.isnan(refalgevals) == False)

        # Process the data
        # The following variables will be lists of elements each corresponding
        # to an algorithm
        algnames = []
        #algdata = []
        algerts = []
        algevals = []
        algdisp = []
        algnbsucc = []
        algnbruns = []
        algmedmaxevals = []
        algmedfinalfunvals = []
        algtestres = []
        algentries = []

        for n in sorted(dictData[df].keys()):
            entries = dictData[df][n]
            # the number of datasets for a given dimension and function (df)
            # should be strictly 1. TODO: find a way to warn
            # TODO: do this checking before... why wasn't it triggered by ppperprof?
            if len(entries) > 1:
                txt = ("There is more than a single entry associated with "
                       "folder %s on %d-D f%d." %
                       (sortedAlgs[n], df[0], df[1]))
                raise Exception(txt)

            entry = entries[0]
            algentries.append(entry)

            algnames.append(sortedAlgs[n])

            evals = entry.detEvals(targets)
            #tmpdata = []
            tmpdisp = []
            tmpert = []
            for i, e in enumerate(evals):
                succ = (numpy.isnan(e) == False)
                # note: here was the previous bug (changes made in e also appeared in evals!)
                ec = e.copy()
                ec[succ == False] = entry.maxevals[succ == False]
                ert = toolsstats.sp(ec, issuccessful=succ)[0]
                #tmpdata.append(ert/refalgert[i])
                if succ.any():
                    tmp = toolsstats.drawSP(ec[succ],
                                            entry.maxevals[succ == False],
                                            [10, 50, 90],
                                            samplesize=samplesize)[0]
                    tmpdisp.append((tmp[-1] - tmp[0]) / 2.)
                else:
                    tmpdisp.append(numpy.nan)
                tmpert.append(ert)
            algerts.append(tmpert)
            algevals.append(evals)
            #algdata.append(tmpdata)
            algdisp.append(tmpdisp)
            algmedmaxevals.append(numpy.median(entry.maxevals))
            algmedfinalfunvals.append(numpy.median(entry.finalfunvals))
            #algmedmaxevals.append(numpy.median(entry.maxevals)/df[0])
            #algmedfinalfunvals.append(numpy.median(entry.finalfunvals))

            algtestres.append(significancetest(refalgentry, entry, targets))

            # determine success probability for Df = 1e-8
            e = entry.detEvals((targetf, ))[0]
            algnbsucc.append(numpy.sum(numpy.isnan(e) == False))
            algnbruns.append(len(e))

        # Process over all data
        # find best values...

        nalgs = len(dictData[df])
        # number of algs to be displayed in bold
        maxRank = 1 + numpy.floor(0.14 * nalgs)

        isBoldArray = []  # Point out the best values
        # Store median function values / median number of function evaluations
        algfinaldata = []
        tmptop = getTopIndicesOfColumns(algerts, maxRank=maxRank)
        for i, erts in enumerate(algerts):
            tmp = []
            for j, ert in enumerate(erts):  # algi targetj
                tmp.append(i in tmptop[j] or
                           (nalgs > 7 and algerts[i][j] <= 3. * refalgert[j]))
            isBoldArray.append(tmp)
            algfinaldata.append((algmedfinalfunvals[i], algmedmaxevals[i]))

        # significance test of best given algorithm against all others
        # indexed by target index
        best_alg_idx = numpy.array(algerts).argsort(0)[0, :]
        significance_versus_others = significance_all_best_vs_other(
            algentries, targets, best_alg_idx)[0]

        # Create the table
        table = []
        spec = r'@{}c@{}|*{%d}{@{\,}r@{}X@{\,}}|@{}r@{}@{}l@{}' % (
            len(targets)
        )  # in case StrLeft not working: replaced c@{} with l@{ }
        spec = r'@{}c@{}|*{%d}{@{}r@{}X@{}}|@{}r@{}@{}l@{}' % (
            len(targets)
        )  # in case StrLeft not working: replaced c@{} with l@{ }
        extraeol = []

        # Generate header lines
        if with_table_heading:
            header = funInfos[df[1]] if funInfos else 'f%d' % df[1]
            table.append([
                r'\multicolumn{%d}{@{\,}c@{\,}}{{\textbf{%s}}}' %
                (2 * len(targets) + 2, header)
            ])
            extraeol.append('')

        if function_targets_line is True or (function_targets_line and df[1]
                                             in function_targets_line):
            if isinstance(targetsOfInterest, pproc.RunlengthBasedTargetValues):
                curline = [r'\#FEs/D']
                for i in targetsOfInterest.labels():
                    curline.append(r'\multicolumn{2}{@{}c@{}}{%s}' % i)

            else:
                curline = [r'$\Delta f_\mathrm{opt}$']
                for t in targets:
                    curline.append(
                        r'\multicolumn{2}{@{\,}X@{\,}}{%s}' %
                        writeFEvals2(t, precision=1, isscientific=True))
#                curline.append(r'\multicolumn{2}{@{\,}X@{}|}{%s}'
#                            % writeFEvals2(targets[-1], precision=1, isscientific=True))
            curline.append(r'\multicolumn{2}{@{}l@{}}{\#succ}')
            table.append(curline)
        extraeol.append(r'\hline')
        #        extraeol.append(r'\hline\arrayrulecolor{tableShade}')

        curline = [r'ERT$_{\text{best}}$'
                   ] if with_table_heading else [r'\textbf{f%d}' % df[1]]
        if isinstance(targetsOfInterest, pproc.RunlengthBasedTargetValues):
            # write ftarget:fevals
            for i in range(len(refalgert[:-1])):
                temp = "%.1e" % targetsOfInterest((df[1], df[0]))[i]
                if temp[-2] == "0":
                    temp = temp[:-2] + temp[-1]
                curline.append(
                    r'\multicolumn{2}{@{}c@{}}{\textit{%s}:%s \quad}' %
                    (temp, writeFEvalsMaxPrec(refalgert[i], 2)))
            temp = "%.1e" % targetsOfInterest((df[1], df[0]))[-1]
            if temp[-2] == "0":
                temp = temp[:-2] + temp[-1]
            curline.append(r'\multicolumn{2}{@{}c@{}|}{\textit{%s}:%s }' %
                           (temp, writeFEvalsMaxPrec(refalgert[-1], 2)))
        else:
            # write #fevals of the reference alg
            for i in refalgert[:-1]:
                curline.append(r'\multicolumn{2}{@{}c@{}}{%s \quad}' %
                               writeFEvalsMaxPrec(i, 2))
            curline.append(r'\multicolumn{2}{@{}c@{}|}{%s}' %
                           writeFEvalsMaxPrec(refalgert[-1], 2))

        # write the success ratio for the reference alg
        tmp2 = numpy.sum(
            numpy.isnan(refalgevals) == False)  # count the nb of success
        curline.append('%d' % (tmp2))
        if tmp2 > 0:
            curline.append('/%d' % len(refalgevals))

        table.append(curline[:])
        extraeol.append('')

        #for i, gna in enumerate(zip((1, 2, 3), ('bla', 'blo', 'bli'))):
        #print i, gna, gno
        #set_trace()
        # Format data
        #if df == (5, 17):
        #set_trace()

        header = r'\providecommand{\ntables}{7}'
        for i, alg in enumerate(algnames):
            #algname, entries, irs, line, line2, succ, runs, testres1alg in zip(algnames,
            #data, dispersion, isBoldArray, isItalArray, nbsucc, nbruns, testres):
            commandname = r'\alg%stables' % numtotext(i)
            #            header += r'\providecommand{%s}{{%s}{}}' % (commandname, str_to_latex(strip_pathname(alg)))
            header += r'\providecommand{%s}{\StrLeft{%s}{\ntables}}' % (
                commandname, str_to_latex(strip_pathname2(alg)))
            curline = [
                commandname + r'\hspace*{\fill}'
            ]  # each list element becomes a &-separated table entry?

            for j, tmp in enumerate(
                    zip(
                        algerts[i],
                        algdisp[i],  # j is target index
                        isBoldArray[i],
                        algtestres[i])):
                ert, dispersion, isBold, testres = tmp
                alignment = r'@{\,}X@{\,}'
                if j == len(algerts[i]) - 1:
                    alignment = r'@{\,}X@{\,}|'

                data = ert / refalgert[j]
                # write star for significance against all other algorithms
                str_significance_subsup = ''
                if (len(best_alg_idx) > 0
                        and len(significance_versus_others) > 0
                        and i == best_alg_idx[j]
                        and nbtests * significance_versus_others[j][1] < 0.05):
                    logp = -numpy.ceil(
                        numpy.log10(
                            nbtests * significance_versus_others[j][1]))
                    str_significance_subsup = r"^{%s%s}" % (
                        significance_vs_others_symbol,
                        str(int(logp)) if logp > 1 else '')

                # moved out of the above else: this was a bug!?
                z, p = testres
                if (nbtests * p) < 0.05 and data < 1. and z < 0.:
                    if not numpy.isinf(refalgert[j]):
                        tmpevals = algevals[i][j].copy()
                        tmpevals[numpy.isnan(tmpevals)] = algentries[
                            i].maxevals[numpy.isnan(tmpevals)]
                        bestevals = refalgentry.detEvals(targets)
                        bestevals, bestalgalg = (bestevals[0][0],
                                                 bestevals[1][0])
                        bestevals[numpy.isnan(
                            bestevals)] = refalgentry.maxevals[bestalgalg][
                                numpy.isnan(bestevals)]
                        tmpevals = numpy.array(sorted(
                            tmpevals))[0:min(len(tmpevals), len(bestevals))]
                        bestevals = numpy.array(sorted(
                            bestevals))[0:min(len(tmpevals), len(bestevals))]

                    #The conditions are now that ERT < ERT_best and
                    # all(sorted(FEvals_best) > sorted(FEvals_current)).
                    if numpy.isinf(refalgert[j]) or all(tmpevals < bestevals):
                        nbstars = -numpy.ceil(numpy.log10(nbtests * p))
                        # tmp2[-1] += r'$^{%s}$' % superscript
                        str_significance_subsup += r'_{%s%s}' % (
                            significance_vs_ref_symbol,
                            str(int(nbstars)) if nbstars > 1 else '')
                if str_significance_subsup:
                    str_significance_subsup = '$%s$' % str_significance_subsup

                # format number in variable data
                if numpy.isnan(data):
                    curline.append(r'\multicolumn{2}{%s}{.}' % alignment)
                else:
                    if numpy.isinf(refalgert[j]):
                        curline.append(
                            r'\multicolumn{2}{%s}{\textbf{%s}\mbox{\tiny (%s)}%s}'
                            % (alignment, writeFEvalsMaxPrec(algerts[i][j], 2),
                               writeFEvalsMaxPrec(dispersion, precdispersion),
                               str_significance_subsup))
                        continue

                    tmp = writeFEvalsMaxPrec(data,
                                             precfloat,
                                             maxfloatrepr=maxfloatrepr)
                    if data >= maxfloatrepr or data < 0.01:  # either inf or scientific notation
                        if numpy.isinf(data) and j == len(algerts[i]) - 1:
                            tmp += r'\,\textit{%s}' % writeFEvalsMaxPrec(
                                algfinaldata[i][1],
                                0,
                                maxfloatrepr=maxfloatrepr)
                        else:
                            tmp = writeFEvalsMaxPrec(data,
                                                     precscien,
                                                     maxfloatrepr=data)
                            if isBold:
                                tmp = r'\textbf{%s}' % tmp

                        if not numpy.isnan(dispersion):
                            tmpdisp = dispersion / refalgert[j]
                            if tmpdisp >= maxfloatrepr or tmpdisp < 0.005:  # TODO: hack
                                tmpdisp = writeFEvalsMaxPrec(
                                    tmpdisp,
                                    precdispersion,
                                    maxfloatrepr=tmpdisp)
                            else:
                                tmpdisp = writeFEvalsMaxPrec(
                                    tmpdisp,
                                    precdispersion,
                                    maxfloatrepr=maxfloatrepr)
                            tmp += r'\mbox{\tiny (%s)}' % tmpdisp
                        curline.append(
                            r'\multicolumn{2}{%s}{%s%s}' %
                            (alignment, tmp, str_significance_subsup))
                    else:
                        tmp2 = tmp.split('.', 1)
                        if len(tmp2) < 2:
                            tmp2.append('')
                        else:
                            tmp2[-1] = '.' + tmp2[-1]
                        if isBold:
                            tmp3 = []
                            for k in tmp2:
                                tmp3.append(r'\textbf{%s}' % k)
                            tmp2 = tmp3
                        if not numpy.isnan(dispersion):
                            tmpdisp = dispersion / refalgert[j]
                            if tmpdisp >= maxfloatrepr or tmpdisp < 0.01:
                                tmpdisp = writeFEvalsMaxPrec(
                                    tmpdisp,
                                    precdispersion,
                                    maxfloatrepr=tmpdisp)
                            else:
                                tmpdisp = writeFEvalsMaxPrec(
                                    tmpdisp,
                                    precdispersion,
                                    maxfloatrepr=maxfloatrepr)
                            tmp2[-1] += (r'\mbox{\tiny (%s)}' % (tmpdisp))
                        tmp2[-1] += str_significance_subsup
                        curline.extend(tmp2)

            curline.append('%d' % algnbsucc[i])
            curline.append('/%d' % algnbruns[i])
            table.append(curline)
            extraeol.append('')

        # Write table
        res = tableXLaTeX(table, spec=spec, extraeol=extraeol)
        filename = os.path.join(
            outputdir, 'pptables_f%03d_%02dD.tex' % (df[1], df[0]))
        with open(filename, 'w') as f:
            f.write(header + '\n')
            f.write(res)
        if verbose:
            print('Wrote table in %s' % filename)
Example no. 7
def main(dsList, dimsOfInterest, outputdir, info='', verbose=True):
    """Generate a table of ratio ERT/ERTbest vs target precision.

    1 table per dimension will be generated.

    Rank-sum tests are done on "Final Data Points" for only one
    algorithm, that is, for example, using 1/#fevals(ftarget) if ftarget
    was reached and -f_final otherwise as input for the rank-sum test,
    where obviously the larger the better.

    """
    #TODO: check that it works for any reference algorithm?
    #in the following the reference algorithm is the one given in
    #bestalg.bestalgentries which is the virtual best of BBOB
    dictDim = dsList.dictByDim()
    targetf = 1e-8
    if info:
        info = '_' + info
        # insert a separator between the default file name and the additional
        # information string.

    dims = set(dictDim.keys())
    if not bestalg.bestalgentries2009:
        bestalg.loadBBOB2009()
    if isinstance(targetsOfInterest, pproc.RunlengthBasedTargetValues):
        header = [r'\#FEs/D']
        for i in targetsOfInterest.labels():
            header.append(r'\multicolumn{2}{@{}c@{}}{%s}' % i)

    else:
        header = [r'$\Delta f$']
        for i in targetsOfInterest.target_values:
            header.append(r'\multicolumn{2}{@{}c@{}}{1e%+d}' %
                          (int(np.log10(i))))
    header.append(r'\multicolumn{2}{|@{}r@{}}{\#succ}')

    for d in dimsOfInterest:
        table = [header]
        extraeol = [r'\hline']
        try:
            dictFunc = dictDim[d].dictByFunc()
        except KeyError:
            continue
        funcs = set(dictFunc.keys())
        nbtests = float(len(funcs))  # #funcs tests times one algorithm

        for f in sorted(funcs):
            bestalgentry = bestalg.bestalgentries2009[(d, f)]
            curline = [r'${\bf f_{%d}}$' % f]
            bestalgdata = bestalgentry.detERT(targetsOfInterest((f, d)))
            bestalgevals, bestalgalgs = bestalgentry.detEvals(
                targetsOfInterest((f, d)))
            if isinstance(targetsOfInterest, pproc.RunlengthBasedTargetValues):
                #write ftarget:fevals
                for i in range(len(bestalgdata[:-1])):
                    temp = "%.1e" % targetsOfInterest((f, d))[i]
                    if temp[-2] == "0":
                        temp = temp[:-2] + temp[-1]
                    curline.append(
                        r'\multicolumn{2}{@{}c@{}}{\textit{%s}:%s \quad}' %
                        (temp, writeFEvalsMaxPrec(bestalgdata[i], 2)))
                temp = "%.1e" % targetsOfInterest((f, d))[-1]
                if temp[-2] == "0":
                    temp = temp[:-2] + temp[-1]
                curline.append(r'\multicolumn{2}{@{}c@{}|}{\textit{%s}:%s }' %
                               (temp, writeFEvalsMaxPrec(bestalgdata[-1], 2)))
                #success
                targetf = targetsOfInterest((f, d))[-1]

            else:
                # write #fevals of the reference alg
                for i in bestalgdata[:-1]:
                    curline.append(r'\multicolumn{2}{@{}c@{}}{%s \quad}' %
                                   writeFEvalsMaxPrec(i, 2))
                curline.append(r'\multicolumn{2}{@{}c@{}|}{%s}' %
                               writeFEvalsMaxPrec(bestalgdata[-1], 2))

            # write the success ratio for the reference alg
            tmp = bestalgentry.detEvals([targetf])[0][0]
            tmp2 = np.sum(np.isnan(tmp) == False)  # count the nb of success
            curline.append('%d' % (tmp2))
            if tmp2 > 0:
                curline.append('/%d' % len(tmp))

            table.append(curline[:])
            extraeol.append('')

            # generate all data for ranksum test
            assert len(dictFunc[f]) == 1
            entry = dictFunc[f][0]  # take the first element
            ertdata = entry.detERT(targetsOfInterest((f, d)))

            testresbestvs1 = significancetest(bestalgentry, entry,
                                              targetsOfInterest((f, d)))

            #for nb, entry in enumerate(entries):
            #curline = [r'\algshort\hspace*{\fill}']
            curline = ['']
            #data = entry.detERT(targetsOfInterest)
            evals = entry.detEvals(targetsOfInterest((f, d)))
            dispersion = []
            data = []
            for i in evals:
                succ = (np.isnan(i) == False)
                tmp = i.copy()
                tmp[succ == False] = entry.maxevals[np.isnan(i)]
                #set_trace()
                # TODO: what is the difference between data and ertdata?
                data.append(toolsstats.sp(tmp, issuccessful=succ)[0])
                #if not any(succ):
                #set_trace()
                if any(succ):
                    tmp2 = toolsstats.drawSP(tmp[succ], tmp[succ == False],
                                             (10, 50, 90), samplesize)[0]
                    dispersion.append((tmp2[-1] - tmp2[0]) / 2.)
                else:
                    dispersion.append(None)
            assert data == ertdata
            for i, ert in enumerate(data):
                alignment = 'c'
                if i == len(data) - 1:  # last element
                    alignment = 'c|'

                nbstars = 0
                z, p = testresbestvs1[i]
                if ert - bestalgdata[i] < 0. and not np.isinf(bestalgdata[i]):
                    evals = entry.detEvals([targetsOfInterest((f, d))[i]])[0]
                    evals[np.isnan(evals)] = entry.maxevals[np.isnan(evals)]
                    bestevals = bestalgentry.detEvals(
                        [targetsOfInterest((f, d))[i]])
                    bestevals, bestalgalg = (bestevals[0][0], bestevals[1][0])
                    bestevals[np.isnan(bestevals)] = bestalgentry.maxevals[
                        bestalgalg][np.isnan(bestevals)]
                    evals = np.array(
                        sorted(evals))[0:min(len(evals), len(bestevals))]
                    bestevals = np.array(
                        sorted(bestevals))[0:min(len(evals), len(bestevals))]

                #The conditions for significance are now that ERT < ERT_best and
                # all(sorted(FEvals_best) > sorted(FEvals_current)).
                if ((nbtests * p) < 0.05 and ert - bestalgdata[i] < 0.
                        and z < 0. and
                    (np.isinf(bestalgdata[i]) or all(evals < bestevals))):
                    nbstars = -np.ceil(np.log10(nbtests * p))
                isBold = False
                if nbstars > 0:
                    isBold = True

                # if the best did not solve the problem
                if np.isinf(bestalgdata[i]):
                    tmp = writeFEvalsMaxPrec(float(ert), 2)
                    if not np.isinf(ert):
                        tmp = r'\textit{%s}' % (tmp)
                        if isBold:
                            tmp = r'\textbf{%s}' % tmp

                    tableentry = (r'\multicolumn{2}{@{}%s@{}}{%s}' %
                                  (alignment, tmp))
                else:
                    # Formatting
                    tmp = float(ert) / bestalgdata[i]
                    assert not np.isnan(tmp)
                    tableentry = writeFEvalsMaxPrec(tmp, 2)

                    if np.isinf(tmp) and i == len(data) - 1:
                        tableentry = (
                            tableentry + r'\textit{%s}' %
                            writeFEvals2(np.median(entry.maxevals), 2))
                        if isBold:
                            tableentry = r'\textbf{%s}' % tableentry
                        elif 11 < 3:  # and significance0vs1 < 0:
                            tableentry = r'\textit{%s}' % tableentry
                        tableentry = (r'\multicolumn{2}{@{}%s@{}}{%s}' %
                                      (alignment, tableentry))
                    elif tableentry.find('e') > -1 or (np.isinf(tmp)
                                                       and i != len(data) - 1):
                        if isBold:
                            tableentry = r'\textbf{%s}' % tableentry
                        elif 11 < 3:  # and significance0vs1 < 0:
                            tableentry = r'\textit{%s}' % tableentry
                        tableentry = (r'\multicolumn{2}{@{}%s@{}}{%s}' %
                                      (alignment, tableentry))
                    else:
                        tmp = tableentry.split('.', 1)
                        if isBold:
                            tmp = list(r'\textbf{%s}' % i for i in tmp)
                        elif 11 < 3:  # and significance0vs1 < 0:
                            tmp = list(r'\textit{%s}' % i for i in tmp)
                        tableentry = ' & .'.join(tmp)
                        if len(tmp) == 1:
                            tableentry += '&'

                superscript = ''

                if nbstars > 0:
                    #tmp = '\hspace{-.5ex}'.join(nbstars * [r'\star'])
                    if z > 0:
                        superscript = r'\uparrow'  #* nbstars
                    else:
                        superscript = r'\downarrow'  #* nbstars
                        # print z, linebest[i], line1
                    if nbstars > 1:
                        superscript += str(int(min((9, nbstars))))
                        # superscript += str(int(nbstars))

                #if superscript or significance0vs1:
                #s = ''
                #if significance0vs1 > 0:
                #s = '\star'
                #if significance0vs1 > 1:
                #s += str(significance0vs1)
                #s = r'$^{' + s + superscript + r'}$'

                #if tableentry.endswith('}'):
                #tableentry = tableentry[:-1] + s + r'}'
                #else:
                #tableentry += s

                if dispersion[i]:
                    if not np.isinf(bestalgdata[i]):
                        tmp = writeFEvalsMaxPrec(
                            dispersion[i] / bestalgdata[i], 1)
                    else:
                        tmp = writeFEvalsMaxPrec(dispersion[i], 1)
                    tableentry += (r'${\scriptscriptstyle(%s)}$' % tmp)

                if superscript:
                    s = r'$^{' + superscript + r'}$'

                    if tableentry.endswith('}'):
                        tableentry = tableentry[:-1] + s + r'}'
                    else:
                        tableentry += s

                curline.append(tableentry)

                #curline.append(tableentry)
                #if dispersion[i] is None or np.isinf(bestalgdata[i]):
                #curline.append('')
                #else:
                #tmp = writeFEvalsMaxPrec(dispersion[i]/bestalgdata[i], 2)
                #curline.append('(%s)' % tmp)

            tmp = entry.evals[entry.evals[:, 0] <= targetf, 1:]
            try:
                tmp = tmp[0]
                curline.append('%d' % np.sum(np.isnan(tmp) == False))
            except IndexError:
                curline.append('%d' % 0)
            curline.append('/%d' % entry.nbRuns())

            table.append(curline[:])
            extraeol.append(r'\hline')
        extraeol[-1] = ''

        outputfile = os.path.join(outputdir, 'pptable_%02dD%s.tex' % (d, info))
        if isinstance(targetsOfInterest, pproc.RunlengthBasedTargetValues):
            spec = r'@{}c@{}|' + '*{%d}{@{ }r@{}@{}l@{}}' % len(
                targetsOfInterest) + '|@{}r@{}@{}l@{}'
        else:
            spec = r'@{}c@{}|' + '*{%d}{@{}r@{}@{}l@{}}' % len(
                targetsOfInterest) + '|@{}r@{}@{}l@{}'
        #res = r'\providecommand{\algshort}{%s}' % alg1 + '\n'
        #res += tableLaTeXStar(table, width=r'0.45\textwidth', spec=spec,
        #extraeol=extraeol)
        res = tableLaTeX(table, spec=spec, extraeol=extraeol)
        with open(outputfile, 'w') as f:
            f.write(res)
        if verbose:
            print("Table written in %s" % outputfile)
Example no. 8
def annotate(entry0, entry1, dim, minfvalue=1e-8, nbtests=1):
    """Display some annotations associated to the graphs generated."""

    isEarlyStop = False
    ha = 'left'
    va = 'center'
    lastfvalue = min(entry0.evals[-1][0], entry1.evals[-1][0])
    if minfvalue < lastfvalue:
        isEarlyStop = True
        #ha = 'center'
        #va = 'top'

    if not minfvalue or minfvalue < lastfvalue:
        minfvalue = lastfvalue

    line = []
    data0 = entry0.detEvals([minfvalue])[0]
    evals0 = data0.copy()
    succ = (np.isnan(evals0) == False)
    evals0[np.isnan(evals0)] = entry0.maxevals[np.isnan(evals0)]
    line.append(toolsstats.sp(evals0, issuccessful=succ))
    data1 = entry1.detEvals([minfvalue])[0]
    evals1 = data1.copy()
    succ = (np.isnan(evals1) == False)
    evals1[np.isnan(evals1)] = entry1.maxevals[np.isnan(evals1)]
    line.append(toolsstats.sp(evals1, issuccessful=succ))

    # What's the situation?
    txt = '%dD' % dim
    if (line[0][2] > 0 and line[1][2] > 0 and line[1][2] < 10):
        tmp = str(int(line[1][2]))
        tmp2 = str(int(line[0][2]))
        txt = tmp + '/' + tmp2

    dims = dimension_index
    ax = plt.gca()
    assert line[0][2] > 0 or line[1][2] > 0
    signdata = line[1][0] - line[0][0]

    if line[0][2] > 0 and line[1][2] > 0:
        trans = ax.transData
        annotcoord = [minfvalue, line[1][0] / line[0][0]]
    elif line[0][2] == 0:
        trans = blend(ax.transData, ax.transAxes)
        annotcoord = [
            minfvalue, -line[1][1] / 2 + 0.5 + offset * (5 - dims[dim])
        ]
        #if va == 'top':
        #    va = 'bottom'
    elif line[1][2] == 0:
        trans = blend(ax.transData, ax.transAxes)
        annotcoord = [
            minfvalue, line[0][1] / 2 + 0.5 - offset * (5 - dims[dim])
        ]

    plt.text(annotcoord[0],
             annotcoord[1],
             txt,
             horizontalalignment=ha,
             verticalalignment=va,
             transform=trans)

    #ranksum test
    line0 = np.power(data0, -1.)
    line0[np.isnan(line0)] = -entry0.finalfunvals[np.isnan(line0)]
    line1 = np.power(data1, -1.)
    line1[np.isnan(line1)] = -entry1.finalfunvals[np.isnan(line1)]
    # one-tailed statistics: scipy.stats.mannwhitneyu, two-tailed statistics: scipy.stats.ranksumtest
    z, p = ranksumtest(line0, line1)
    # Set the correct line in data0 and data1
    nbstars = 0
    # sign of z-value and data must agree
    if ((nbtests * p) < 0.05 and (z * signdata) > 0):
        nbstars = np.min([5, -np.ceil(np.log10(nbtests * p + 1e-99))])
    if nbstars > 0:
        xstars = annotcoord[0] * np.power(incrstars, np.arange(
            1., 1. + nbstars))
        # the additional slicing [0:int(nbstars)] is due to
        # np.arange(1., 1. - 0.1 * nbstars, -0.1) not having the right number
        # of elements due to numerical error
        ystars = [annotcoord[1]] * nbstars

        try:
            h = plt.plot(xstars,
                         ystars,
                         marker='*',
                         ls='',
                         color='w',
                         markersize=5 * linewidth,
                         markeredgecolor='k',
                         markerfacecolor='None',
                         zorder=20,
                         markeredgewidth=0.4 * linewidth,
                         transform=trans,
                         clip_on=False)
        except KeyError:
            #Version problem
            h = plt.plot(xstars,
                         ystars,
                         marker='+',
                         ls='',
                         color='w',
                         markersize=2.5 * linewidth,
                         markeredgecolor='k',
                         zorder=20,
                         markeredgewidth=0.2 * linewidth,
                         transform=trans,
                         clip_on=False)
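The stars plotted above encode the strength of the rank-sum result: roughly one star per order of magnitude of the Bonferroni-corrected p-value, capped at five (the additional check that the sign of z agrees with the ERT difference is done just before plotting and is omitted here). A minimal sketch of that mapping, using scipy.stats.ranksums as a stand-in for the ranksumtest call, which is an assumption since the original module ships its own two-sided implementation:

import numpy as np
from scipy import stats

def significance_stars(sample0, sample1, nbtests=1):
    """Number of stars for a two-sided rank-sum comparison (sketch only)."""
    z, p = stats.ranksums(sample0, sample1)   # two-sided Wilcoxon rank-sum test
    if nbtests * p >= 0.05:
        return 0                              # not significant after correction
    return int(min(5, -np.ceil(np.log10(nbtests * p + 1e-99))))

# e.g. significance_stars(np.random.rand(15), np.random.rand(15) + 1.0, nbtests=24)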
Example no. 9
def plot(dsList, param='dim', targets=(10., 1., 1e-1, 1e-2, 1e-3, 1e-5, 1e-8)):
    """Generate plot of ERT vs param."""

    dictparam = dsList.dictByParam(param)
    params = sorted(dictparam) # sorted because we draw lines

    # generate plot from dsList
    res = []
    # collect data
    rawdata = {}
    for p in params:
        assert len(dictparam[p]) == 1
        rawdata[p] = dictparam[p][0].detEvals(targets)
        # expect dictparam[p] to have only one element

    # plot lines for ERT
    xpltdata = params
    for i, t in enumerate(targets):
        ypltdata = []
        for p in params:
            data = rawdata[p][i]
            unsucc = np.isnan(data)
            assert len(dictparam[p]) == 1
            data[unsucc] = dictparam[p][0].maxevals
            # compute ERT
            ert, srate, succ = toolsstats.sp(data, issuccessful=(unsucc == False))
            ypltdata.append(ert)
        res.extend(plt.plot(xpltdata, ypltdata, markersize=20,
                   zorder=len(targets) - i, **styles[i]))
        # for the legend
        plt.plot([], [], markersize=10,
                 label=' %+d' % (np.log10(targets[i])),
                 **styles[i])

    # plot median of successful runs for hardest target with a success
    for p in params:
        for i, t in enumerate(reversed(targets)): # targets has to be from hardest to easiest
            data = rawdata[p][i]
            data = data[np.isnan(data) == False]
            if len(data) > 0:
                median = toolsstats.prctile(data, 50.)[0]
                res.extend(plt.plot(p, median, styles[i]['color'], **medmarker))
                break

    # plot average number of function evaluations for the hardest target
    xpltdata = []
    ypltdata = []
    for p in params:
        data = rawdata[p][0] # first target
        xpltdata.append(p)
        if (np.isnan(data) == False).all():
            tmpdata = data.copy()
            assert len(dictparam[p]) == 1
            tmpdata[np.isnan(data)] = dictparam[p][0].maxevals[np.isnan(data)]
            tmp = np.mean(tmpdata)
        else:
            tmp = np.nan # Check what happens when plotting NaN
        ypltdata.append(tmp)
    res.extend(plt.plot(xpltdata, ypltdata, **avgstyle))

    # display numbers of successes for hardest target where there is still one success
    for p in params:
        for i, t in enumerate(targets): # targets has to be from hardest to easiest
            data = rawdata[p][i]
            unsucc = np.isnan(data)
            assert len(dictparam[p]) == 1
            data[unsucc] = dictparam[p][0].maxevals
            # compute ERT
            ert, srate, succ = toolsstats.sp(data, issuccessful=(unsucc == False))
            if srate == 1.:
                break
            elif succ > 0:
                res.append(plt.text(p, ert * 1.85, "%d" % succ, axes=plt.gca(),
                                    horizontalalignment="center",
                                    verticalalignment="bottom"))
                break
    return res
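As a self-contained illustration of the kind of figure plot produces, the same ERT-versus-parameter lines can be drawn with bare matplotlib; the dimensions, targets and ERT values below are made up purely for the demonstration:

import numpy as np
import matplotlib.pyplot as plt

dims = [2, 3, 5, 10, 20, 40]                      # parameter on the x-axis
targets = (1e-1, 1e-3, 1e-8)                      # hypothetical precision targets
erts = {1e-1: [50, 90, 200, 600, 2e3, 8e3],       # made-up ERT values per target
        1e-3: [120, 300, 900, 3e3, 1e4, 5e4],
        1e-8: [500, 2e3, 8e3, 4e4, 2e5, np.nan]}  # NaN: target never reached

for t in targets:
    plt.plot(dims, erts[t], marker='o',
             label='target 1e%+d' % int(round(np.log10(t))))
plt.xscale('log')
plt.yscale('log')
plt.xlabel('dimension')
plt.ylabel('ERT (number of function evaluations)')
plt.legend()
plt.show()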