Ejemplo n.º 1
0
def generateData(dsList, evals, CrE_A):
    """Compute, for each function, the ERT loss ratio against the reference.

    For every function found in ``dsList`` the target values ``f_A`` are
    obtained with ``detf(entry, evals)``; the ERT of the entry and of the
    reference ("best") algorithm are then evaluated on these targets.
    The returned loss is ``np.exp(CrE_A) * ERT_A / ERT_best``.

    :param dsList: data sets of one algorithm; all of them are expected to
                   share one dimension and to hold exactly one data set
                   per function
    :param evals: sequence of evaluation budgets, indexed in parallel with
                  the ERT values
    :param CrE_A: crafting effort, applied as the factor ``exp(CrE_A)``
    :returns: dictionary mapping each function to its array of loss ratios

    """
    # should have only one element
    D = set(ds.dim for ds in dsList).pop()

    bestalgentries = bestalg.loadBestAlgorithm(dsList.isBiobjective())

    res = {}
    for fun, tmpdsList in dsList.dictByFunc().iteritems():
        assert len(tmpdsList) == 1
        entry = tmpdsList[0]
        bestalgentry = bestalgentries[(D, fun)]

        # targets reached by the entry within the given budgets
        f_A = detf(entry, evals)

        ERT_best = detERT(bestalgentry, f_A)
        ERT_A = detERT(entry, f_A)

        # for each target, the first reference target strictly below it
        nextbestf = []
        for fval in f_A:
            if fval != 0.:
                below = bestalgentry.target[bestalgentry.target < fval]
                try:
                    candidate = below[0]
                except IndexError:
                    candidate = fval * 10.**(-0.2)  # TODO: this is a hack
            else:
                candidate = 0.
            nextbestf.append(candidate)

        ERT_best_nextbestf = detERT(bestalgentry, nextbestf)

        for k in range(len(ERT_A)):
            # nextbestf[k] >= f_thresh: this is tested because if it is not
            # true ERT_best_nextbestf[k] is supposed to be infinite.
            if not (nextbestf[k] >= f_thresh):
                continue
            # is different from the specification...
            if ERT_best_nextbestf[k] < evals[k]:
                ERT_A[k] = evals[k]

        scaled_ert = np.exp(CrE_A) * np.array(ERT_A)
        loss_A = scaled_ert / np.array(ERT_best)
        assert not np.isnan(loss_A).any()
        res[fun] = loss_A

    return res
Ejemplo n.º 2
0
def main(dictAlg, isBiobjective, order=None, outputdir='.', info='default',
         dimension=None, verbose=True):
    """Generates a figure showing the performance of algorithms.

    From a dictionary of :py:class:`DataSetList` sorted by algorithms,
    generates the cumulative distribution function of the bootstrap
    distribution of ERT for algorithms on multiple functions for
    multiple targets altogether.

    The target values are not an argument: they are obtained by calling
    the module-level ``target_values`` with ``(function, dimension)``.
    Besides the figure, a LaTeX file ``pprldmany_<info>.tex`` defining the
    right-hand legend is written into ``outputdir``.

    :param dict dictAlg: dictionary of :py:class:`DataSetList` instances
                         one instance is equivalent to one algorithm,
    :param bool isBiobjective: passed on to ``bestalg.loadBestAlgorithm``;
                               the "best 2009" reference line is only
                               drawn when this is false
    :param list order: sorted list of keys to dictAlg for plotting order
    :param str outputdir: output directory
    :param str info: output file name suffix
    :param int dimension: dimension to be processed; must be given when
                          ``dictAlg`` holds data of several dimensions
    :param bool verbose: controls verbosity
    :raises ValueError: if several dimensions are present and
                        ``dimension`` is None, or if ``dimension`` is not
                        found in the data

    """
    global x_limit  # late assignment of default, because it can be set to None in config
    global divide_by_dimension  # not fully implemented/tested yet
    if 'x_limit' not in globals() or x_limit is None:
        x_limit = x_limit_default

    tmp = pp.dictAlgByDim(dictAlg)
    # tmp = pp.DictAlg(dictAlg).by_dim()

    if len(tmp) != 1 and dimension is None:
        raise ValueError('We never integrate over dimension.')
    if dimension is not None:
        if dimension not in tmp.keys():
            raise ValueError('dimension %d not in dictAlg dimensions %s'
                             % (dimension, str(tmp.keys())))
        tmp = {dimension: tmp[dimension]}
    dim = tmp.keys()[0]
    divisor = dim if divide_by_dimension else 1

    algorithms_with_data = [a for a in dictAlg.keys() if dictAlg[a] != []]

    dictFunc = pp.dictAlgByFun(dictAlg)

    # Collect data
    # Crafting effort correction: should we consider any?
    CrEperAlg = {}
    for alg in algorithms_with_data:
        CrE = 0.
        if dictAlg[alg][0].algId == 'GLOBAL':
            tmp = dictAlg[alg].dictByNoise()
            assert len(tmp.keys()) == 1
            if tmp.keys()[0] == 'noiselessall':
                CrE = 0.5117
            elif tmp.keys()[0] == 'nzall':
                CrE = 0.6572
        CrEperAlg[alg] = CrE
        if CrE != 0.0:
            print('Crafting effort for %s is %s' % (alg, CrE))

    dictData = {} # list of (ert per function) per algorithm
    dictMaxEvals = {} # list of (maxevals per function) per algorithm
    # funcsolved = [set()] * len(targets) # number of functions solved per target
    xbest2009 = []
    maxevalsbest2009 = []
    # Decided once, before the loop: the display section below reads this
    # flag even when no function at all has been processed.
    displaybest2009 = not isBiobjective #disabled until we find the bug
    bestalgentries = None  # loaded on first use, once for all functions
    for f, dictAlgperFunc in dictFunc.iteritems():
        if function_IDs and f not in function_IDs:
            continue
        # print target_values((f, dim))
        for t in target_values((f, dim)):
        # for j, t in enumerate(genericsettings.current_testbed.ecdf_target_values(1e2, f)):
            # funcsolved[j].add(f)

            for alg in algorithms_with_data:
                # default when data are missing or no run was successful
                x = [np.inf] * perfprofsamplesize
                runlengthunsucc = []
                try:
                    entry = dictAlgperFunc[alg][0] # one element per fun and per dim.
                    evals = entry.detEvals([t])[0]
                    assert entry.dim == dim
                    runlengthsucc = evals[np.isnan(evals) == False] / divisor
                    runlengthunsucc = entry.maxevals[np.isnan(evals)] / divisor
                    if len(runlengthsucc) > 0:
                        x = toolsstats.drawSP(runlengthsucc, runlengthunsucc,
                                             percentiles=[50],
                                             samplesize=perfprofsamplesize)[1]
                except (KeyError, IndexError):
                    warntxt = ('Data for algorithm %s on function %d in %d-D '
                           % (alg, f, dim)
                           + 'are missing.\n')
                    warnings.warn(warntxt)

                dictData.setdefault(alg, []).extend(x)
                dictMaxEvals.setdefault(alg, []).extend(runlengthunsucc)

        if displaybest2009:
            if bestalgentries is None:
                bestalgentries = bestalg.loadBestAlgorithm(isBiobjective)
            bestalgentry = bestalgentries[(dim, f)]
            bestalgevals = bestalgentry.detEvals(target_values((f, dim)))
            # print bestalgevals
            for j in range(len(bestalgevals[0])):
                if bestalgevals[1][j]:
                    evals = bestalgevals[0][j]
                    assert dim == bestalgentry.dim
                    runlengthsucc = evals[np.isnan(evals) == False] / divisor
                    runlengthunsucc = bestalgentry.maxevals[bestalgevals[1][j]][np.isnan(evals)] / divisor
                    x = toolsstats.drawSP(runlengthsucc, runlengthunsucc,
                                         percentiles=[50],
                                         samplesize=perfprofsamplesize)[1]
                else:
                    x = perfprofsamplesize * [np.inf]
                    runlengthunsucc = []
                xbest2009.extend(x)
                maxevalsbest2009.extend(runlengthunsucc)

    if order is None:
        order = dictData.keys()

    # Display data
    lines = []
    if displaybest2009:
        args = {'ls': '-', 'linewidth': 6, 'marker': 'D', 'markersize': 11.,
                'markeredgewidth': 1.5, 'markerfacecolor': refcolor,
                'markeredgecolor': refcolor, 'color': refcolor,
                'label': 'best 2009', 'zorder': -1}
        lines.append(plotdata(np.array(xbest2009), x_limit, maxevalsbest2009,
                                  CrE = 0., **args))

    def algname_to_label(algname, dirname=None):
        """to be extended to become generally useful"""
        if isinstance(algname, (tuple, list)): # not sure this is needed
            return ' '.join([str(name) for name in algname])
        return str(algname)
    for i, alg in enumerate(order):
        try:
            data = dictData[alg]
            maxevals = dictMaxEvals[alg]
        except KeyError:
            continue

        # Work on a copy: the entries of ``styles`` are shared, and writing
        # label/marker settings into them would leak into later uses.
        args = dict(styles[i % len(styles)])
        args['linewidth'] = 1.5
        args['markersize'] = 12.
        args['markeredgewidth'] = 1.5
        args['markerfacecolor'] = 'None'
        args['markeredgecolor'] = args['color']
        args['label'] = algname_to_label(alg)
        #args['markevery'] = perfprofsamplesize # option available in latest version of matplotlib
        # plotdata calls pprldistr.plotECDF which calls ppfig.plotUnifLog... which does the work
        lines.append(plotdata(np.array(data), x_limit, maxevals,
                                  CrE=CrEperAlg[alg], **args))

    labels, handles = plotLegend(lines, x_limit)
    if True:  # isLateXLeg:
        fileName = os.path.join(outputdir,'pprldmany_%s.tex' % (info))
        with open(fileName, 'w') as texfile:
            texfile.write(r'\providecommand{\nperfprof}{7}')
            algtocommand = {}  # latex commands
            for i, alg in enumerate(order):
                tmp = r'\alg%sperfprof' % pptex.numtotext(i)
                texfile.write(r'\providecommand{%s}{\StrLeft{%s}{\nperfprof}}' %
                        (tmp, toolsdivers.str_to_latex(
                                toolsdivers.strip_pathname2(algname_to_label(alg)))))
                algtocommand[algname_to_label(alg)] = tmp
            if displaybest2009:
                tmp = r'\algzeroperfprof'
                texfile.write(r'\providecommand{%s}{best 2009}' % (tmp))
                algtocommand['best 2009'] = tmp

            commandnames = [algtocommand[label] for label in labels]
            # texfile.write(headleg)
            if not commandnames:
                # no legend entry at all: define an empty side panel instead
                # of failing on commandnames[0]
                texfile.write(r'\providecommand{\perfprofsidepanel}{}' + '\n')
            elif len(order) > 28:  # latex sidepanel won't work well for more than 25 algorithms, but original labels are also clipped
                texfile.write(r'\providecommand{\perfprofsidepanel}{\mbox{%s}\vfill\mbox{%s}}'
                        % (commandnames[0], commandnames[-1]))
            else:
                fontsize_command = r'\tiny{}' if len(order) > 19 else ''
                texfile.write(r'\providecommand{\perfprofsidepanel}{{%s\mbox{%s}' %
                        (fontsize_command, commandnames[0]))
                for name in commandnames[1:]:
                    texfile.write('\n' + r'\vfill \mbox{%s}' % name)
                texfile.write('}}\n')
            # texfile.write(footleg)
            if verbose:
                print('Wrote right-hand legend in %s' % fileName)

    figureName = os.path.join(outputdir,'pprldmany_%s' % (info))
    #beautify(figureName, funcsolved, x_limit*x_annote_factor, False, fileFormat=figformat)
    beautify()

    text = ppfig.consecutiveNumbers(sorted(dictFunc.keys()), 'f')
    text += ',%d-D' % dim  # TODO: this is strange when different dimensions are plotted
    plt.text(0.01, 0.98, text, horizontalalignment="left",
             verticalalignment="top", transform=plt.gca().transAxes)
    if len(dictFunc) == 1:
        plt.title(' '.join((str(dictFunc.keys()[0]),
                  genericsettings.current_testbed.short_names[dictFunc.keys()[0]])))
    a = plt.gca()

    plt.xlim(xmin=1e-0, xmax=x_limit**annotation_space_end_relative)
    xticks, labels = plt.xticks()
    # label the ticks with the decimal exponent of their position
    tmp = []
    for i in xticks:
        tmp.append('%d' % round(np.log10(i)))
    a.set_xticklabels(tmp)

    if save_figure:
        ppfig.saveFigure(figureName, verbose=verbose)
        if len(dictFunc) == 1:
            ppfig.save_single_functions_html(
                os.path.join(outputdir, 'pprldmany'),
                '', # algorithms names are clearly visible in the figure
                add_to_names='_%02dD' %(dim),
                algorithmCount=ppfig.AlgorithmCount.NON_SPECIFIED
            )
    if close_figure:
        plt.close()
Ejemplo n.º 3
0
def main(dictAlg, sortedAlgs, isBiobjective, outputdir='.', verbose=True, function_targets_line=True):  # [1, 13, 101]
    """Generate one table per func with results of multiple algorithms.

    Difference with the first version:

    * numbers aligned using the decimal separator
    * premises for dispersion measure
    * significance test against best algorithm
    * table width...

    Takes ``targetsOfInterest`` from this file as "input argument" to compute
    the desired target values. ``targetsOfInterest`` might be configured via
    config.

    For each (dimension, function) pair a file
    ``pptables_fXXX_XXD.tex`` is written into ``outputdir``. For the
    dimensions 5 and 20 an HTML table is additionally inserted into the
    existing file ``genericsettings.many_algorithm_file_name + '.html'``,
    in front of the matching placeholder comment line.

    :param dict dictAlg: data set lists, indexed by the elements of
                         ``sortedAlgs``
    :param list sortedAlgs: keys of ``dictAlg`` in the order of the table
                            rows
    :param bool isBiobjective: passed on to ``bestalg.loadBestAlgorithm``
                               and ``ppfigparam.read_fun_infos``
    :param str outputdir: output directory
    :param bool verbose: controls verbosity
    :param function_targets_line: ``True`` to write the line of targets in
                                  every table, or a container of function
                                  ids for which the line is written
    :raises Exception: if an algorithm has more than one data set for a
                       given dimension and function

    """

    # TODO: method is long, terrible to read, split if possible

    bestalgentries = bestalg.loadBestAlgorithm(isBiobjective)

    # Sort data per dimension and function
    dictData = {}
    dsListperAlg = list(dictAlg[i] for i in sortedAlgs)
    for n, entries in enumerate(dsListperAlg):
        tmpdictdim = entries.dictByDim()
        for d in tmpdictdim:
            tmpdictfun = tmpdictdim[d].dictByFunc()
            for f in tmpdictfun:
                dictData.setdefault((d, f), {})[n] = tmpdictfun[f]

    # number of tables, used as multiplier of the p-values below
    nbtests = len(dictData)

    funInfos = ppfigparam.read_fun_infos(isBiobjective)

    for df in dictData:
        # Generate one table per df
        # first update targets for each dimension-function pair if needed:
        targets = targetsOfInterest((df[1], df[0]))
        targetf = targets[-1]

        # best 2009
        refalgentry = bestalgentries[df]
        refalgert = refalgentry.detERT(targets)
        refalgevals = (refalgentry.detEvals((targetf, ))[0][0])

        # Process the data
        # The following variables will be lists of elements each corresponding
        # to an algorithm
        algnames = []
        #algdata = []
        algerts = []
        algevals = []
        algdisp = []
        algnbsucc = []
        algnbruns = []
        algmedmaxevals = []
        algmedfinalfunvals = []
        algtestres = []
        algentries = []

        for n in sorted(dictData[df].keys()):
            entries = dictData[df][n]
            # the number of datasets for a given dimension and function (df)
            # should be strictly 1. TODO: find a way to warn
            # TODO: do this checking before... why wasn't it triggered by ppperprof?
            if len(entries) > 1:
                print(entries)
                txt = ("There is more than a single entry associated with "
                       "folder %s on %d-D f%d." % (sortedAlgs[n], df[0], df[1]))
                raise Exception(txt)

            entry = entries[0]
            algentries.append(entry)

            algnames.append(sortedAlgs[n])

            evals = entry.detEvals(targets)
            #tmpdata = []
            tmpdisp = []
            tmpert = []
            for i, e in enumerate(evals):
                succ = (numpy.isnan(e) == False)
                ec = e.copy() # note: here was the previous bug (changes made in e also appeared in evals !)
                ec[succ == False] = entry.maxevals[succ == False]
                ert = toolsstats.sp(ec, issuccessful=succ)[0]
                #tmpdata.append(ert/refalgert[i])
                if succ.any():
                    tmp = toolsstats.drawSP(ec[succ], entry.maxevals[succ == False],
                                           [10, 50, 90], samplesize=samplesize)[0]
                    # half of the 10%-90% percentile range as dispersion
                    tmpdisp.append((tmp[-1] - tmp[0])/2.)
                else:
                    tmpdisp.append(numpy.nan)
                tmpert.append(ert)
            algerts.append(tmpert)
            algevals.append(evals)
            #algdata.append(tmpdata)
            algdisp.append(tmpdisp)
            algmedmaxevals.append(numpy.median(entry.maxevals))
            algmedfinalfunvals.append(numpy.median(entry.finalfunvals))
            #algmedmaxevals.append(numpy.median(entry.maxevals)/df[0])
            #algmedfinalfunvals.append(numpy.median(entry.finalfunvals))

            algtestres.append(significancetest(refalgentry, entry, targets))

            # determine success probability for Df = 1e-8
            e = entry.detEvals((targetf ,))[0]
            algnbsucc.append(numpy.sum(numpy.isnan(e) == False))
            algnbruns.append(len(e))

        # Process over all data
        # find best values...

        nalgs = len(dictData[df])
        maxRank = 1 + numpy.floor(0.14 * nalgs)  # number of algs to be displayed in bold

        isBoldArray = [] # Point out the best values
        algfinaldata = [] # Store median function values/median number of function evaluations
        tmptop = getTopIndicesOfColumns(algerts, maxRank=maxRank)
        for i, erts in enumerate(algerts):
            tmp = []
            for j, ert in enumerate(erts):  # algi targetj
                tmp.append(i in tmptop[j] or (nalgs > 7 and algerts[i][j] <= 3. * refalgert[j]))
            isBoldArray.append(tmp)
            algfinaldata.append((algmedfinalfunvals[i], algmedmaxevals[i]))

        # significance test of best given algorithm against all others
        best_alg_idx = numpy.array(algerts).argsort(0)[0, :]  # indexed by target index
        significance_versus_others = significance_all_best_vs_other(algentries, targets, best_alg_idx)[0]

        # Create the table
        table = []
        tableHtml = []
        # previously: r'@{}c@{}|*{%d}{@{\,}r@{}X@{\,}}|@{}r@{}@{}l@{}'
        spec = r'@{}c@{}|*{%d}{@{}r@{}X@{}}|@{}r@{}@{}l@{}' % (len(targets)) # in case StrLeft not working: replaced c@{} with l@{ }
        extraeol = []

        # Generate header lines
        if with_table_heading:
            header = funInfos[df[1]] if df[1] in funInfos.keys() else 'f%d' % df[1]
            table.append([r'\multicolumn{%d}{@{\,}c@{\,}}{{\textbf{%s}}}'
                          % (2 * len(targets) + 2, header)])
            extraeol.append('')

        # Start every table with an empty HTML header row: without this, a
        # table without the targets line would fail on the first table or
        # re-use the last HTML row of the previous table.
        curlineHtml = []
        if function_targets_line is True or (function_targets_line and df[1] in function_targets_line):
            if isinstance(targetsOfInterest, pproc.RunlengthBasedTargetValues):
                curline = [r'\#FEs/D']
                curlineHtml = ['<thead>\n<tr>\n<th>#FEs/D<br>REPLACEH</th>\n']
                counter = 1
                for i in targetsOfInterest.labels():
                    curline.append(r'\multicolumn{2}{@{}c@{}}{%s}' % i)
                    curlineHtml.append('<td>%s<br>REPLACE%d</td>\n' % (i, counter))
                    counter += 1
            else:
                curline = [r'$\Delta f_\mathrm{opt}$']
                curlineHtml = ['<thead>\n<tr>\n<th>&#916; f<sub>opt</sub><br>REPLACEH</th>\n']
                counter = 1
                for t in targets:
                    curline.append(r'\multicolumn{2}{@{\,}X@{\,}}{%s}'
                                % writeFEvals2(t, precision=1, isscientific=True))
                    curlineHtml.append('<td>%s<br>REPLACE%d</td>\n' % (writeFEvals2(t, precision=1, isscientific=True), counter))
                    counter += 1
#                curline.append(r'\multicolumn{2}{@{\,}X@{}|}{%s}'
#                            % writeFEvals2(targets[-1], precision=1, isscientific=True))
            curline.append(r'\multicolumn{2}{@{}l@{}}{\#succ}')
            curlineHtml.append('<td>#succ<br>REPLACEF</td>\n</tr>\n</thead>\n')
            table.append(curline)

        extraeol.append(r'\hline')
#        extraeol.append(r'\hline\arrayrulecolor{tableShade}')

        # The REPLACEH/REPLACE<n>/REPLACEF placeholders of the HTML header
        # row are substituted below by the values of the reference algorithm.
        curline = [r'ERT$_{\text{best}}$'] if with_table_heading else [r'\textbf{f%d}' % df[1]]
        replaceValue = 'ERT<sub>best</sub>' if with_table_heading else ('<b>f%d</b>' % df[1])
        curlineHtml = [item.replace('REPLACEH', replaceValue) for item in curlineHtml]
        if isinstance(targetsOfInterest, pproc.RunlengthBasedTargetValues):
            # write ftarget:fevals
            counter = 1
            for i in xrange(len(refalgert[:-1])):
                temp="%.1e" %targetsOfInterest((df[1], df[0]))[i]
                if temp[-2]=="0":
                    temp=temp[:-2]+temp[-1]
                curline.append(r'\multicolumn{2}{@{}c@{}}{\textit{%s}:%s \quad}'
                                   % (temp, writeFEvalsMaxPrec(refalgert[i], 2)))
                replaceValue = '<i>%s</i>:%s' % (temp, writeFEvalsMaxPrec(refalgert[i], 2))
                curlineHtml = [item.replace('REPLACE%d' % counter, replaceValue) for item in curlineHtml]
                counter += 1

            temp="%.1e" %targetsOfInterest((df[1], df[0]))[-1]
            if temp[-2]=="0":
                temp=temp[:-2]+temp[-1]
            curline.append(r'\multicolumn{2}{@{}c@{}|}{\textit{%s}:%s }'
                               % (temp ,writeFEvalsMaxPrec(refalgert[-1], 2)))
            replaceValue = '<i>%s</i>:%s' % (temp, writeFEvalsMaxPrec(refalgert[-1], 2))
            curlineHtml = [item.replace('REPLACE%d' % counter, replaceValue) for item in curlineHtml]
        else:
            # write #fevals of the reference alg
            counter = 1
            for i in refalgert[:-1]:
                curline.append(r'\multicolumn{2}{@{}c@{}}{%s \quad}'
                                   % writeFEvalsMaxPrec(i, 2))
                curlineHtml = [item.replace('REPLACE%d' % counter, writeFEvalsMaxPrec(i, 2)) for item in curlineHtml]
                counter += 1
            curline.append(r'\multicolumn{2}{@{}c@{}|}{%s}'
                               % writeFEvalsMaxPrec(refalgert[-1], 2))
            curlineHtml = [item.replace('REPLACE%d' % counter, writeFEvalsMaxPrec(refalgert[-1], 2)) for item in curlineHtml]

        # write the success ratio for the reference alg
        tmp2 = numpy.sum(numpy.isnan(refalgevals) == False) # count the nb of success
        curline.append('%d' % (tmp2))
        if tmp2 > 0:
            curline.append('/%d' % len(refalgevals))
            replaceValue = '%d/%d' % (tmp2, len(refalgevals))
        else:
            replaceValue = '%d' % tmp2
        curlineHtml = [item.replace('REPLACEF', replaceValue) for item in curlineHtml]

        table.append(curline[:])
        tableHtml.extend(curlineHtml[:])
        tableHtml.append('<tbody>\n')
        extraeol.append('')

        # Format data
        header = r'\providecommand{\ntables}{7}'
        for i, alg in enumerate(algnames):
            tableHtml.append('<tr>\n')
            #algname, entries, irs, line, line2, succ, runs, testres1alg in zip(algnames,
            #data, dispersion, isBoldArray, isItalArray, nbsucc, nbruns, testres):
            commandname = r'\alg%stables' % numtotext(i)
#            header += r'\providecommand{%s}{{%s}{}}' % (commandname, str_to_latex(strip_pathname(alg)))
            header += r'\providecommand{%s}{\StrLeft{%s}{\ntables}}' % (commandname, str_to_latex(strip_pathname1(alg)))
            curline = [commandname + r'\hspace*{\fill}']  # each list element becomes a &-separated table entry?
            curlineHtml = ['<th>%s</th>\n' % str_to_latex(strip_pathname1(alg))]

            for j, tmp in enumerate(zip(algerts[i], algdisp[i],  # j is target index
                                        isBoldArray[i], algtestres[i])):
                ert, dispersion, isBold, testres = tmp
                alignment = r'@{\,}X@{\,}'
                if j == len(algerts[i]) - 1:
                    alignment = r'@{\,}X@{\,}|'

                # ERT relative to the ERT of the reference algorithm
                data = ert/refalgert[j]
                # write star for significance against all other algorithms
                str_significance_subsup = ''
                str_significance_subsup_html = ''
                if (len(best_alg_idx) > 0 and len(significance_versus_others) > 0 and
                    i == best_alg_idx[j] and nbtests * significance_versus_others[j][1] < 0.05):
                    logp = -numpy.ceil(numpy.log10(nbtests * significance_versus_others[j][1]))
                    logp = numpy.min((9, logp))  # not messing up the format and handling inf
                    str_significance_subsup =  r"^{%s%s}" % (significance_vs_others_symbol, str(int(logp)) if logp > 1 else '')
                    str_significance_subsup_html = '<sup>%s%s</sup>' % (significance_vs_others_symbol_html, str(int(logp)) if logp > 1 else '')

                # moved out of the above else: this was a bug!?
                z, p = testres
                if (nbtests * p) < 0.05 and data < 1. and z < 0.:
                    if not numpy.isinf(refalgert[j]):
                        tmpevals = algevals[i][j].copy()
                        tmpevals[numpy.isnan(tmpevals)] = algentries[i].maxevals[numpy.isnan(tmpevals)]
                        # NOTE(review): index 0 selects the evaluations for
                        # targets[0] whatever the value of j -- confirm
                        # that the target index j is not meant here.
                        bestevals = refalgentry.detEvals(targets)
                        bestevals, bestalgalg = (bestevals[0][0], bestevals[1][0])
                        bestevals[numpy.isnan(bestevals)] = refalgentry.maxevals[bestalgalg][numpy.isnan(bestevals)]
                        tmpevals = numpy.array(sorted(tmpevals))[0:min(len(tmpevals), len(bestevals))]
                        bestevals = numpy.array(sorted(bestevals))[0:min(len(tmpevals), len(bestevals))]

                    #The conditions are now that ERT < ERT_best and
                    # all(sorted(FEvals_best) > sorted(FEvals_current)).
                    if numpy.isinf(refalgert[j]) or all(tmpevals < bestevals):
                        nbstars = -numpy.ceil(numpy.log10(nbtests * p))
                        # tmp2[-1] += r'$^{%s}$' % superscript
                        str_significance_subsup += r'_{%s%s}' % (significance_vs_ref_symbol,
                                                                 str(int(nbstars)) if nbstars > 1 else '')
                        # appended, like the LaTeX marker above, so that the
                        # marker of the test against all others is kept
                        str_significance_subsup_html += '<sub>%s%s</sub>' % (significance_vs_ref_symbol_html,
                                                                 str(int(nbstars)) if nbstars > 1 else '')
                if str_significance_subsup:
                    str_significance_subsup = '$%s$' % str_significance_subsup

                # format number in variable data
                if numpy.isnan(data):
                    curline.append(r'\multicolumn{2}{%s}{.}' % alignment)
                else:
                    if numpy.isinf(refalgert[j]):
                        # no finite reference ERT: show the absolute ERT in bold
                        curline.append(r'\multicolumn{2}{%s}{\textbf{%s}\mbox{\tiny (%s)}%s}'
                                       % (alignment,
                                          writeFEvalsMaxPrec(algerts[i][j], 2),
                                          writeFEvalsMaxPrec(dispersion, precdispersion),
                                          str_significance_subsup))
                        curlineHtml.append('<td sorttable_customkey=\"%f\"><b>%s</b> (%s)%s</td>\n'
                                       % (algerts[i][j],
                                          writeFEvalsMaxPrec(algerts[i][j], 2),
                                          writeFEvalsMaxPrec(dispersion, precdispersion),
                                          str_significance_subsup_html))
                        continue

                    tmp = writeFEvalsMaxPrec(data, precfloat, maxfloatrepr=maxfloatrepr)
                    tmpHtml = writeFEvalsMaxPrec(data, precfloat, maxfloatrepr=maxfloatrepr)
                    sortKey = data
                    if data >= maxfloatrepr or data < 0.01: # either inf or scientific notation
                        if numpy.isinf(data) and j == len(algerts[i]) - 1:
                            tmp += r'\,\textit{%s}' % writeFEvalsMaxPrec(algfinaldata[i][1], 0, maxfloatrepr=maxfloatrepr)
                            tmpHtml += '<i>%s</i>' % writeFEvalsMaxPrec(algfinaldata[i][1], 0, maxfloatrepr=maxfloatrepr)
                            sortKey = algfinaldata[i][1]
                        else:
                            tmp = writeFEvalsMaxPrec(data, precscien, maxfloatrepr=data)
                            if isBold:
                                tmpHtml = '<b>%s</b>' % tmp
                                tmp = r'\textbf{%s}' % tmp

                        if not numpy.isnan(dispersion):
                            tmpdisp = dispersion/refalgert[j]
                            if tmpdisp >= maxfloatrepr or tmpdisp < 0.005: # TODO: hack
                                tmpdisp = writeFEvalsMaxPrec(tmpdisp, precdispersion, maxfloatrepr=tmpdisp)
                            else:
                                tmpdisp = writeFEvalsMaxPrec(tmpdisp, precdispersion, maxfloatrepr=maxfloatrepr)
                            tmp += r'\mbox{\tiny (%s)}' % tmpdisp
                            tmpHtml += ' (%s)' % tmpdisp
                        curline.append(r'\multicolumn{2}{%s}{%s%s}' % (alignment, tmp, str_significance_subsup))
                        tmpHtml = tmpHtml.replace(r'$\infty$', '&infin;')
                        if (numpy.isinf(sortKey)):
                            sortKey = sys.maxint
                        curlineHtml.append('<td sorttable_customkey=\"%f\">%s%s</td>' % (sortKey, tmpHtml, str_significance_subsup_html))
                    else:
                        # split at the decimal separator: the two parts go
                        # into two adjacent table columns
                        tmp2 = tmp.split('.', 1)
                        if len(tmp2) < 2:
                            tmp2.append('')
                        else:
                            tmp2[-1] = '.' + tmp2[-1]
                        if isBold:
                            tmp3 = []
                            tmp3html = []
                            for k in tmp2:
                                tmp3.append(r'\textbf{%s}' % k)
                                tmp3html.append('<b>%s</b>' % k)
                            tmp2 = tmp3
                            tmp2html = tmp3html
                        else:
                            tmp2html = []
                            tmp2html.extend(tmp2)
                        if not numpy.isnan(dispersion):
                            tmpdisp = dispersion/refalgert[j]
                            if tmpdisp >= maxfloatrepr or tmpdisp < 0.01:
                                tmpdisp = writeFEvalsMaxPrec(tmpdisp, precdispersion, maxfloatrepr=tmpdisp)
                            else:
                                tmpdisp = writeFEvalsMaxPrec(tmpdisp, precdispersion, maxfloatrepr=maxfloatrepr)
                            tmp2[-1] += (r'\mbox{\tiny (%s)}' % (tmpdisp))
                            tmp2html[-1] += ' (%s)' % tmpdisp
                        tmp2[-1] += str_significance_subsup
                        tmp2html[-1] += str_significance_subsup_html
                        curline.extend(tmp2)
                        tmp2html = ("").join(str(item) for item in tmp2html)
                        tmp2html = tmp2html.replace(r'$\infty$', '&infin;')
                        curlineHtml.append('<td sorttable_customkey=\"%f\">%s</td>' % (data, tmp2html))

            curline.append('%d' % algnbsucc[i])
            curline.append('/%d' % algnbruns[i])
            table.append(curline)
            curlineHtml.append('<td sorttable_customkey=\"%d\">%d/%d</td>\n' % (algnbsucc[i], algnbsucc[i], algnbruns[i]))
            tableHtml.extend(curlineHtml[:])
            extraeol.append('')

        # Write table
        res = tableXLaTeX(table, spec=spec, extraeol=extraeol)
        filename = os.path.join(outputdir, 'pptables_f%03d_%02dD.tex' % (df[1], df[0]))
        # the with-statement closes the file also when writing fails
        with open(filename, 'w') as f:
            f.write(header + '\n')
            f.write(res)

        res = ("").join(str(item) for item in tableHtml)
        res = '\n<table class=\"sortable\" style=\"width:800px \">\n%s</table>\n<p/>\n' % res

        if df[0] in (5, 20):
            # insert the HTML table in front of its placeholder comment line
            htmlfilename = os.path.join(outputdir, genericsettings.many_algorithm_file_name + '.html')
            lines = []
            with open(htmlfilename) as infile:
                for line in infile:
                    if '<!--' + 'pptablesf%03d%02dDHtml' % (df[1], df[0]) + '-->' in line:
                        lines.append(res)
                    lines.append(line)

            with open(htmlfilename, 'w') as outfile:
                for line in lines:
                    outfile.write(line)

        if verbose:
            # reports the LaTeX table file, not the HTML file updated above
            print('Wrote table in %s' % filename)
Ejemplo n.º 4
0
def _strip_exponent_zero(value):
    """Format `value` with ``%.1e`` and drop a zero in the tens place of the exponent.

    For instance ``1.0e+01`` is shortened to ``1.0e+1``.
    """
    text = "%.1e" % value
    if text[-2] == "0":
        text = text[:-2] + text[-1]
    return text


def _short_algorithm_name(dsList):
    """Return the first three characters of an algorithm name found in `dsList`.

    The name is the first component of one key of ``dsList.dictByAlg()``, with
    ``genericsettings.extraction_folder_prefix`` removed.
    """
    # NOTE(review): set.pop() picks an arbitrary element; presumably each
    # data set list holds a single algorithm -- confirm against the callers.
    name = set(key[0] for key in dsList.dictByAlg().keys()).pop()
    return name.replace(genericsettings.extraction_folder_prefix, '')[0:3]


def main(dsList0, dsList1, dimsOfInterest, outputdir, info='', verbose=True):
    """One table per dimension, modified to fit in 1 page per table.

    For every dimension in `dimsOfInterest` that is present in both data set
    lists, a table is built with one reference row per function (values of
    the entry returned by ``bestalg.loadBestAlgorithm``) followed by one row
    per algorithm.  Each table is written

    * as LaTeX to ``pptable2_<dim>D<info>.tex`` in `outputdir`, and
    * as HTML, inserted in front of the line containing the
      ``<!--pptable2Html-->`` marker of
      ``genericsettings.two_algorithm_file_name + '.html'`` in `outputdir`
      (that file must already exist).

    The ``\\algorithmAshort`` / ``\\algorithmBshort`` LaTeX commands are also
    appended to ``bbob_pproc_commands.tex`` in `outputdir`.

    The targets (`targetsOfInterest`) and the sample size used for the
    dispersion estimate (`samplesize`) are not parameters; they are read
    from the enclosing module scope.

    :param dsList0: data sets of the first algorithm (row label "1:").
    :param dsList1: data sets of the second algorithm (row label "2:").
    :param dimsOfInterest: iterable of dimensions to generate tables for;
        dimensions missing from either data set list are skipped.
    :param outputdir: directory of the generated and updated files.
    :param info: optional string appended, after an underscore, to the name
        of the LaTeX files.
    :param verbose: if true, print the name of each LaTeX file written.
    :raises ValueError: if ``detERT`` returns ``None`` for some target.

    """

    #TODO: method is long, split if possible

    dictDim0 = dsList0.dictByDim()
    dictDim1 = dsList1.dictByDim()

    alg0 = _short_algorithm_name(dsList0)
    alg1 = _short_algorithm_name(dsList1)

    # The same command definitions go into the shared commands file and at
    # the top of every generated table file.
    algcommands = (r'\providecommand{\algorithmAshort}{%s}' % writeLabels(alg0) + '\n' +
                   r'\providecommand{\algorithmBshort}{%s}' % writeLabels(alg1) + '\n')
    with open(os.path.join(outputdir, 'bbob_pproc_commands.tex'), 'a') as cmdfile:
        cmdfile.write(algcommands)

    if info:
        info = '_' + info

    bestalgentries = bestalg.loadBestAlgorithm(dsList0.isBiobjective())

    # Header row: only the first label and the column spec of the target
    # labels depend on the kind of targets.
    isRunlength = isinstance(targetsOfInterest, pproc.RunlengthBasedTargetValues)
    if isRunlength:
        header = [r'\#FEs/D']
        headerHtml = ['<thead>\n<tr>\n<th>#FEs/D</th>\n']
        labelspec = r'\multicolumn{2}{@{}c@{}}{%s}'
    else:
        header = [r'$\Delta f_\mathrm{opt}$']
        headerHtml = ['<thead>\n<tr>\n<th>&#916; f</th>\n']
        labelspec = r'\multicolumn{2}{@{\,}c@{\,}}{%s}'
    for label in targetsOfInterest.labels():
        header.append(labelspec % label)
        headerHtml.append('<td>%s</td>\n' % label)
    header.append(r'\multicolumn{2}{@{}l@{}}{\#succ}')
    headerHtml.append('<td>#succ</td>\n</tr>\n</thead>\n')

    for d in dimsOfInterest: # TODO set as input arguments
        table = [header]
        # Work on a copy: the rows appended below must not end up in
        # headerHtml, otherwise they would reappear in the tables of all
        # following dimensions.
        tableHtml = headerHtml[:]
        extraeol = [r'\hline']
        try:
            dictFunc0 = dictDim0[d].dictByFunc()
            dictFunc1 = dictDim1[d].dictByFunc()
        except KeyError:
            continue
        funcs = set(dictFunc0.keys()).union(dictFunc1.keys())

        # p-values are multiplied by this number before being compared to 0.05
        nbtests = len(funcs) * 2. #len(dimsOfInterest)

        tableHtml.append('<tbody>\n')
        for f in sorted(funcs):
            tableHtml.append('<tr>\n')
            targets = targetsOfInterest((f, d))
            targetf = targets[-1]

            # ---- reference row -------------------------------------------
            bestalgentry = bestalgentries[(d, f)]
            curline = [r'${\bf f_{%d}}$' % f]
            curlineHtml = ['<th><b>f<sub>%d</sub></b></th>\n' % f]
            bestalgdata = bestalgentry.detERT(targets)
            # NOTE(review): neither result is used below; the call is kept in
            # case detEvals has side effects -- confirm and drop.
            bestalgevals, bestalgalgs = bestalgentry.detEvals(targets)

            if isRunlength:
                # write ftarget:fevals
                for i, ert in enumerate(bestalgdata[:-1]):
                    temp = _strip_exponent_zero(targets[i])
                    curline.append(r'\multicolumn{2}{@{}c@{}}{\textit{%s}:%s \quad}'
                                   % (temp, writeFEvalsMaxPrec(ert, 2)))
                    curlineHtml.append('<td><i>%s</i>:%s</td>\n'
                                       % (temp, writeFEvalsMaxPrec(ert, 2)))
                # the last target column is closed by a vertical rule
                temp = _strip_exponent_zero(targets[-1])
                curline.append(r'\multicolumn{2}{@{}c@{}|}{\textit{%s}:%s }'
                               % (temp, writeFEvalsMaxPrec(bestalgdata[-1], 2)))
                curlineHtml.append('<td><i>%s</i>:%s</td>\n'
                                   % (temp, writeFEvalsMaxPrec(bestalgdata[-1], 2)))
            else:
                # write #fevals of the reference alg
                for ert in bestalgdata[:-1]:
                    curline.append(r'\multicolumn{2}{@{}c@{}}{%s \quad}'
                                   % writeFEvalsMaxPrec(ert, 2))
                    curlineHtml.append('<td>%s</td>\n' % writeFEvalsMaxPrec(ert, 2))

                curline.append(r'\multicolumn{2}{@{}c@{}|}{%s}'
                               % writeFEvalsMaxPrec(bestalgdata[-1], 2))
                curlineHtml.append('<td>%s</td>\n' % writeFEvalsMaxPrec(bestalgdata[-1], 2))

            # number of non-NaN evaluation counts of the reference algorithm
            # for the last target
            tmp = bestalgentry.detEvals([targetf])[0][0]
            tmp2 = numpy.sum(numpy.isnan(tmp) == False)
            curline.append('%d' % (tmp2))
            if tmp2 > 0:
                curline.append('/%d' % len(tmp))
                curlineHtml.append('<td>%d/%d</td>\n' % (tmp2, len(tmp)))
            else:
                curlineHtml.append('<td>%d</td>\n' % (tmp2))

            table.append(curline[:])
            tableHtml.extend(curlineHtml[:])
            tableHtml.append('</tr>\n')
            extraeol.append('')

            # ---- collect the data sets of both algorithms ----------------
            entries = []
            ertdata = {}
            for nb, dsList in enumerate((dictFunc0, dictFunc1)):
                try:
                    entry = dsList[f][0] # take the first DataSet, there should be only one?
                except KeyError:
                    msg = 'data missing for data set ' + str(nb) + ' and function ' + str(f)
                    warnings.warn(msg)
                    print('*** Warning: ' + msg + '***')
                    continue # TODO: problem here!
                ertdata[nb] = entry.detERT(targets)
                entries.append(entry)

            for erts in ertdata.values():
                for ert in erts:
                    if ert is None:
                        raise ValueError('detERT returned None for function '
                                         + str(f) + ' in dimension ' + str(d))

            if len(entries) < 2: # function not available for *both* algorithms
                continue  # TODO: check which one is missing and make sure that what is there is displayed properly in the following

            testres0vs1 = significancetest(entries[0], entries[1], targets)
            testresbestvs1 = significancetest(bestalgentry, entries[1], targets)
            testresbestvs0 = significancetest(bestalgentry, entries[0], targets)

            # ---- one row per algorithm -----------------------------------
            for nb, entry in enumerate(entries):
                tableHtml.append('<tr>\n')
                if nb == 0:
                    curline = [r'1:\:\algorithmAshort\hspace*{\fill}']
                    curlineHtml = ['<th>1: %s</th>\n' % alg0]
                    testresbest = testresbestvs0
                else:
                    curline = [r'2:\:\algorithmBshort\hspace*{\fill}']
                    curlineHtml = ['<th>2: %s</th>\n' % alg1]
                    testresbest = testresbestvs1

                # Per target: the first value returned by toolsstats.sp, and
                # half the spread between the first and last of the
                # (10, 50, 90) values returned by toolsstats.drawSP (None
                # when no run was successful).
                dispersion = []
                data = []
                evals = entry.detEvals(targets)
                for runs in evals:
                    succ = (numpy.isnan(runs) == False)
                    tmp = runs.copy()
                    # unsuccessful runs are counted with their maxevals
                    tmp[succ==False] = entry.maxevals[numpy.isnan(runs)]
                    data.append(toolsstats.sp(tmp, issuccessful=succ)[0])
                    if any(succ):
                        tmp2 = toolsstats.drawSP(tmp[succ], tmp[succ==False],
                                                (10, 50, 90), samplesize)[0]
                        dispersion.append((tmp2[-1]-tmp2[0])/2.)
                    else:
                        dispersion.append(None)

                for i, dati in enumerate(data):

                    z, p = testres0vs1[i] # TODO: there is something with the sign that I don't get
                    # assign significance flag, which is the -log10(p)
                    significance0vs1 = 0
                    if nb != 0:
                        z = -z  # the test is symmetric
                    if nbtests * p < 0.05 and z > 0:
                        significance0vs1 = -int(numpy.ceil(numpy.log10(min([1.0, nbtests * p]))))  # this is the larger the more significant

                    isBold = significance0vs1 > 0
                    alignment = 'c'
                    if i == len(data) - 1: # last element
                        alignment = 'c|'

                    if numpy.isinf(bestalgdata[i]): # the reference algorithm did not solve the problem
                        # no ratio can be formed: show the absolute value
                        tmp = writeFEvalsMaxPrec(float(dati), 2)
                        tmpHtml = tmp
                        if not numpy.isinf(dati):
                            tmpHtml = '<i>%s</i>' % (tmp)
                            tmp = r'\textit{%s}' % (tmp)
                            if isBold:
                                tmp = r'\textbf{%s}' % tmp
                                tmpHtml = '<b>%s</b>' % tmpHtml

                        if dispersion[i] and numpy.isfinite(dispersion[i]):
                            disp = writeFEvalsMaxPrec(dispersion[i], 1)
                            tmp += r'${\scriptscriptstyle (%s)}$' % disp
                            tmpHtml += ' (%s)' % disp
                        tableentry = (r'\multicolumn{2}{@{}%s@{}}{%s}'
                                      % (alignment, tmp))
                        # The HTML cell gets the HTML markup (not the LaTeX
                        # string), as in the branch below.
                        tableentryHtml = tmpHtml
                    else:
                        # value relative to the reference algorithm
                        ratio = float(dati)/bestalgdata[i]
                        assert not numpy.isnan(ratio)
                        tableentry = writeFEvalsMaxPrec(ratio, 2)
                        tableentryHtml = tableentry

                        if numpy.isinf(ratio) and i == len(data)-1:
                            # last target never reached: append the median of maxevals
                            medianbudget = writeFEvals2(numpy.median(entry.maxevals), 2)
                            tableentry = tableentry + r'\textit{%s}' % medianbudget
                            tableentryHtml = tableentryHtml + ' <i>%s</i>' % medianbudget
                            spanscolumns = True
                        else:
                            # entries in scientific notation and infinite
                            # entries are not split at the decimal point
                            spanscolumns = (tableentry.find('e') > -1
                                            or bool(numpy.isinf(ratio)))

                        if spanscolumns:
                            if isBold:
                                tableentry = r'\textbf{%s}' % tableentry
                                tableentryHtml = '<b>%s</b>' % tableentryHtml
                        else:
                            # split at the decimal point so that the LaTeX
                            # r/l column pair aligns the numbers on it
                            parts = tableentry.split('.', 1)
                            partsHtml = tableentryHtml.split('.', 1)
                            if isBold:
                                parts = [r'\textbf{%s}' % part for part in parts]
                                partsHtml = ['<b>%s</b>' % part for part in partsHtml]
                            tableentry = ' & .'.join(parts)
                            tableentryHtml = '.'.join(partsHtml)
                            if len(parts) == 1:
                                tableentry += '&'

                        if dispersion[i] and numpy.isfinite(dispersion[i]/bestalgdata[i]):
                            disp = writeFEvalsMaxPrec(dispersion[i]/bestalgdata[i], 1)
                            tableentry += r'${\scriptscriptstyle (%s)}$' % disp
                            tableentryHtml += ' (%s)' % disp

                        if spanscolumns:
                            tableentry = (r'\multicolumn{2}{@{}%s@{}}{%s}'
                                          % (alignment, tableentry))

                    superscript = ''
                    superscriptHtml = ''

                    z, p = testresbest[i]

                    #The conditions are now that ERT < ERT_best
                    if ((nbtests * p) < 0.05 and dati - bestalgdata[i] < 0.
                        and z < 0.):
                        nbstars = -numpy.ceil(numpy.log10(nbtests * p))
                        # z < 0 holds here, hence the arrow always points down
                        superscript = r'\downarrow' #* nbstars
                        superscriptHtml = '&darr;'
                        if nbstars > 1:
                            superscript += str(int(nbstars))
                            superscriptHtml += str(int(nbstars))

                    if superscript or significance0vs1:
                        s = ''
                        shtml = ''
                        if significance0vs1 > 0:
                            s = r'\star'
                            shtml = '&#9733;'
                        if significance0vs1 > 1:
                            s += str(significance0vs1)
                            shtml += str(significance0vs1)
                        s = r'$^{' + s + superscript + r'}$'
                        shtml = '<sup>' + shtml + superscriptHtml + '</sup>'

                        # keep the marker inside a closing \multicolumn{...}
                        if tableentry.endswith('}'):
                            tableentry = tableentry[:-1] + s + r'}'
                        else:
                            tableentry += s
                        tableentryHtml += shtml

                    tableentryHtml = tableentryHtml.replace(r'$\infty$', '&infin;')
                    curlineHtml.append('<td>%s</td>\n' % tableentryHtml)
                    curline.append(tableentry)

                # number of non-NaN entries in the first row of entry.evals
                # whose first column is <= the last target, over the number
                # of runs
                reached = entry.evals[entry.evals[:, 0] <= targetf, 1:]
                try:
                    nbsucc = numpy.sum(numpy.isnan(reached[0]) == False)
                except IndexError:
                    nbsucc = 0
                curline.append('%d' % nbsucc)
                curlineHtml.append('<td>%d' % nbsucc)
                curline.append('/%d' % entry.nbRuns())
                curlineHtml.append('/%d</td>\n' % entry.nbRuns())

                table.append(curline[:])
                tableHtml.extend(curlineHtml[:])
                tableHtml.append('</tr>\n')
                extraeol.append('')

            extraeol[-1] = r'\hline'
        extraeol[-1] = ''

        outputfile = os.path.join(outputdir, 'pptable2_%02dD%s.tex' % (d, info))
        spec = r'@{}c@{}|' + '*{%d}{@{}r@{}@{}l@{}}' % len(targetsOfInterest) + '|@{}r@{}@{}l@{}'
        res = algcommands + tableLaTeX(table, spec=spec, extraeol=extraeol)
        with open(outputfile, 'w') as outfile:
            outfile.write(res)

        res = ("").join(str(item) for item in tableHtml)
        res = '<p><b>%d-D</b></p>\n<table>\n%s</table>\n' % (d, res)

        # Insert the HTML table in front of the marker line of the existing
        # HTML file.
        filename = os.path.join(outputdir, genericsettings.two_algorithm_file_name + '.html')
        lines = []
        with open(filename) as infile:
            for line in infile:
                if '<!--pptable2Html-->' in line:
                    lines.append(res)
                lines.append(line)

        with open(filename, 'w') as outfile:
            outfile.writelines(lines)

        if verbose:
            print("Table written in %s" % outputfile)