Ejemplo n.º 1
0
def plot_diff(axOrig, ax, lineR, lineE, ylab, params):
    """Plot the repair and example delta lines on ``ax``.

    Args:
        axOrig: Original axes; only its title is read (when ``showTitle``
            is set in ``params``).
        ax: Axes that receive the two lines, the y=0 reference line and
            the grid.
        lineR: Mapping of x -> y drawn with the 'True repair' colour.
        lineE: Mapping of x -> y drawn with the 'True example' colour.
        ylab: Accepted for interface compatibility; this function does not
            apply it to ``ax``.
        params: Options read through ``H.fetchExists_list`` /
            ``H.fetchExists``: ``maxX``, ``markersize``, ``showTitle``,
            ``showXlabel`` and (when ``showXlabel`` is set) ``xlabel``.
    """
    color_repair = PH.get_palette(['True repair'])[0]
    color_example = PH.get_palette(['True example'])[0]
    maxX, markersize = H.fetchExists_list(params, ['maxX', 'markersize'])
    if markersize is None:
        markersize = 1

    def get_xy(lines):
        # Points sorted by x, optionally clipped to x <= maxX.
        points = sorted(lines.items())
        if maxX is not None:
            points = [(x, y) for x, y in points if x <= maxX]
        xs = [x for x, _ in points]
        ys = [y for _, y in points]
        return xs, ys

    xR, yR = get_xy(lineR)
    xE, yE = get_xy(lineE)

    ax.plot(xR, yR, 'o-', markersize=markersize, c=color_repair, linewidth=1)
    ax.plot(xE, yE, 'o-', markersize=markersize, c=color_example, linewidth=1)
    ax.axhline(y=0, c='k', linewidth=1, linestyle='--')  # zero reference line

    if H.fetchExists(params, 'showTitle'):
        ax.set_title(axOrig.get_title())
    if H.fetchExists(params, 'showXlabel'):
        ax.set_xlabel(params['xlabel'], fontsize=8)

    ax.xaxis.grid()
    ax.yaxis.grid()
Ejemplo n.º 2
0
def delta(fig, gs, params):
    """Add a row of delta subplots to ``fig``, fill them, and save the figure.

    For each of the first ``ncols`` existing axes, a new subplot is created
    at grid position ``ncols + column`` and the average-delta lines returned
    by ``delta_axes`` are drawn on it via ``plot_diff``.

    Args:
        fig: Figure that already holds the original axes.
        gs: Grid spec indexed to create the new subplots.
        params: Options; ``ncols``, ``fpath`` and ``fname`` are read here,
            the rest is forwarded to the helpers.

    Returns:
        The same ``fig`` object.
    """
    rows_of_deltas = 1  # would be 2 if the "best" delta row were plotted too

    # Read the existing axes before any new subplot is added below.
    original_axes = fig.get_axes()
    _new_fig, ncols = H.fetchExists_list(params, ['newFig', 'ncols'])

    label_avg = r'$\delta$ Average'
    new_axes = []
    for offset in range(ncols * rows_of_deltas):
        new_axes.append(fig.add_subplot(gs[ncols + offset]))

    # One delta plot per original axes/sub-figure. Only the average lines
    # are drawn; the "best" lines are computed by delta_axes but not plotted.
    for column in range(ncols):
        source_ax = original_axes[column]
        target_ax = new_axes[column]

        _best_r, _best_e, avg_r, avg_e = delta_axes(source_ax, params, column)
        plot_diff(source_ax, target_ax, avg_r, avg_e, label_avg, params)

    adjust_figs(fig, new_axes, params)
    out_dir, out_name = H.fetchExists_list(params, ['fpath', 'fname'])
    PH.savefig(fig, out_dir, out_name, out_name)

    return fig
Ejemplo n.º 3
0
def writeErrSetFile(name, fname):
    """Append an "ErrSet" column to the CSV at ``fname`` and write cluster files.

    Every row is mapped to an error-set via ``getErrSet``; its inserted and
    deleted diffs are passed to ``clusterErr``, and the error-set's key is
    appended to the row. The CSV is then rewritten in place, followed by the
    error index (``writeAllErrs``) and the clusters (``writeClusterErr``).

    Args:
        name: Passed through to ``writeClusterErr``.
        fname: Path of the CSV that is read and then overwritten.
    """
    headers, rows = H.readCSV(fname)
    headers.append("ErrSet")
    errDiffByKey = {}  # {CompErr1: ErrSet1, ...}
    knownErrs = readPrev_AllErrors()
    total = len(rows)
    print('Total #src-target pairs=', total)

    colPrutor = headers.index("sourceErrorPrutor")
    colClang = headers.index("sourceErrorClangParse")
    colLineNums = headers.index("lineNums_Abs")
    colIns = headers.index("diffAbs_ins")
    colDel = headers.index("diffAbs_del")

    for done, row in enumerate(rows, start=1):
        if done % 1000 == 0:
            print(done, '/', total, 'done ...')

        insDiffs = row[colIns].splitlines()
        delDiffs = row[colDel].splitlines()
        rawPrutor = row[colPrutor]
        rawClang = row[colClang]
        diffLineNums = set(row[colLineNums].splitlines())
        errPrutor = rawPrutor.replace('\r', '\n')
        _errClang = rawClang.replace('\r', '\n')  # normalised but not used below

        # Unique representative for this row's set of compiler errors.
        errSet, _errExpList, compLineNums = getErrSet(
            knownErrs, errDiffByKey, errPrutor)
        # Register this row's diffs under the error-set.
        clusterErr(errSet, insDiffs, delDiffs)
        # Update the counts used for precision/recall of compiler line numbers.
        errSet.calcIntersection(compLineNums, diffLineNums)

        row.append(errSet.key)

    H.writeCSV(fname, headers, rows)
    writeAllErrs(knownErrs)
    writeClusterErr(name, errDiffByKey)
Ejemplo n.º 4
0
def get_count(df_range, yType):
    """Return the y-value for one window of rows, depending on ``yType``.

    * ``'numErrors'``  -> number of rows, as a float.
    * ``'timeTaken'``  -> ``H.div`` of the summed 'timeTaken (sec)' column
      by the row count.
    * ``'numAttempt'`` -> ``H.div`` of the summed 'numAttempt' column by
      the row count.

    Any other ``yType`` yields ``None``.
    """
    n_rows = float(len(df_range))

    if yType == 'numErrors':
        return n_rows

    if yType == 'timeTaken':
        column = 'timeTaken (sec)'
    elif yType == 'numAttempt':
        column = 'numAttempt'
    else:
        return None

    return H.div(sum(df_range[column]), n_rows)
Ejemplo n.º 5
0
def adjust_labels(fig, params):
    """Apply per-axes titles, x-labels and y-labels taken from ``params``.

    Args:
        fig: Figure whose axes (in ``fig.get_axes()`` order) are labelled.
        params: Options read through ``H.fetchExists_list``:
            ``titles``, ``xlabels``, ``ylabels`` are indexed by the axes
            position; ``ylabelpos`` is the y-label side and falls back to
            ``'right'`` when it is None. A setting that is None is skipped.
    """
    # Single lookup for everything this function needs.
    titles, xlabels, ylabels, ylabelpos = H.fetchExists_list(
        params, ['titles', 'xlabels', 'ylabels', 'ylabelpos'])
    if ylabelpos is None:
        ylabelpos = 'right'

    for index, ax in enumerate(fig.get_axes()):
        if titles is not None:
            ax.set_title(titles[index], fontsize=8)

        if xlabels is not None:
            ax.set_xlabel(xlabels[index], fontsize=8)

        if ylabels is not None:
            ax.set_ylabel(ylabels[index], fontsize=8)
            ax.yaxis.set_label_position(ylabelpos)
Ejemplo n.º 6
0
def adjust_suplabels(fig, params):
    """Draw figure-level x/y labels and apply the subplot margins.

    Args:
        fig: Figure to annotate and adjust.
        params: Options read through ``H.fetchExists_list``:
            ``xsuplabel`` / ``ysuplabel`` (label text, skipped when None),
            ``xsuppad`` / ``ysuppad`` (label position, 0.2 when None), and
            ``left`` / ``right`` / ``top`` / ``bottom`` passed to
            ``fig.subplots_adjust``.
    """
    x_text, y_text = H.fetchExists_list(params, ['xsuplabel', 'ysuplabel'])
    x_pad, y_pad = H.fetchExists_list(params, ['xsuppad', 'ysuppad'])
    x_pad = 0.2 if x_pad is None else x_pad
    y_pad = 0.2 if y_pad is None else y_pad

    centred = {'va': 'center', 'ha': 'center'}
    if x_text is not None:
        # Horizontally centred, at height x_pad.
        fig.text(0.5, x_pad, x_text, **centred)
    if y_text is not None:
        # Vertically centred, at horizontal offset y_pad, rotated to read upwards.
        fig.text(y_pad, 0.5, y_text, rotation=90, **centred)

    margins = H.fetchExists_list(params, ['left', 'right', 'top', 'bottom'])
    left, right, top, bottom = margins
    fig.subplots_adjust(left=left, right=right, top=top, bottom=bottom)
Ejemplo n.º 7
0
def get_errExp(errSet):
    """Expand a ';'-separated list of error indexes into "index: text; " items.

    ``'ALL'`` is returned unchanged. Otherwise each non-empty index is looked
    up in ``ALL_ERRS`` (first entry whose ``getIndex()`` stringifies to it);
    indexes without a match are dropped. The pieces are combined with
    ``H.joinList(..., ' ')``.
    """
    if errSet == 'ALL':
        return errSet

    described = []
    tokens = (piece.strip() for piece in errSet.split(';'))
    for token in tokens:
        if token == '':
            continue
        for errExp in ALL_ERRS:
            if str(ALL_ERRS[errExp].getIndex()) != token:
                continue
            described.append(token + ': ' + errExp + '; ')
            break  # only the first matching error is reported

    return H.joinList(described, ' ')
Ejemplo n.º 8
0
def get_dictXY(ax, params, indexAx, indexL):
    """Return an ``{x: y}`` dict for line ``indexL`` of axes ``indexAx``.

    The y-values come from the first available source:
      1. ``params['kmfs']``: the fitter at ``indexAx * nsplits + indexL`` is
         asked to ``predict`` every integer x in ``0..maxX``;
      2. ``params['points']``: the stored ``(x, y)`` pair at
         ``points[indexAx][indexL]``;
      3. otherwise the xy-data of the line returned by ``get_axLine``.
    """
    kmfs, points, maxX = H.fetchExists_list(params, ['kmfs', 'points', 'maxX'])
    # len - 1 discounts the feedback semester, + 2 adds the repair/example lines.
    lines_per_axes = len(CF.semesters) + 1

    xs = get_axLine(ax, indexL).get_xydata()[:, 0]
    if kmfs is not None:
        # Survival fitters were passed: predict from the fitter directly.
        fitter_pos = indexAx * lines_per_axes + indexL
        xs = range(0, maxX + 1)
        ys = [kmfs[fitter_pos].predict(x) for x in xs]
    elif points is not None:
        # Pre-computed points per axes were passed.
        xs, ys = points[indexAx][indexL]
    else:
        # Fall back to reading the y-values off the plotted line.
        ys = get_axLine(ax, indexL).get_xydata()[:, 1]

    return dict(zip(xs, ys))
Ejemplo n.º 9
0
def readPrev_AllErrors():
    """Load a previously saved error indexing, if one can be read.

    Reads the CSV at ``CF.fname_errorIDs`` and builds ``{error text: Error}``
    from its 'index' and 'error' columns, so that indexes assigned in an
    earlier run are reused. If reading raises ``IOError``, whatever was
    collected so far (an empty dict when the file cannot be opened) is
    returned.
    """
    known = {}
    try:
        headers, rows = H.readCSV(CF.fname_errorIDs)
        col_index = headers.index('index')
        col_error = headers.index('error')

        for row in rows:
            err_index, err_text = row[col_index], row[col_error]
            known[err_text] = Error(err_text, index=err_index)
    except IOError:
        # No readable previous indexing: continue with what we have.
        pass

    return known
Ejemplo n.º 10
0
def adjust_ticks(fig, params):
    """Fix tick positions and hide tick labels on selected axes.

    Args:
        fig: Figure whose axes are adjusted.
        params: Options read through ``H.fetchExists_list``:
            ``xticks``: when not None, installed on every x-axis as a
            ``ticker.FixedLocator``;
            ``showXTicks`` / ``showYTicks``: when not None, axes whose
            1-based position is not contained in it get empty tick labels
            on that axis.
    """
    all_axes = fig.get_axes()
    x_visible, y_visible, x_positions = H.fetchExists_list(
        params, ['showXTicks', 'showYTicks', 'xticks'])

    # Positions are 1-based to match the showXTicks/showYTicks entries.
    for position, ax in enumerate(all_axes, start=1):
        if x_positions is not None:
            ax.xaxis.set_major_locator(ticker.FixedLocator(x_positions))

        if x_visible is not None and position not in x_visible:
            ax.set_xticklabels([])

        if y_visible is not None and position not in y_visible:
            ax.set_yticklabels([])
Ejemplo n.º 11
0
def plotGroup(df,
              fitter,
              TName,
              EName=None,
              groupBy=None,
              splitBy=None,
              params=None):
    """Plot one Kaplan-Meier sub-plot per split, with one curve per group.

    Args:
        df: Data frame holding the duration/event/group/split columns.
        fitter: Not used as passed; a fresh ``KaplanMeierFitter`` is created
            for every split and replaces this argument.
        TName, EName: Forwarded to ``get_groups``.
        groupBy: Column whose unique values become the curves of a sub-plot
            (its unique count in ``df`` also sets the legend column count).
        splitBy: Column whose unique values become the sub-plots, unless
            ``params['splitKeys']`` is given.
        params: Plot options read through ``H.fetchExists_list``: xlim, ylim,
            title, fpath, fname, xlabel, ylabel, height, width, top, left,
            bot, wspace, hspace, revSortSplit, splitKeys, replaceSplit,
            nrows, ncols, height_ratios. ``None`` is treated as an empty
            dict.

    Returns:
        ``(fig, gs, newKMFs)``: the figure, its grid spec, and the list of
        objects returned by ``set_fit`` in plotting order (kept for the
        delta plots).
    """
    if params is None:  # avoid a shared mutable default argument
        params = {}

    xlim, ylim, title = H.fetchExists_list(params, ['xlim', 'ylim', 'title'])
    fpath, fname, xlabel, ylabel = H.fetchExists_list(
        params, ['fpath', 'fname', 'xlabel', 'ylabel'])
    height, width = H.fetchExists_list(params, ['height', 'width'])
    # NOTE: the bottom margin is read from the key 'bot' here.
    top, left, bot = H.fetchExists_list(params, ['top', 'left', 'bot'])
    wspace, hspace, revSortSplit = H.fetchExists_list(
        params, ['wspace', 'hspace', 'revSortSplit'])
    splitKeys, replaceSplit = H.fetchExists_list(params,
                                                 ['splitKeys', 'replaceSplit'])
    nrows, ncols, height_ratios = H.fetchExists_list(
        params, ['nrows', 'ncols', 'height_ratios'])

    maxX, maxY = 0, 0
    fig = plt.figure(figsize=(width, height))
    gs = plt.GridSpec(nrows,
                      ncols,
                      height_ratios=height_ratios,
                      wspace=wspace,
                      hspace=hspace)

    newKMFs = []
    if revSortSplit is None:
        revSortSplit = False
    if splitKeys is None:
        splitKeys = sorted(df[splitBy].unique(), reverse=revSortSplit)
    print('\t\t{}'.format(splitKeys))

    # The i-th split is drawn in the i-th grid cell.
    for indexSplit, uniqSplit in enumerate(splitKeys):
        fitter = KaplanMeierFitter()  # new fitter for each split/axes
        T, E, groups = get_groups(df, TName, EName, splitBy, groupBy,
                                  uniqSplit)
        ax = fig.add_subplot(gs[indexSplit])

        # Reverse sort so the "example" and "repair" groups always come
        # first, which keeps the colour coding of the labels stable.
        indexT, indexS = 0, 0  # tool index and semester index
        for uniqGroup in sorted(groups.unique(), reverse=True):
            newKMF = set_fit(fitter, T, E, groups, uniqGroup)
            newKMFs.append(newKMF)  # returned for the delta plots

            c, indexT, indexS = PH.get_color(uniqGroup, indexT, indexS)
            newKMF.plot(ax=ax, color=c, linewidth=1, ci_show=False)

        axTitle = uniqSplit
        if replaceSplit is not None:
            axTitle = replaceSplit[indexSplit]
        set_axAttr(ax, axTitle, xlabel, ylabel)
        maxX, maxY = get_pltLimits(ax, maxX, maxY, xlim, ylim)

    title = set_pltLimits(fig, maxX, maxY, title)

    fig.tight_layout()

    PH.set_legend(fig, ncol=len(df[groupBy].unique()))

    # Margins are applied after tight_layout; they control the distance
    # between the legend and the titles.
    fig.subplots_adjust(left=left, top=top, bottom=bot)
    PH.savefig(fig, fpath, fname, title)

    return fig, gs, newKMFs
Ejemplo n.º 12
0
def plot_bar(dict_dfNumErr, params, isPlotErrs=False):
    """Plot the ``yType`` column over window start time, one sub-plot per lab.

    For every selected error-set and every lab within it, a sub-plot is added
    and one marker line per tool is drawn.

    Side effect: each data frame in ``dict_dfNumErr`` that is plotted gains
    the columns ``winStart_num`` and ``winEnd_num`` (``time2num`` applied to
    ``winStart`` / ``winEnd``).

    Args:
        dict_dfNumErr: Mapping of error-set -> data frame; must contain the
            key ``'ALL'``. Frames need the columns winStart, winEnd, labs,
            tool and the column named by ``params['yType']``.
        params: Options read through ``H.fetchExists_list``: yType, height,
            width, wspace, hspace, nrows, ncols, height_ratios, fpath, fname
            (plus whatever ``adjust_limits`` reads).
        isPlotErrs: When true, plot every error-set ('ALL' first, the rest
            sorted) and title each sub-plot with its error-set; otherwise
            plot only 'ALL'.

    Returns:
        ``(fig, gs, points)`` where ``points`` holds, per sub-plot, the list
        of ``(x values, y values)`` pairs drawn for each tool.
    """
    indexPlot = 0

    y, height, width = H.fetchExists_list(params, ['yType', 'height', 'width'])
    wspace, hspace = H.fetchExists_list(params, ['wspace', 'hspace'])
    nrows, ncols, height_ratios = H.fetchExists_list(
        params, ['nrows', 'ncols', 'height_ratios'])

    fig = plt.figure(figsize=(width, height))
    gs = plt.GridSpec(nrows,
                      ncols,
                      height_ratios=height_ratios,
                      wspace=wspace,
                      hspace=hspace)
    points = []

    if isPlotErrs:
        # 'ALL' goes first, followed by the remaining error-sets in order.
        errSets = ['ALL'] + sorted(k for k in dict_dfNumErr if k != 'ALL')
    else:
        errSets = ['ALL']

    sns.set_context(rc={'lines.linewidth': 0.5})  # thinner lines

    for errSet in errSets:
        df_numErr = dict_dfNumErr[errSet]
        # Build real lists: on Python 3 a bare map() is a lazy iterator and
        # is not a usable column value.
        df_numErr['winStart_num'] = [
            time2num(t) for t in df_numErr['winStart']
        ]
        df_numErr['winEnd_num'] = [time2num(t) for t in df_numErr['winEnd']]

        for lab in sorted(df_numErr['labs'].unique()):
            df = df_numErr[df_numErr['labs'] == lab]
            df_plot = df.sort_values(['winStart_num', 'tool'],
                                     ascending=[True, False])

            ax = fig.add_subplot(gs[indexPlot])
            pointsAx = []

            for tool in df_plot['tool'].unique():
                df_tool = df_plot[df_plot['tool'] == tool]  # filter once
                pointsX = df_tool['winStart_num']
                pointsY = df_tool[y]
                pointsAx.append((pointsX, pointsY))

                color = PH.get_palette([tool])[0]
                ax.plot(pointsX,
                        pointsY,
                        'o-',
                        markersize=2,
                        color=color,
                        linewidth=1,
                        label=tool)

            set_xaxis(ax)
            if isPlotErrs:
                ax.set_title(str(errSet))
            if y == 'numErrors_LOC-sumAll':
                # Halve the upper y-limit to cut off the (irrelevant) highs
                # of the LOC-divided metric.
                ax.set_ylim([ax.get_ylim()[0], ax.get_ylim()[1] / 2.0])

            points.append(pointsAx)
            indexPlot += 1

    PH.set_legend(fig, ncol=len(dict_dfNumErr['ALL']['tool'].unique()))
    PH.set_axLim(fig)
    adjust_limits(fig, params)

    fpath, fname = H.fetchExists_list(params, ['fpath', 'fname'])
    PH.savefig(fig, fpath, fname, '')

    return fig, gs, points
Ejemplo n.º 13
0
def adjust_limits(fig, params):
    """Apply the axis limits configured in ``params`` to ``fig``.

    Reads ``minX``, ``maxX``, ``minY`` and ``maxY`` through
    ``H.fetchExists_list`` and forwards them, as keyword arguments of the
    same names, to ``PH.set_axLim``.
    """
    x_low, x_high = H.fetchExists_list(params, ['minX', 'maxX'])
    y_low, y_high = H.fetchExists_list(params, ['minY', 'maxY'])
    bounds = {'minX': x_low, 'maxX': x_high, 'minY': y_low, 'maxY': y_high}
    PH.set_axLim(fig, **bounds)