def plot_diff(axOrig, ax, lineR, lineE, ylab, params):
    '''Draw the repair and example delta curves on `ax`.

    Args:
        axOrig: Axes of the original plot. Only its title is read, and only
            when 'showTitle' is set in `params`.
        ax: Axes that receives the two curves.
        lineR: Mapping of x -> y for the curve drawn in the 'True repair'
            palette colour.
        lineE: Mapping of x -> y for the curve drawn in the 'True example'
            palette colour.
        ylab: Y-axis label passed by the caller. NOTE(review): it is accepted
            but never applied to `ax` in this function -- confirm the label
            is set elsewhere.
        params: Plot settings, read through H.fetchExists_list / H.fetchExists:
            'maxX' (points with x above it are dropped; no cut-off when
            missing), 'markersize' (falls back to 1), 'showTitle',
            'showXlabel' and 'xlabel'.

    Besides the curves, a dashed black line is drawn at y=0 and the grid is
    switched on for both axes.
    '''
    colorR = PH.get_palette(['True repair'])[0]
    colorE = PH.get_palette(['True example'])[0]
    # 'minX' is still requested so the lookup matches the original call, but
    # only the upper bound is used for filtering.
    _, maxX, markersize = H.fetchExists_list(
        params, ['minX', 'maxX', 'markersize'])
    if markersize is None:
        markersize = 1

    def get_xy(lines):
        # Sort by x so the curve is drawn left to right; keep x <= maxX only.
        xs, ys = [], []
        for px, py in sorted(lines.items()):
            if maxX is None or px <= maxX:
                xs.append(px)
                ys.append(py)
        return xs, ys

    xR, yR = get_xy(lineR)
    xE, yE = get_xy(lineE)
    ax.plot(xR, yR, 'o-', markersize=markersize, c=colorR, linewidth=1)
    ax.plot(xE, yE, 'o-', markersize=markersize, c=colorE, linewidth=1)
    ax.axhline(y=0, c='k', linewidth=1, linestyle='--')  # horizontal line

    if H.fetchExists(params, 'showTitle'):
        ax.set_title(axOrig.get_title())
    if H.fetchExists(params, 'showXlabel'):
        ax.set_xlabel(params['xlabel'], fontsize=8)

    ax.xaxis.grid()
    ax.yaxis.grid()
def delta(fig, gs, params):
    '''Add one delta sub-plot per original axes to `fig`, then save the figure.

    The new axes are created in grid cells `ncols` .. `2 * ncols - 1` of `gs`.
    For each of the first `ncols` existing axes, `delta_axes` supplies the
    best/average repair and example lines; only the average pair is plotted
    (the "best" variant is currently disabled).

    Args:
        fig: Figure that already holds the original axes.
        gs: Grid spec indexed to place the new sub-plots.
        params: Settings read via H.fetchExists_list ('newFig', 'ncols',
            'fpath', 'fname') and forwarded to the helper functions.

    Returns:
        The same `fig`, after it has been handed to PH.savefig.
    '''
    nPlots = 1  # 2 if adding best as well
    origAxes = fig.get_axes()
    _, ncols = H.fetchExists_list(params, ['newFig', 'ncols'])
    ylabAvg = r'$\delta$ Average'

    deltaAxes = []
    for offset in range(ncols * nPlots):
        deltaAxes.append(fig.add_subplot(gs[ncols + offset]))

    # For each one of the axes/subfig, plot its delta
    for col in range(ncols):
        srcAx = origAxes[col]
        dstAx = deltaAxes[col]
        _, _, avgR, avgE = delta_axes(srcAx, params, col)
        plot_diff(srcAx, dstAx, avgR, avgE, ylabAvg, params)

    adjust_figs(fig, deltaAxes, params)

    fpath, fname = H.fetchExists_list(params, ['fpath', 'fname'])
    PH.savefig(fig, fpath, fname, fname)
    return fig
def writeErrSetFile(name, fname):
    '''Annotate every row of the CSV `fname` with its error-set key.

    The CSV is read, an "ErrSet" column is appended, and the file is written
    back to the same path. Along the way the per-error-set diffs are clustered
    and compiler-vs-diff line-number counts are updated; the accumulated error
    index and clusters are persisted through writeAllErrs / writeClusterErr.

    Args:
        name: Identifier forwarded to writeClusterErr.
        fname: Path of the CSV to read and overwrite. It must contain the
            columns "sourceErrorPrutor", "sourceErrorClangParse",
            "lineNums_Abs", "diffAbs_ins" and "diffAbs_del".
    '''
    headers, lines = H.readCSV(fname)
    headers.append("ErrSet")

    dictErrDiff = {}  # {CompErr1:ErrSet1, ...}
    allErrs = readPrev_AllErrors()
    print('Total #src-target pairs=', len(lines))

    colPrutor = headers.index("sourceErrorPrutor")
    colClang = headers.index("sourceErrorClangParse")
    colLineNums = headers.index("lineNums_Abs")
    colIns = headers.index("diffAbs_ins")
    colDel = headers.index("diffAbs_del")

    for rowNum, row in enumerate(lines, 1):
        if rowNum % 1000 == 0:
            print(rowNum, '/', len(lines), 'done ...')

        diffsI = row[colIns].splitlines()
        diffsD = row[colDel].splitlines()
        errPrutor = row[colPrutor]
        errClang = row[colClang]
        diffLineNums = set(row[colLineNums].splitlines())

        # Carriage returns become newlines in both messages; only the Prutor
        # message is consumed below.
        errPrutor = errPrutor.replace('\r', '\n')
        errClang = errClang.replace('\r', '\n')

        # Get the err-set (unique rep for set of errors)
        errSet, errExpList, compLineNums = getErrSet(
            allErrs, dictErrDiff, errPrutor)
        # Cluster the diffs (add the diff to dictErrDiff)
        clusterErr(errSet, diffsI, diffsD)
        # Update counts to calc precision-recall of compiler lineNums
        errSet.calcIntersection(compLineNums, diffLineNums)

        row.append(errSet.key)

    H.writeCSV(fname, headers, lines)
    writeAllErrs(allErrs)
    writeClusterErr(name, dictErrDiff)
def get_count(df_range, yType):
    '''Return the y-value of one window for the requested metric.

    Args:
        df_range: Rows of the window; must support len() and, for the two
            averaged metrics, lookup by column name.
        yType: 'numErrors' gives the row count as a float; 'timeTaken' gives
            the sum of the 'timeTaken (sec)' column divided by the row count;
            'numAttempt' does the same for the 'numAttempt' column.

    Returns:
        The metric value, or None for any other `yType`.
    '''
    numRows = float(len(df_range))
    if yType == 'numErrors':
        return numRows

    if yType == 'timeTaken':
        column = 'timeTaken (sec)'
    elif yType == 'numAttempt':
        column = 'numAttempt'
    else:
        return None

    # The division is delegated to H.div rather than a bare `/`.
    return H.div(sum(df_range[column]), numRows)
def adjust_labels(fig, params):
    '''Apply per-axes titles and x/y labels to every axes of `fig`.

    Args:
        fig: Figure whose axes are labelled, in the order fig.get_axes()
            returns them.
        params: Settings read via H.fetchExists_list: 'titles', 'xlabels' and
            'ylabels' are indexed by axes position; 'ylabelpos' is the y-label
            side and falls back to 'right'. Entries that come back as None
            are skipped.
    '''
    allAxes = fig.get_axes()
    _, _, titles = H.fetchExists_list(
        params, ['xlabels', 'ylabels', 'titles'])
    xlabels, ylabels, ylabelpos = H.fetchExists_list(
        params, ['xlabels', 'ylabels', 'ylabelpos'])

    # The side is only consulted when y-labels are drawn, so the default can
    # be resolved once up front.
    if ylabelpos is None:
        ylabelpos = 'right'

    for pos, ax in enumerate(allAxes):
        if titles is not None:
            ax.set_title(titles[pos], fontsize=8)
        if xlabels is not None:
            ax.set_xlabel(xlabels[pos], fontsize=8)
        if ylabels is None:
            continue
        ax.set_ylabel(ylabels[pos], fontsize=8)
        ax.yaxis.set_label_position(ylabelpos)
def adjust_suplabels(fig, params):
    '''Write figure-level x/y labels and apply the subplot margins.

    Args:
        fig: Figure to annotate.
        params: Settings read via H.fetchExists_list: 'xsuplabel' /
            'ysuplabel' (texts; skipped when None), 'xsuppad' / 'ysuppad'
            (figure-coordinate offsets, each defaulting to 0.2), and
            'left' / 'right' / 'top' / 'bottom' (passed to subplots_adjust).
    '''
    xText, yText = H.fetchExists_list(params, ['xsuplabel', 'ysuplabel'])
    xPad, yPad = H.fetchExists_list(params, ['xsuppad', 'ysuppad'])
    xPad = 0.2 if xPad is None else xPad
    yPad = 0.2 if yPad is None else yPad

    if xText is not None:
        # Horizontally centred; xPad is the vertical position.
        fig.text(0.5, xPad, xText, va='center', ha='center')
    if yText is not None:
        # Vertically centred and rotated; yPad is the horizontal position.
        fig.text(yPad, 0.5, yText, va='center', ha='center', rotation=90)

    # For the distance between legend and titles
    marginL, marginR, marginT, marginB = H.fetchExists_list(
        params, ['left', 'right', 'top', 'bottom'])
    fig.subplots_adjust(left=marginL, right=marginR, top=marginT,
                        bottom=marginB)
def get_errExp(errSet):
    '''Expand a ';'-separated list of error indices into readable text.

    Args:
        errSet: Either the literal 'ALL' (returned unchanged) or a string of
            error indices separated by ';'. Blank entries are ignored.

    Returns:
        'ALL', or the pieces "<index>: <error expression>; " for every index
        found in ALL_ERRS, combined by H.joinList with ' '. Indices without a
        match in ALL_ERRS contribute nothing.
    '''
    if errSet == 'ALL':
        return errSet

    described = []
    for token in errSet.split(';'):
        token = token.strip()
        if token == '':
            continue
        # Only the first expression whose index matches is used.
        for errExp in ALL_ERRS:
            if str(ALL_ERRS[errExp].getIndex()) == token:
                described.append(token + ': ' + errExp + '; ')
                break

    return H.joinList(described, ' ')
def get_dictXY(ax, params, indexAx, indexL):
    '''Return the {x: y} points of one line, from the best available source.

    Sources, in priority order:
      1. 'kmfs' in params: y is kmfs[index].predict(x) for every integer x in
         0..maxX, where index = indexAx * nsplits + indexL.
      2. 'points' in params: the (xs, ys) pair stored at
         points[indexAx][indexL].
      3. Otherwise the x/y data of line `indexL` already drawn on `ax`.

    The axes line is looked up only in case 3, and only once. Previously it
    was fetched unconditionally and the result discarded in cases 1 and 2.

    Args:
        ax: Axes holding the original line (used only in case 3).
        params: Settings read via H.fetchExists_list: 'kmfs', 'points',
            'maxX'. 'maxX' must be an integer when 'kmfs' is given.
        indexAx: Position of the axes among the sub-plots.
        indexL: Position of the line within the axes.

    Returns:
        dict mapping each x to its y. If an x repeats, the last y wins.
    '''
    kmfs, points, maxX = H.fetchExists_list(params, ['kmfs', 'points', 'maxX'])

    if kmfs is not None:
        # If survival plots passed, accurately predicting using kmf
        # -1 to discount feedback sem, +2 to add repair/eg
        nsplits = len(CF.semesters) - 1 + 2
        index = indexAx * nsplits + indexL
        xs = range(0, maxX + 1)
        ys = [kmfs[index].predict(x) for x in xs]
    elif points is not None:
        # Elif points for each axes passed
        xs, ys = points[indexAx][indexL]
    else:
        # go old-fashion: Pick lines from axesOrig
        xyData = get_axLine(ax, indexL).get_xydata()
        xs, ys = xyData[:, 0], xyData[:, 1]

    return dict(zip(xs, ys))
def readPrev_AllErrors():
    '''Load the error indexing saved by a previous run, if there is one.

    Check if indexing of errors (sorted based on count) is already present in
    the path, based on some previous run (or semester). If so, use that
    indexing (most freq comp error gets index-1).

    Returns:
        dict mapping each error expression to an Error built with its stored
        index. An IOError while loading is swallowed, so the dict is empty
        when the file CF.fname_errorIDs cannot be read.
    '''
    prevErrs = {}
    try:
        headers, rows = H.readCSV(CF.fname_errorIDs)
        colIndex = headers.index('index')
        colError = headers.index('error')
        for row in rows:
            storedIndex = row[colIndex]
            errExp = row[colError]
            prevErrs[errExp] = Error(errExp, index=storedIndex)
    except IOError:
        # No usable previous indexing: start from whatever was collected.
        pass
    return prevErrs
def adjust_ticks(fig, params):
    '''Fix tick positions and hide tick labels on selected axes of `fig`.

    Args:
        fig: Figure whose axes are adjusted.
        params: Settings read via H.fetchExists_list:
            'xticks' -- when given, every axes gets a FixedLocator at these
                x positions;
            'showXTicks' / 'showYTicks' -- when given, collections of 1-based
                axes numbers that keep their x / y tick labels; all other
                axes have those labels blanked.
    '''
    allAxes = fig.get_axes()
    showXTicks, showYTicks, xticks = H.fetchExists_list(
        params, ['showXTicks', 'showYTicks', 'xticks'])

    # Axes are numbered from 1 in showXTicks / showYTicks.
    for axNum, ax in enumerate(allAxes, 1):
        if xticks is not None:
            ax.xaxis.set_major_locator(ticker.FixedLocator(xticks))
        if showXTicks is not None and axNum not in showXTicks:
            ax.set_xticklabels([])
        if showYTicks is not None and axNum not in showYTicks:
            ax.set_yticklabels([])
def plotGroup(df, fitter, TName, EName=None, groupBy=None, splitBy=None,
              params=None):
    '''Plot one Kaplan-Meier sub-plot per split, one curve per group, and save.

    Args:
        df: Data frame holding the duration, event, group and split columns.
        fitter: Ignored. A fresh KaplanMeierFitter is created for every
            split; the parameter is kept so existing callers keep working.
        TName: Forwarded to get_groups (duration column).
        EName: Forwarded to get_groups (event column).
        groupBy: Column whose unique values become the curves of each
            sub-plot; also sets the number of legend columns.
        splitBy: Column whose unique values become the sub-plots, unless
            'splitKeys' is given in `params`.
        params: Plot settings read via H.fetchExists_list. Keys used: 'xlim',
            'ylim', 'title', 'fpath', 'fname', 'xlabel', 'ylabel', 'height',
            'width', 'top', 'left', 'bot', 'wspace', 'hspace', 'revSortSplit',
            'splitKeys', 'replaceSplit', 'nrows', 'ncols', 'height_ratios'.
            Defaults to an empty mapping.

    Returns:
        (fig, gs, newKMFs): the figure, its grid spec, and the fitted objects
        returned by set_fit, one per (split, group) in plotting order.
    '''
    # A literal `{}` default would be one dict shared by every call.
    if params is None:
        params = {}

    xlim, ylim, title = H.fetchExists_list(params, ['xlim', 'ylim', 'title'])
    fpath, fname, xlabel, ylabel = H.fetchExists_list(
        params, ['fpath', 'fname', 'xlabel', 'ylabel'])
    height, width = H.fetchExists_list(params, ['height', 'width'])
    top, left, bot = H.fetchExists_list(params, ['top', 'left', 'bot'])
    wspace, hspace, revSortSplit = H.fetchExists_list(
        params, ['wspace', 'hspace', 'revSortSplit'])
    splitKeys, replaceSplit = H.fetchExists_list(
        params, ['splitKeys', 'replaceSplit'])
    nrows, ncols, height_ratios = H.fetchExists_list(
        params, ['nrows', 'ncols', 'height_ratios'])

    maxX, maxY = 0, 0
    fig = plt.figure(figsize=(width, height))
    gs = plt.GridSpec(nrows, ncols, height_ratios=height_ratios,
                      wspace=wspace, hspace=hspace)
    newKMFs = []

    if revSortSplit is None:
        revSortSplit = False
    if splitKeys is None:
        splitKeys = sorted(df[splitBy].unique(), reverse=revSortSplit)
    print('\t\t{}'.format(splitKeys))

    for indexSplit, uniqSplit in enumerate(splitKeys):
        fitter = KaplanMeierFitter()  # Init a new KMF for each new split/axes
        T, E, groups = get_groups(df, TName, EName, splitBy, groupBy,
                                  uniqSplit)
        # Splits fill the grid cells in order, one cell per split.
        ax = fig.add_subplot(gs[indexSplit])

        # Reverse sort, so that the "example" and "repair" group are always
        # top; a fixed order also keeps the colour coding of labels the same.
        uniqGroups = sorted(groups.unique(), reverse=True)
        indexT, indexS = 0, 0  # Index Tool, and Index Sem
        for uniqGroup in uniqGroups:
            newKMF = set_fit(fitter, T, E, groups, uniqGroup)
            newKMFs.append(newKMF)  # Return these independent kmfs for Delta plots
            c, indexT, indexS = PH.get_color(uniqGroup, indexT, indexS)
            newKMF.plot(ax=ax, color=c, linewidth=1, ci_show=False)

        axTitle = uniqSplit
        if replaceSplit is not None:
            axTitle = replaceSplit[indexSplit]
        set_axAttr(ax, axTitle, xlabel, ylabel)
        maxX, maxY = get_pltLimits(ax, maxX, maxY, xlim, ylim)

    title = set_pltLimits(fig, maxX, maxY, title)
    fig.tight_layout()
    PH.set_legend(fig, ncol=len(df[groupBy].unique()))
    # For the distance between legend and titles
    fig.subplots_adjust(left=left, top=top, bottom=bot)

    PH.savefig(fig, fpath, fname, title)
    return fig, gs, newKMFs
def plot_bar(dict_dfNumErr, params, isPlotErrs=False):
    '''Plot one line chart per (error set, lab), one curve per tool, and save.

    Args:
        dict_dfNumErr: Mapping of error-set name -> data frame with at least
            the columns 'winStart', 'winEnd', 'labs', 'tool' and the column
            named by params['yType']. The key 'ALL' must be present. NOTE:
            each plotted frame is modified in place -- the columns
            'winStart_num' and 'winEnd_num' are added.
        params: Settings read via H.fetchExists_list: 'yType', 'height',
            'width', 'wspace', 'hspace', 'nrows', 'ncols', 'height_ratios',
            'fpath', 'fname'; also forwarded to adjust_limits.
        isPlotErrs: When True, every error set is plotted ('ALL' first, the
            rest sorted) and each sub-plot is titled with its error set.
            When False only 'ALL' is plotted.

    Returns:
        (fig, gs, points): the figure, its grid spec, and for every sub-plot
        a list of (xs, ys) pairs, one per tool, in plotting order.
    '''
    indexPlot = 0
    y, height, width = H.fetchExists_list(params, ['yType', 'height', 'width'])
    wspace, hspace = H.fetchExists_list(params, ['wspace', 'hspace'])
    nrows, ncols, height_ratios = H.fetchExists_list(
        params, ['nrows', 'ncols', 'height_ratios'])

    fig = plt.figure(figsize=(width, height))
    gs = plt.GridSpec(nrows, ncols, height_ratios=height_ratios,
                      wspace=wspace, hspace=hspace)
    points = []

    if isPlotErrs:  # Pre-pend ALL first in sorting
        errSets = ['ALL'] + sorted(i for i in dict_dfNumErr if i != 'ALL')
    else:
        errSets = ['ALL']

    sns.set_context(rc={'lines.linewidth': 0.5})  # Thinner lines

    for errSet in errSets:
        df_numErr = dict_dfNumErr[errSet]
        # Build real lists: under Python 3 a bare map() is a lazy iterator,
        # not a sequence of per-row values, so it cannot be used as a column.
        df_numErr['winStart_num'] = [
            time2num(t) for t in df_numErr['winStart']]
        df_numErr['winEnd_num'] = [time2num(t) for t in df_numErr['winEnd']]

        for lab in sorted(df_numErr['labs'].unique()):
            df = df_numErr[df_numErr['labs'] == lab]
            df_plot = df.sort_values(['winStart_num', 'tool'],
                                     ascending=[True, False])
            ax = fig.add_subplot(gs[indexPlot])

            pointsAx = []
            for tool in df_plot['tool'].unique():
                dfTool = df_plot[df_plot['tool'] == tool]
                pointsX = dfTool['winStart_num']
                pointsY = dfTool[y]
                pointsAx.append((pointsX, pointsY))
                color = PH.get_palette([tool])[0]
                ax.plot(pointsX, pointsY, 'o-', markersize=2, color=color,
                        linewidth=1, label=tool)

            set_xaxis(ax)
            if isPlotErrs:
                ax.set_title(str(errSet))
            if y == 'numErrors_LOC-sumAll':
                # Cut short the (irrelevant) highs for LOC div
                yLow, yHigh = ax.get_ylim()
                ax.set_ylim([yLow, yHigh / 2.0])

            points.append(pointsAx)
            indexPlot += 1

    PH.set_legend(fig, ncol=len(dict_dfNumErr['ALL']['tool'].unique()))
    PH.set_axLim(fig)
    adjust_limits(fig, params)

    fpath, fname = H.fetchExists_list(params, ['fpath', 'fname'])
    PH.savefig(fig, fpath, fname, '')
    return fig, gs, points
def adjust_limits(fig, params):
    '''Apply the axis limits configured in `params` to `fig`.

    Args:
        fig: Figure handed to PH.set_axLim.
        params: Settings read via H.fetchExists_list: 'minX', 'maxX', 'minY'
            and 'maxY'. Each value is passed on to PH.set_axLim as fetched.
    '''
    xLow, xHigh = H.fetchExists_list(params, ['minX', 'maxX'])
    yLow, yHigh = H.fetchExists_list(params, ['minY', 'maxY'])
    PH.set_axLim(fig, minX=xLow, maxX=xHigh, minY=yLow, maxY=yHigh)