Ejemplo n.º 1
0
def writeErrSetFile(name, fname):
    """Add an ``ErrSet`` column to the CSV at *fname* and persist error data.

    The CSV is loaded through ``H.readCSV``. For each row the function:

    * obtains an error-set object for the row's ``sourceErrorPrutor`` text
      via ``getErrSet``,
    * passes that object plus the row's inserted/deleted diff lines to
      ``clusterErr``,
    * calls ``errSet.calcIntersection`` with the line numbers returned by
      ``getErrSet`` and the row's ``lineNums_Abs`` entries,
    * appends ``errSet.key`` to the row.

    Afterwards the same file is rewritten with ``H.writeCSV`` and the
    collected data is handed to ``writeAllErrs`` and ``writeClusterErr``.
    A progress line is printed every 1000 rows.

    Args:
        name: Forwarded unchanged to ``writeClusterErr``.
        fname: Path of the CSV file; it is read and then overwritten.

    Raises:
        ValueError: Presumably raised by ``headers.index`` when one of the
            required columns is absent (assumes ``headers`` is a list --
            confirm against ``H.readCSV``).
    """
    headers, lines = H.readCSV(fname)
    headers.append("ErrSet")
    dictErrDiff = {}  # {CompErr1: ErrSet1, ...}
    allErrs = readPrev_AllErrors()
    total = len(lines)
    print('Total #src-target pairs=', total)

    # Resolve the positions of every column consumed below.
    colPrutor = headers.index("sourceErrorPrutor")
    colClang = headers.index("sourceErrorClangParse")
    colLineNums = headers.index("lineNums_Abs")
    colIns = headers.index("diffAbs_ins")
    colDel = headers.index("diffAbs_del")

    for rowNum, row in enumerate(lines, start=1):
        if rowNum % 1000 == 0:
            print(rowNum, '/', total, 'done ...')

        insertedDiffs = row[colIns].splitlines()
        deletedDiffs = row[colDel].splitlines()

        rawPrutor = row[colPrutor]
        rawClang = row[colClang]
        diffLineNums = set(row[colLineNums].splitlines())

        # Turn carriage returns into newlines in both error texts.
        prutorText = rawPrutor.replace('\r', '\n')
        # The Clang text is normalised as well, although nothing below reads it.
        clangText = rawClang.replace('\r', '\n')

        # Error set for this row (unique representative of its set of errors);
        # the middle element of the returned triple is not needed here.
        errSet, _errExpList, compLineNums = getErrSet(allErrs, dictErrDiff, prutorText)
        # Cluster this row's diffs under the error set.
        clusterErr(errSet, insertedDiffs, deletedDiffs)
        # Update counts used for precision/recall of the compiler line numbers.
        errSet.calcIntersection(compLineNums, diffLineNums)

        row.append(errSet.key)

    H.writeCSV(fname, headers, lines)
    writeAllErrs(allErrs)
    writeClusterErr(name, dictErrDiff)
Ejemplo n.º 2
0
def readPrev_AllErrors():
    """Load a previously stored error indexing, if one can be read.

    Reads ``CF.fname_errorIDs`` through ``H.readCSV`` and builds one
    ``Error`` per row from its ``error`` and ``index`` columns, so an
    indexing produced by an earlier run (or semester) is reused. Per the
    original note, that indexing is sorted by count, with the most frequent
    compiler error getting index 1.

    Returns:
        dict: Maps each error expression to its ``Error`` object. Empty when
        an ``IOError`` occurs; if it occurs part-way through, the entries
        built so far are returned.
    """
    knownErrors = {}
    try:
        header, rows = H.readCSV(CF.fname_errorIDs)
        idCol = header.index('index')
        exprCol = header.index('error')

        for row in rows:
            storedIndex, expression = row[idCol], row[exprCol]
            knownErrors[expression] = Error(expression, index=storedIndex)
    except IOError:
        # No readable earlier indexing: fall back to whatever was collected.
        return knownErrors

    return knownErrors