def getSegmentationAccuracy(pathArray):
    """Build a labelled feature set from correctly segmented symbols and pickle it.

    For every entry in each directory of ``pathArray`` whose name ends in
    "inkml", the file is parsed with ``gettrace.parseINKMLFile`` and its traces
    are segmented with ``segment.segmentSymbols``. Each ground-truth symbol
    whose stroke-index group is present in the segmentation result contributes
    one sample: the output of ``itp.inkml_to_pixels`` flattened one level, with
    the stroke count appended as the last feature.

    The tuple ``(dataX, dataY)`` is pickled to the file "segmented_data_18" in
    the current working directory. Nothing is returned.

    NOTE(review): other functions named ``getSegmentationAccuracy`` appear
    later in this file; if they are all at module level, the last one wins.

    Args:
        pathArray: iterable of directory paths to scan.
    """
    dataX = []
    dataY = []
    entriesSeen = 0
    for path in pathArray:
        for fileName in os.listdir(path):
            # Counts every directory entry, including the ones skipped below.
            entriesSeen += 1
            if not fileName.endswith("inkml"):
                continue
            # os.path.join tolerates directories given without a trailing separator.
            traceList, symbolsList = gettrace.parseINKMLFile(os.path.join(path, fileName))
            segmentIndices = segment.segmentSymbols(traceList)
            for label, elem in symbolsList:
                # if label in ['=', 'i', 'j','\\leq', '\\log', '\\sin', '\\cos', '\\lim', '\\geq', '\\righarrow', '\\div']: continue
                # Only symbols the segmenter grouped correctly become samples.
                if elem not in segmentIndices:
                    continue
                strokes = [traceList[i] for i in elem]
                pixels = itp.inkml_to_pixels(strokes)
                features = list(itertools.chain.from_iterable(pixels))
                features.append(len(strokes))
                dataX.append(features)
                dataY.append(label)
    # NOTE(review): the original layout was ambiguous; this is assumed to be a
    # single final count rather than a per-file progress print -- confirm.
    print(entriesSeen)
    # Binary mode plus a context manager: the handle is always closed and the
    # pickle stream is never subject to newline translation.
    with open("segmented_data_18", "wb") as outFile:
        pickle.dump((dataX, dataY), outFile)
def getSegmentationAccuracy(pathArray, numFiles=836):
    """Evaluate per-expression recognition accuracy with a pickled classifier.

    A classifier object is unpickled from the file 'svm18px' in the current
    working directory. For every entry in each directory of ``pathArray`` whose
    name ends in "inkml", the file is parsed with ``gettrace.parseINKMLFile``
    and segmented with ``segment.segmentSymbols``. A ground-truth symbol counts
    as correct when its stroke-index group is present in the segmentation AND
    ``svm.predict`` returns its label for the flattened pixel features (with
    the stroke count appended).

    Prints, in order: the number of directory entries seen, the number of
    files with all symbols correct, with at most one symbol wrong, with at
    most two symbols wrong, and then those three counts divided by
    ``numFiles``. Nothing is returned.

    NOTE(review): other functions named ``getSegmentationAccuracy`` appear in
    this file; if they are all at module level, the last one wins.

    Args:
        pathArray: iterable of directory paths to scan.
        numFiles: denominator for the printed ratios. Defaults to 836, the
            value previously hard-coded here. A value of 0 prints 0.0 ratios
            instead of raising ZeroDivisionError.
    """
    # SECURITY: pickle.load executes arbitrary code embedded in the file; only
    # use model files from a trusted source.
    with open('svm18px', "rb") as modelFile:
        svm = pickle.load(modelFile)

    entriesSeen = 0
    total_correct = 0
    off_by_one = 0
    off_by_two = 0
    for path in pathArray:
        for fileName in os.listdir(path):
            # Counts every directory entry, including the ones skipped below.
            entriesSeen += 1
            if not fileName.endswith("inkml"):
                continue
            # os.path.join tolerates directories given without a trailing separator.
            traceList, symbolsList = gettrace.parseINKMLFile(os.path.join(path, fileName))
            segmentIndices = segment.segmentSymbols(traceList)
            correct = 0
            for label, elem in symbolsList:
                # A mis-segmented symbol can never be classified correctly.
                if elem not in segmentIndices:
                    continue
                strokes = [traceList[i] for i in elem]
                pixels = itp.inkml_to_pixels(strokes)
                features = list(itertools.chain.from_iterable(pixels))
                features.append(len(strokes))
                if svm.predict(features)[0] == label:
                    correct += 1
            # Per-file tallies: exact match, and within one / two wrong symbols.
            missed = len(symbolsList) - correct
            if missed == 0:
                total_correct += 1
            if missed <= 1:
                off_by_one += 1
            if missed <= 2:
                off_by_two += 1

    # NOTE(review): the original layout was ambiguous; the entry count is
    # assumed to be printed once at the end -- confirm.
    print(entriesSeen)
    print(total_correct)
    print(off_by_one)
    print(off_by_two)
    for count in (total_correct, off_by_one, off_by_two):
        print(float(count) / numFiles if numFiles else 0.0)
def getData(pathArray):
    """Collect one feature vector and label per ground-truth symbol.

    For every entry in each directory of ``pathArray`` whose name ends in
    "inkml", the file is parsed with ``gettrace.parseINKMLFile``. Each symbol's
    strokes are converted with ``itp.inkml_to_pixels``, flattened one level,
    and the stroke count is appended as the last feature.

    The elapsed wall-clock time in seconds is printed before returning.

    NOTE(review): another function named ``getData`` appears later in this
    file; if both are at module level, the later one wins.

    Args:
        pathArray: iterable of directory paths to scan.

    Returns:
        A ``(dataX, dataY)`` tuple of parallel lists: feature vectors and
        their labels, in the order the symbols were encountered.
    """
    from time import time

    start = time()
    dataX = []
    dataY = []
    for path in pathArray:
        # `fileName` rather than `file`, which shadows the Python 2 builtin.
        for fileName in os.listdir(path):
            if not fileName.endswith("inkml"):
                continue
            # os.path.join tolerates directories given without a trailing separator.
            traceList, symbolsList = gettrace.parseINKMLFile(os.path.join(path, fileName))
            for label, indices in symbolsList:
                strokes = [traceList[elem] for elem in indices]
                pixels = itp.inkml_to_pixels(strokes)
                features = list(itertools.chain.from_iterable(pixels))
                features.append(len(strokes))
                dataX.append(features)
                dataY.append(label)
    print(time() - start)
    return dataX, dataY
def getSegmentationAccuracy(pathArray):
    """Measure how often ground-truth symbols appear in the segmenter output.

    For every entry in each directory of ``pathArray`` whose name ends in
    "inkml", the file is parsed with ``gettrace.parseINKMLFile`` and its traces
    are segmented with ``segment.segmentSymbols``. A symbol is counted as
    correct when its stroke-index group is present in the segmentation result;
    otherwise a per-label mistake counter is incremented.

    Prints, in order: elapsed seconds, the fraction of symbols segmented
    correctly, the fraction of files with every symbol correct, the mistakes
    dict (label -> count), and the total number of mistakes. When no symbols
    or no files were processed, the corresponding fraction is printed as 0.0
    instead of raising ZeroDivisionError. Nothing is returned.

    NOTE(review): other functions named ``getSegmentationAccuracy`` appear
    earlier in this file; if they are all at module level, this later
    definition is the one that stays bound.

    Args:
        pathArray: iterable of directory paths to scan.
    """
    from time import time

    start = time()
    completely_correct = 0
    total_files = 0
    correct = 0
    total = 0
    mistakes = {}
    for path in pathArray:
        # `fileName` rather than `file`, which shadows the Python 2 builtin.
        for fileName in os.listdir(path):
            if not fileName.endswith("inkml"):
                continue
            # os.path.join tolerates directories given without a trailing separator.
            traceList, symbolsList = gettrace.parseINKMLFile(os.path.join(path, fileName))
            segmentIndices = segment.segmentSymbols(traceList)
            correct_file = 0
            total_file = 0
            for label, elem in symbolsList:
                # if label in ['=', 'i', 'j','\\leq', '\\log', '\\sin', '\\cos', '\\lim', '\\geq', '\\righarrow', '\\div']: continue
                if elem in segmentIndices:
                    correct_file += 1
                else:
                    mistakes[label] = mistakes.get(label, 0) + 1
                total_file += 1
            # A file with zero symbols also counts as completely correct (0 == 0).
            if correct_file == total_file:
                completely_correct += 1
            correct += correct_file
            total += total_file
            total_files += 1

    print(time() - start)
    # Guard the ratios so an empty data set reports 0.0 rather than crashing.
    print(float(correct) / total if total else 0.0)
    print(float(completely_correct) / total_files if total_files else 0.0)
    print(mistakes)
    print(sum(mistakes.values()))
def getData(pathArray):
    """Collect, per inkml file, its symbols ordered by first stroke index.

    For every entry in each directory of ``pathArray`` whose name ends in
    "inkml", the file is parsed with ``gettrace.parseINKMLFile``. Each symbol
    becomes a ``(features, label)`` pair, where ``features`` is the output of
    ``itp.inkml_to_pixels`` flattened one level with the stroke count appended.
    Pairs are keyed by the symbol's first stroke index, so two symbols sharing
    a first index collapse to the one seen last.

    The elapsed wall-clock time in seconds is printed before returning.

    NOTE(review): another function named ``getData`` appears earlier in this
    file; if both are at module level, this later definition is the one that
    stays bound.

    Args:
        pathArray: iterable of directory paths to scan.

    Returns:
        A list with one entry per inkml file; each entry is a list of
        ``(features, label)`` tuples sorted by first stroke index.
    """
    from time import time

    start = time()
    data = []
    for path in pathArray:
        # `fileName` rather than `file`, which shadows the Python 2 builtin.
        for fileName in os.listdir(path):
            if not fileName.endswith("inkml"):
                continue
            # os.path.join tolerates directories given without a trailing separator.
            traceList, symbolsList = gettrace.parseINKMLFile(os.path.join(path, fileName))
            equationDict = {}
            for label, indices in symbolsList:
                strokes = [traceList[elem] for elem in indices]
                pixels = itp.inkml_to_pixels(strokes)
                features = list(itertools.chain.from_iterable(pixels))
                features.append(len(strokes))
                equationDict[indices[0]] = (features, label)
            data.append([equationDict[key] for key in sorted(equationDict)])
    print(time() - start)
    return data