def getSegmentationAccuracy(pathArray):
    """Collect features of correctly segmented symbols and pickle them.

    Every entry of every directory in ``pathArray`` whose name ends in
    "inkml" is parsed with ``gettrace.parseINKMLFile`` and segmented with
    ``segment.segmentSymbols``.  For each ``(label, elem)`` pair of the
    parsed symbol list whose stroke-index group ``elem`` also appears in the
    segmenter output, a feature vector is built: the flattened result of
    ``itp.inkml_to_pixels`` for the symbol's strokes, followed by the number
    of strokes.

    The pair ``(dataX, dataY)`` (feature vectors, labels) is pickled to the
    file "segmented_data_18" in the current working directory.

    Args:
        pathArray: iterable of directory paths to scan.

    Returns:
        None.  The number of directory entries visited is printed.
    """
    dataX = []
    dataY = []
    j = 0  # counts every directory entry, including skipped non-INKML ones
    for path in pathArray:
        for name in os.listdir(path):
            j += 1
            if not name.endswith("inkml"):
                continue
            # os.path.join gives the same result as plain concatenation when
            # `path` already ends with a separator, and also works when it
            # does not.
            traceList, symbolsList = gettrace.parseINKMLFile(
                os.path.join(path, name))
            segmentIndices = segment.segmentSymbols(traceList)
            for label, elem in symbolsList:
                # Keep only symbols whose stroke grouping the segmenter
                # reproduced.
                if elem not in segmentIndices:
                    continue
                strokes = [traceList[i] for i in elem]
                pixels = itp.inkml_to_pixels(strokes)
                chain = list(itertools.chain.from_iterable(pixels))
                chain.append(len(strokes))
                dataX.append(chain)
                dataY.append(label)
    print(j)
    # Pickle data must go to a binary-mode file, and the handle has to be
    # closed so the data is flushed to disk.
    with open("segmented_data_18", "wb") as out:
        pickle.dump((dataX, dataY), out)
def recognize():
    """Classify the strokes posted in the request and return the symbols.

    The 'info' form field is decoded as JSON into a list of strokes, which
    ``segment.segmentSymbols`` splits into groups of stroke indices.  Each
    group is turned into a feature vector (flattened pixels from
    ``inkml_to_pixels.inkml_to_pixels`` plus the stroke count) and passed to
    the module-level ``svm``; the first element of every prediction is
    concatenated, in segment order, into the returned string.
    """
    raw_info = request.form['info']
    print(raw_info)
    strokes = json.loads(raw_info)

    predictions = []
    for group in segment.segmentSymbols(strokes):
        group_strokes = [strokes[idx] for idx in group]
        pixels = inkml_to_pixels.inkml_to_pixels(group_strokes)
        features = list(itertools.chain.from_iterable(pixels))
        features.append(len(group_strokes))
        predictions.append(svm.predict(features)[0])
    return "".join(predictions)
def getSegmentationAccuracy(pathArray):
    """Evaluate per-file recognition accuracy of the pickled classifier.

    The classifier is unpickled from "svm18px" in the current working
    directory.  Every entry of every directory in ``pathArray`` whose name
    ends in "inkml" is parsed with ``gettrace.parseINKMLFile`` and segmented
    with ``segment.segmentSymbols``.  A symbol counts as correct when its
    stroke-index group appears in the segmenter output and the first element
    of ``svm.predict`` on its feature vector equals its label.

    Printed, in order: the number of directory entries visited, the number
    of files with every symbol correct, with at most one symbol missed, with
    at most two symbols missed, and then those three counts as fractions of
    the number of INKML files evaluated (0.0 when none were evaluated).

    Args:
        pathArray: iterable of directory paths to scan.

    Returns:
        None.
    """
    # NOTE: unpickling can execute arbitrary code -- only load a model file
    # from a trusted source.  Binary mode is required for pickle data.
    with open('svm18px', "rb") as model_file:
        svm = pickle.load(model_file)

    j = 0  # every directory entry, including skipped non-INKML ones
    evaluated = 0  # INKML files actually scored
    total_correct = 0
    off_by_one = 0
    off_by_two = 0
    for path in pathArray:
        for name in os.listdir(path):
            j += 1
            if not name.endswith("inkml"):
                continue
            evaluated += 1
            # os.path.join matches plain concatenation when `path` already
            # ends with a separator, and also works when it does not.
            traceList, symbolsList = gettrace.parseINKMLFile(
                os.path.join(path, name))
            segmentIndices = segment.segmentSymbols(traceList)
            correct = 0
            for label, elem in symbolsList:
                # Only symbols the segmenter grouped correctly are classified.
                if elem not in segmentIndices:
                    continue
                strokes = [traceList[i] for i in elem]
                pixels = itp.inkml_to_pixels(strokes)
                chain = list(itertools.chain.from_iterable(pixels))
                chain.append(len(strokes))
                if svm.predict(chain)[0] == label:
                    correct += 1
            missed = len(symbolsList) - correct
            if missed == 0:
                total_correct += 1
            if missed <= 1:
                off_by_one += 1
            if missed <= 2:
                off_by_two += 1

    print(j)
    print(total_correct)
    print(off_by_one)
    print(off_by_two)
    # The denominator used to be the literal 836; use the number of files
    # that were really evaluated so the ratios hold for any data set.
    denominator = float(evaluated) if evaluated else 1.0
    print(total_correct / denominator)
    print(off_by_one / denominator)
    print(off_by_two / denominator)
def recognize_hmm():
    """Decode the posted strokes as one sequence with the HMM and return it.

    The 'info' form field is decoded as JSON into a list of strokes, which
    ``segment.segmentSymbols`` splits into groups of stroke indices.  Each
    group becomes a feature vector (flattened pixels from
    ``inkml_to_pixels.inkml_to_pixels`` plus the stroke count) wrapped in a
    one-element tuple.  The whole list is handed to the module-level
    ``hmm_instance.compute_best_sequence`` and the symbols it yields are
    concatenated into the returned string.
    """
    strokes = json.loads(request.form['info'])

    equation = []
    for group in segment.segmentSymbols(strokes):
        group_strokes = [strokes[idx] for idx in group]
        pixels = inkml_to_pixels.inkml_to_pixels(group_strokes)
        features = list(itertools.chain.from_iterable(pixels))
        features.append(len(group_strokes))
        equation.append((features,))

    best_sequence = hmm_instance.compute_best_sequence(equation)
    return "".join(best_sequence)
def getSegmentationAccuracy(pathArray):
    """Measure how often the segmenter reproduces the listed symbols.

    Every entry of every directory in ``pathArray`` whose name ends in
    "inkml" is parsed with ``gettrace.parseINKMLFile`` and segmented with
    ``segment.segmentSymbols``.  A symbol is counted as correct when its
    stroke-index group appears in the segmenter output; otherwise its label
    is tallied in a mistakes table.

    Printed, in order: elapsed seconds, the fraction of symbols segmented
    correctly, the fraction of files with every symbol correct, the
    per-label mistake counts, and the total number of mistakes.  Either
    fraction is reported as 0.0 when there was nothing to count.

    Args:
        pathArray: iterable of directory paths to scan.

    Returns:
        None.
    """
    from time import time
    start = time()
    completely_correct = 0
    total_files = 0
    correct = 0
    total = 0
    mistakes = {}  # label -> number of times that symbol was mis-segmented
    for path in pathArray:
        for name in os.listdir(path):
            if not name.endswith("inkml"):
                continue
            # os.path.join matches plain concatenation when `path` already
            # ends with a separator, and also works when it does not.
            traceList, symbolsList = gettrace.parseINKMLFile(
                os.path.join(path, name))
            segmentIndices = segment.segmentSymbols(traceList)
            correct_file = 0
            total_file = 0
            for label, elem in symbolsList:
                if elem in segmentIndices:
                    correct_file += 1
                else:
                    mistakes[label] = mistakes.get(label, 0) + 1
                total_file += 1
            if correct_file == total_file:
                completely_correct += 1
            correct += correct_file
            total += total_file
            total_files += 1

    print(time() - start)
    # Guard the ratios: with no INKML files (or no symbols) the plain
    # divisions would raise ZeroDivisionError.
    print(float(correct) / total if total else 0.0)
    print(float(completely_correct) / total_files if total_files else 0.0)
    print(mistakes)
    print(sum(mistakes.values()))
# each element in the list corresponds to a trace.
# Each trace is represented as a list of (x,y) tuples.
def parseINKMLFile(filename):
    """Parse an INKML file and return ``(traceList, symbolsList)``.

    The file's full text is fed to a freshly initialised ``MyINKMLParser``;
    the parser's ``traceList`` and ``symbolsList`` attributes are returned.

    Args:
        filename: path of the INKML file to read.
    """
    # open() replaces the file() builtin, which no longer exists in Python 3.
    with open(filename) as f:
        s = f.read()
    parser = MyINKMLParser()
    parser.init()
    parser.feed(s)
    return parser.traceList, parser.symbolsList


# Given the path to an INKML file, returns the values of the parser's
# symbolOrder mapping, ordered by their sorted keys.
def parseSymbolOrder(filename):
    """Parse an INKML file and return its symbols in ``symbolOrder`` order.

    The file's full text is fed to a freshly initialised ``MyINKMLParser``;
    the values of its ``symbolOrder`` mapping are returned as a list, sorted
    by key.

    Args:
        filename: path of the INKML file to read.
    """
    # open() replaces the file() builtin, which no longer exists in Python 3.
    with open(filename) as f:
        s = f.read()
    parser = MyINKMLParser()
    parser.init()
    parser.feed(s)
    return [parser.symbolOrder[key] for key in sorted(parser.symbolOrder)]


if __name__ == "__main__":
    # n = 'samples/test_sample_recognized.inkml'
    # n = 'ICFHR_package/CROHME2012_data/testDataGT/001-equation001.inkml'
    n = 'ICFHR_package/CROHME2012_data/trainData/trainData/algb09.inkml'
    # n = 'samples/train_sample_with_GT.inkml'
    traceList, symbolsList = parseINKMLFile(n)
    segmentIndices = segment.segmentSymbols(traceList)