def getSegmentationAccuracy(pathArray):
  """Pickle feature vectors for the symbols the segmenter recovered exactly.

  Despite its name, this function computes no accuracy figure: it builds a
  dataset and writes it to the file "segmented_data_18" in the current
  working directory as a pickled ``(dataX, dataY)`` tuple.

  For every file whose name ends in "inkml" in each directory of
  ``pathArray``, the file is parsed with ``gettrace.parseINKMLFile`` and
  segmented with ``segment.segmentSymbols``.  Each ``(label, elem)`` entry of
  the parsed symbol list whose ``elem`` is also present in the segmenter
  output contributes one sample:

  * ``dataX`` gets the flattened output of ``itp.inkml_to_pixels`` for the
    symbol's strokes, with the stroke count appended as the last feature;
  * ``dataY`` gets ``label``.

  A running count of directory entries seen so far (including skipped
  non-inkml entries) is printed after each processed inkml file.

  Args:
    pathArray: iterable of directory paths to scan (non-recursively).

  Returns:
    None.  The result is only written to disk.
  """
  dataX = []
  dataY = []
  j = 0
  for path in pathArray:
    for fileName in os.listdir(path):
      j += 1
      if not fileName.endswith("inkml"):
        continue
      # os.path.join works whether or not `path` ends with a separator.
      traceList, symbolsList = gettrace.parseINKMLFile(
          os.path.join(path, fileName))
      segmentIndices = segment.segmentSymbols(traceList)
      for label, elem in symbolsList:
        # Keep only symbols whose stroke grouping the segmenter reproduced.
        if elem not in segmentIndices:
          continue
        strokes = [traceList[i] for i in elem]
        pixels = itp.inkml_to_pixels(strokes)
        chain = list(itertools.chain.from_iterable(pixels))
        chain.append(len(strokes))
        dataX.append(chain)
        dataY.append(label)
      print(j)
  # Binary mode is the documented mode for pickle files; the context manager
  # guarantees the data is flushed and the handle closed.
  with open("segmented_data_18", "wb") as outFile:
    pickle.dump((dataX, dataY), outFile)
def getSegmentationAccuracy(pathArray, numEquations=836):
  """Report how many files are recognised fully, or with one or two misses.

  Loads a pickled classifier from the file 'svm18px' in the current working
  directory.  For every file whose name ends in "inkml" in each directory of
  ``pathArray``, the file is parsed with ``gettrace.parseINKMLFile`` and
  segmented with ``segment.segmentSymbols``.  A symbol counts as correct when
  its stroke indices appear in the segmenter output AND
  ``svm.predict(features)[0]`` equals its label, where ``features`` is the
  flattened ``itp.inkml_to_pixels`` output plus the stroke count.

  Printed output: a running count of directory entries after each processed
  inkml file, then three counters (files with 0 misses, <= 1 miss, <= 2
  misses), then the same three counters divided by ``numEquations``.

  Args:
    pathArray: iterable of directory paths to scan (non-recursively).
    numEquations: divisor used for the three printed ratios.  Defaults to
      836, the value that was previously hard-coded (presumably the size of
      the evaluation set -- confirm before relying on the ratios).

  Returns:
    None.
  """
  # NOTE(security): pickle.load can execute arbitrary code; only use a model
  # file from a trusted source.  Binary mode is the documented mode for
  # pickle files and the context manager closes the handle.
  with open('svm18px', "rb") as modelFile:
    svm = pickle.load(modelFile)

  j = 0
  total_correct = 0
  off_by_one = 0
  off_by_two = 0

  for path in pathArray:
    for fileName in os.listdir(path):
      j += 1
      if not fileName.endswith("inkml"):
        continue
      # os.path.join works whether or not `path` ends with a separator.
      traceList, symbolsList = gettrace.parseINKMLFile(
          os.path.join(path, fileName))
      segmentIndices = segment.segmentSymbols(traceList)

      correct = 0
      for label, elem in symbolsList:
        # A symbol the segmenter did not reproduce can never be correct.
        if elem not in segmentIndices:
          continue
        strokes = [traceList[i] for i in elem]
        pixels = itp.inkml_to_pixels(strokes)
        chain = list(itertools.chain.from_iterable(pixels))
        chain.append(len(strokes))
        if svm.predict(chain)[0] == label:
          correct += 1

      # The counters are cumulative: a perfect file also counts towards the
      # "at most one miss" and "at most two misses" totals.
      missed = len(symbolsList) - correct
      if missed == 0:
        total_correct += 1
      if missed <= 1:
        off_by_one += 1
      if missed <= 2:
        off_by_two += 1
      print(j)

  print(total_correct)
  print(off_by_one)
  print(off_by_two)

  print(float(total_correct) / numEquations)
  print(float(off_by_one) / numEquations)
  print(float(off_by_two) / numEquations)
# Example #3
# 0
def getData(pathArray):
  """Build flat feature/label lists from every inkml file in the directories.

  For every file whose name ends in "inkml" in each directory of
  ``pathArray``, the file is parsed with ``gettrace.parseINKMLFile``.  Each
  ``(label, indices)`` entry of the parsed symbol list yields one sample: the
  flattened ``itp.inkml_to_pixels`` output for the traces selected by
  ``indices``, with the stroke count appended as the last feature.

  The elapsed wall-clock time in seconds is printed before returning.

  Args:
    pathArray: iterable of directory paths to scan (non-recursively).

  Returns:
    A ``(dataX, dataY)`` tuple of parallel lists: feature vectors and their
    labels.
  """
  from time import time
  start = time()
  dataX = []
  dataY = []
  for path in pathArray:
    # `fileName` rather than `file`, which would shadow the Python 2 builtin.
    for fileName in os.listdir(path):
      if not fileName.endswith("inkml"):
        continue
      # os.path.join works whether or not `path` ends with a separator.
      traceList, symbolsList = gettrace.parseINKMLFile(
          os.path.join(path, fileName))
      for label, indices in symbolsList:
        strokes = [traceList[elem] for elem in indices]
        pixels = itp.inkml_to_pixels(strokes)
        chain = list(itertools.chain.from_iterable(pixels))
        chain.append(len(strokes))
        dataX.append(chain)
        dataY.append(label)
  print(time() - start)
  return dataX, dataY
def getSegmentationAccuracy(pathArray):
  """Print segmentation accuracy statistics for the inkml files found.

  For every file whose name ends in "inkml" in each directory of
  ``pathArray``, the file is parsed with ``gettrace.parseINKMLFile`` and
  segmented with ``segment.segmentSymbols``.  A symbol is counted as
  correctly segmented when its stroke indices appear in the segmenter output.

  Printed output, in order:
    1. elapsed wall-clock time in seconds;
    2. fraction of symbols segmented correctly (0.0 if no symbols were seen);
    3. fraction of files in which every symbol was segmented correctly
       (0.0 if no files were seen);
    4. dict mapping each label to the number of times it was missed;
    5. total number of missed symbols.

  Args:
    pathArray: iterable of directory paths to scan (non-recursively).

  Returns:
    None.
  """
  from time import time
  start = time()
  completely_correct = 0
  total_files = 0
  correct = 0
  total = 0
  mistakes = {}
  for path in pathArray:
    # `fileName` rather than `file`, which would shadow the Python 2 builtin.
    for fileName in os.listdir(path):
      if not fileName.endswith("inkml"):
        continue
      # os.path.join works whether or not `path` ends with a separator.
      traceList, symbolsList = gettrace.parseINKMLFile(
          os.path.join(path, fileName))
      segmentIndices = segment.segmentSymbols(traceList)
      correct_file = 0
      total_file = 0
      for label, elem in symbolsList:
        total_file += 1
        if elem in segmentIndices:
          correct_file += 1
        else:
          mistakes[label] = mistakes.get(label, 0) + 1

      if correct_file == total_file:
        completely_correct += 1

      correct += correct_file
      total += total_file
      total_files += 1

  print(time() - start)
  # Guard the ratios so an empty input reports 0.0 instead of raising
  # ZeroDivisionError.
  print(float(correct) / total if total else 0.0)
  print(float(completely_correct) / total_files if total_files else 0.0)
  print(mistakes)
  print(sum(mistakes.values()))
def getData(pathArray):
  """Build one ordered list of (features, label) pairs per inkml file.

  For every file whose name ends in "inkml" in each directory of
  ``pathArray``, the file is parsed with ``gettrace.parseINKMLFile``.  Each
  ``(label, indices)`` entry of the parsed symbol list becomes a
  ``(chain, label)`` pair, where ``chain`` is the flattened
  ``itp.inkml_to_pixels`` output for the traces selected by ``indices`` with
  the stroke count appended.  Within a file the pairs are ordered by each
  symbol's first trace index (``indices[0]``).

  The elapsed wall-clock time in seconds is printed before returning.

  Args:
    pathArray: iterable of directory paths to scan (non-recursively).

  Returns:
    A list with one entry per inkml file; each entry is that file's list of
    ``(chain, label)`` pairs.
  """
  from time import time
  start = time()
  data = []
  for path in pathArray:
    # `fileName` rather than `file`, which would shadow the Python 2 builtin.
    for fileName in os.listdir(path):
      if not fileName.endswith("inkml"):
        continue
      # os.path.join works whether or not `path` ends with a separator.
      traceList, symbolsList = gettrace.parseINKMLFile(
          os.path.join(path, fileName))
      # Keyed by first trace index; if two symbols share a first index the
      # later one replaces the earlier one.
      equationDict = {}
      for label, indices in symbolsList:
        strokes = [traceList[elem] for elem in indices]
        pixels = itp.inkml_to_pixels(strokes)
        chain = list(itertools.chain.from_iterable(pixels))
        chain.append(len(strokes))
        equationDict[indices[0]] = (chain, label)
      data.append([equationDict[key] for key in sorted(equationDict)])
  print(time() - start)
  return data