class Scg(object): def __init__(self, scg_id, scg_content, truth, request_at, response_at): self.id = scg_id self.content = scg_content self.response = truth self.request_at = request_at self.response_at = response_at self.truth_obj = Payload(truth) self.dummySample = Sample('data/inkml/65_alfonso.inkml') self.w_h_ratio = 1.0 # initialize here, updated in replace_traces() self.replace_traces() self.dummySample.re_calculate_IMG_MINMAX() def get_latex(self): return self.truth_obj.latex def replace_traces(self): ''' replace the traces in dummySample with the one generated from scg_content :return: ''' strokes = scginkparser.parse_scg_ink_file(self.content, self.id) #for st in strokes: #print st traces = {} trace_id_int = 0 for st in strokes: coords = np.zeros((2, len(st))) idx = 0 for x_y in st: coords[:, idx] = [float(x_y[0]), float(x_y[1])] idx += 1 traces[trace_id_int] = Stroke(trace_id_int, coords) trace_id_int += 1 # //Compute bounding box of the input expression x_min, y_min, x_max, y_max = get_bounding_box( traces) # bounding box for the whole math expression # Just in case there is only one point or a sequence of points perfectly aligned with the x or y axis if x_max == x_min: x_max = x_min + 1 if y_max == y_min: y_max = y_min + 1 self.w_h_ratio = float(x_max - x_min) / (y_max - y_min) # Renormalize to height [0,10000] keeping the aspect ratio H = 10000.0 W = H * (x_max - x_min) / (y_max - y_min) for trace_key, trace_v in traces.iteritems(): trace_v.calc_coords_h10000(H, W, x_min, y_min, x_max, y_max) self.dummySample.traces = traces def save_image(self, path): img, W, H = self.dummySample.render() print 'save image to: ', path misc.imsave(path, img)
def __init__(self, scg_id, scg_content, truth):
    '''
    Store the record, wrap the truth in a Payload and install the parsed
    strokes into a template Sample.

    :param scg_id: record identifier
    :param scg_content: SCG ink content of the record
    :param truth: ground truth, wrapped in a Payload
    '''
    self.id = scg_id
    self.content = scg_content
    self.truth_obj = Payload(truth)

    # Template sample; replace_traces() is called below to swap its traces.
    template_path = 'data/inkml/65_alfonso.inkml'
    self.dummySample = Sample(template_path)

    self.w_h_ratio = 1.0  # placeholder, updated in replace_traces()
    self.replace_traces()
    self.dummySample.re_calculate_IMG_MINMAX()
def process_list(self, pair_file_name, latex_out): ''' :param pair_file_name, set name: for example, 'train', one train.flist, the other train.idlist :param outfile: for example, data/batch/train.lst :param latex_out: 'data/batch/latex_list.txt' :return: ''' print "No dir specified, using default dir" base_dir = 'data/batch/' # self.list_out_dir flist_base_dir = self.list_out_dir prefix = 'im2latex_' infile = flist_base_dir + pair_file_name + '.flist' # data/batch/pickle/train.flist outfile = base_dir + prefix + pair_file_name + '.lst' # data/batch/im2latex_train.lst scg_id_list_file = flist_base_dir + pair_file_name + '.idlist' print 'input: ', infile print 'output: ', outfile with open(infile, 'rb') as fin: flist = pickle.load(fin) with open(scg_id_list_file, 'rb') as fin: idlist = pickle.load(fin) with codecs.open(outfile, 'w', 'utf-8') as f_out: for file_path in flist: print 'file: ', file_path # process one file sample = Sample(file_path) latex = sample.latex #output latex latex_out.write(latex[1:-1] + '\n') #get filename without extension png_name = self.get_filename_noext(file_path) line = str(self.latex_index) + ' ' + png_name + ' ' + 'basic\n' f_out.write(line) #output image img, W, H = sample.render() misc.imsave(png_dir + png_name + '.png', img) self.latex_index += 1 for id in idlist: print 'id: ', id logging.info('processing %d scg record', id) scg = self.scgs[id] latex = scg.get_latex() # output latex latex_out.write(latex + '\n') # get filename without extension png_name = str(id) line = str(self.latex_index) + ' ' + png_name + ' ' + 'basic\n' f_out.write(line) # output image (this task is done when dividing the list into 3 groups #scg.save_image(png_dir + png_name + '.png') self.latex_index += 1
def inkml2png(self): ''' for all inkml files in data/batch/inkml folder, generate its png image and save it to data/batch/formula_images/ folder It is used if different resolution images need to be generated for mhr performance evaluation. :return: ''' files, files_nolatex = self.all_inkml_files() for file_path in files: print 'file: ', file_path # process one file sample = Sample(file_path) # get filename without extension png_name = self.get_filename_noext(file_path) # output image img, W, H = sample.render() misc.imsave(png_dir + png_name + '.png', img)
def all_inkml_files(self): ''' :return: files, files_nolatex ''' files = [] files_nolatex = [] for root, dirnames, filenames in os.walk(self.inkml_file_path, followlinks=True): for filename in fnmatch.filter(filenames, '*.inkml'): tmp_path = os.path.join(root, filename) print tmp_path sample = Sample(tmp_path) if hasattr(sample, 'latex') and self.check_latex_length(sample): #latex has been stripped #print 'latex: ', tmp_path files.append(tmp_path) else: files_nolatex.append(tmp_path) return files, files_nolatex
coords[:, idx] = [float(x_y[0]), float(x_y[1])] idx += 1 traces[trace_id_int] = Stroke(trace_id_int, coords) trace_id_int += 1 # //Compute bounding box of the input expression x_min, y_min, x_max, y_max = get_bounding_box(traces) # bounding box for the whole math expression # Just in case there is only one point or a sequence of points perfectly aligned with the x or y axis if x_max == x_min: x_max = x_min + 1; if y_max == y_min: y_max = y_min + 1; # Renormalize to height [0,10000] keeping the aspect ratio H = 10000.0 W = H * (x_max - x_min) / (y_max - y_min) for trace_key, trace_v in traces.iteritems(): trace_v.calc_coords_h10000(H, W, x_min, y_min, x_max, y_max) for trace_key, trace_v in traces.iteritems(): print trace_key, trace_v rx, ry, rs, rt = trace_v.get_bounding_box_h10000() print rx, ry, rs, rt dummy_sample = Sample('data/inkml/65_alfonso.inkml') dummy_sample.traces = traces img, W, H = dummy_sample.render() print 'save image to temp/all.png: ' misc.imsave('temp/all.png', img)
def setUp(self):
    '''
    Load the fixture inkml file and keep four of its traces (keys 0, 1, 2
    and 9) on the test case.
    '''
    sample = Sample('data/inkml/65_alfonso.inkml')

    self.stroke0 = sample.traces[0]
    self.stroke1 = sample.traces[1]
    self.stroke2 = sample.traces[2]
    self.stroke9 = sample.traces[9]
def test_loading_inkml(self): iml = Sample('data/inkml/D_357_HMA095045.inkml') print 'latex: ', iml.latex
def setUp(self):
    '''
    Load the fixture inkml file and keep the sample plus its first three
    traces (keys 0, 1 and 2) on the test case.
    '''
    self.idd = Sample('data/inkml/65_alfonso.inkml')
    self.stroke0 = self.idd.traces[0]
    self.stroke1 = self.idd.traces[1]
    self.stroke2 = self.idd.traces[2]
    # The former unused local ``prev = self.idd.traces[1]`` only repeated
    # the lookup already done for stroke1 and has been removed.
class TestSample(TestCase): def setUp(self): self.idd = Sample('data/inkml/65_alfonso.inkml') self.stroke0 = self.idd.traces[0] self.stroke1 = self.idd.traces[1] self.stroke2 = self.idd.traces[2] prev = self.idd.traces[1] def test_inkml_loading(self): print 'loading inkml' print 'latex: ', self.idd.latex def test_loading_inkml(self): iml = Sample('data/inkml/D_357_HMA095045.inkml') print 'latex: ', iml.latex def test_get_bounding_box_h1000(self): self.assertEqual((0, 0, 69622, 10000), (self.idd.ox, self.idd.oy, self.idd.os, self.idd.ot)) def test_getAVGstroke_size(self): avgW, avgH = self.idd.getAVGstroke_size() targetW = 6075.5 targetH = 4320.8999 self.assertTrue(abs(avgW - targetW) < 1.0 / 2) self.assertTrue(abs(avgH - targetH) < 1.0 / 2) print avgW, avgH def test_nStrokes(self): ns = self.idd.nStrokes() self.assertEquals(10, ns) def test_detRefSymbol(self): ''' expect (5764, 5063) :return: ''' RX, RY = self.idd.detRefSymbol() print RX, RY self.assertEquals((5764, 5063), (RX, RY)) def test_stroke_aspect_area(self): # expect 9623, 6794, 1.41639686, 65378662 i = 0 ancho, alto, aspectratio, area = self.idd.stroke_aspect_area(i) print ancho, alto, aspectratio, area self.assertEquals(9623, ancho) self.assertEquals(6794, alto) self.assertEquals(65378662, area) self.assertAlmostEqual(1.41639686, aspectratio) def test_median_vmedx(self): vmedx = [9623, 6039, 7360, 7360, 6982, 4152, 3586, 3586, 6793, 5284] print vmedx vmedx.sort() print vmedx l = len(vmedx) print vmedx[l / 2] self.assertEquals(6793, vmedx[l / 2]) import numpy medx = numpy.median(vmedx) # average of 6039 and 6793 self.assertEquals(6416.0, medx) def test_stroke_distance1(self): self.idd.render() # distance between stroke 0 and stroke 2. 
the distance is before the normalization dmin = self.idd.stroke_distance(0, 2) self.assertEqual(7902.4310183639063, dmin) print dmin def test_stroke_distance2(self): img, W, H = self.idd.render() dmin = self.idd.stroke_distance(4, 7) print dmin def test_find_closest_pair(self): dmin, p1, p2 = self.idd.find_closest_pair(0, 2) print dmin self.assertAlmostEqual(7902.43115, dmin, 3) self.assertEqual((9433, 9056), p1) self.assertEqual((16037, 4716), p2) def test_render(self): ''' testing rendering image from inkml file save image at temp/all.png :return: ''' img, W, H = self.idd.render() print 'save image to temp/all.png: ' misc.imsave('temp/all.png', img) print img[15, 9] print W, H #self.assertEqual(255, img[15, 9]) #self.assertEqual((1792, 266), (W, H)) def test_linea(self): W, H = 1792, 266 img = np.ones((H, W), dtype=int) * 255 self.idd.pix_stk = np.ones((H, W), dtype=int) * (-1) pa = (19.4867649, 149.881516) pb = (5, 145.043594) self.idd.linea(img, pa, pb, 0) print np.where(img == 0) print img self.assertEqual(0, img[145, 5]) self.assertEqual(0, img[150, 20]) def test_compute_strokes_distances(self): RX = 5764 RY = 5063 img, W, H = self.idd.render() stk_dis = self.idd.compute_strokes_distances(RX, RY) print '(x, y): (1169, 119): ', img[119][1169], self.idd.pix_stk[119][ 1169] self.assertEqual(0, img[119][1169]) self.assertEqual(5, self.idd.pix_stk[119][1169]) print stk_dis def test_get_close_strokes(self): self.idd.detRefSymbol() self.idd.render() self.idd.compute_strokes_distances(self.idd.RX, self.idd.RY) L = [] self.idd.get_close_strokes(7, L, 0.69474973) self.assertEqual((6, 5), (L[0], L[1])) print L L = [] self.idd.get_close_strokes(6, L, 0.69474973) self.assertEqual(5, L[0]) L = [] self.idd.get_close_strokes(8, L, 0.69474973) self.assertEqual((7, 6), (L[0], L[1]))