import glob
import io
import logging
import os
import random
import re
import warnings
import xml.etree.ElementTree as ET

import numpy as np
import scipy.io as sio
import tensorflow as tf
from PIL import Image, ImageDraw
from tqdm import tqdm

# Project-local helpers. These import paths are assumptions; adjust them to
# the actual repository layout.
from aster.core import standard_fields as fields
from aster.utils import dataset_util

# Flags such as data_dir, output_path, crop_margin, exclude_difficult,
# ignore_difficult, start_index, num_images, shuffle and num_dump_images are
# assumed to be defined by the calling script.
FLAGS = tf.app.flags.FLAGS


def create_svt_subset(output_path):
  """Creates tfrecords of cropped words from the SVT test set."""
  writer = tf.python_io.TFRecordWriter(output_path)
  test_xml_path = os.path.join(FLAGS.data_dir, 'test.xml')
  count = 0
  xml_root = ET.parse(test_xml_path).getroot()
  for image_node in xml_root.findall('image'):
    image_rel_path = image_node.find('imageName').text
    lexicon = image_node.find('lex').text.lower()
    lexicon = lexicon.split(',')
    image_path = os.path.join(FLAGS.data_dir, image_rel_path)
    image = Image.open(image_path)
    image_w, image_h = image.size
    for i, rect in enumerate(image_node.find('taggedRectangles')):
      bbox_x = float(rect.get('x'))
      bbox_y = float(rect.get('y'))
      bbox_w = float(rect.get('width'))
      bbox_h = float(rect.get('height'))
      if FLAGS.crop_margin > 0:
        margin = bbox_h * FLAGS.crop_margin
        bbox_x = bbox_x - margin
        bbox_y = bbox_y - margin
        bbox_w = bbox_w + 2 * margin
        bbox_h = bbox_h + 2 * margin
      bbox_xmin = int(round(max(0, bbox_x)))
      bbox_ymin = int(round(max(0, bbox_y)))
      bbox_xmax = int(round(min(image_w - 1, bbox_x + bbox_w)))
      bbox_ymax = int(round(min(image_h - 1, bbox_y + bbox_h)))
      word_crop_im = image.crop((bbox_xmin, bbox_ymin, bbox_xmax, bbox_ymax))
      im_buff = io.BytesIO()
      word_crop_im.save(im_buff, format='jpeg')
      word_crop_jpeg = im_buff.getvalue()
      crop_name = '{}:{}'.format(image_rel_path, i)
      groundtruth_text = rect.find('tag').text.lower()
      example = tf.train.Example(features=tf.train.Features(feature={
        fields.TfExampleFields.image_encoded: \
          dataset_util.bytes_feature(word_crop_jpeg),
        fields.TfExampleFields.image_format: \
          dataset_util.bytes_feature('jpeg'.encode('utf-8')),
        fields.TfExampleFields.filename: \
          dataset_util.bytes_feature(crop_name.encode('utf-8')),
        fields.TfExampleFields.channels: \
          dataset_util.int64_feature(3),
        fields.TfExampleFields.colorspace: \
          dataset_util.bytes_feature('rgb'.encode('utf-8')),
        fields.TfExampleFields.transcript: \
          dataset_util.bytes_feature(groundtruth_text.encode('utf-8')),
        fields.TfExampleFields.lexicon: \
          dataset_util.bytes_feature(('\t'.join(lexicon)).encode('utf-8')),
      }))
      writer.write(example.SerializeToString())
      count += 1
  writer.close()
  print('{} examples created'.format(count))
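

# char_check is referenced by several creators below but is not defined in
# this file. A minimal sketch of a plausible implementation (an assumption,
# not the original): keep a transcript only if it is purely alphanumeric.
def char_check(text):
  return bool(re.match(r'^[0-9a-zA-Z]+$', text))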


def create_synth_add(output_path):
  """Creates tfrecords from the SynthText_Add dataset ('SynAdd_*' crops)."""
  writer = tf.python_io.TFRecordWriter(output_path)
  data_dir = '/data/wangyz/Projects/Dataset/SynthText_Add/'
  annotation_dir = '/data/wangyz/Projects/Dataset/SynthText_Add/annotationlist/'
  count = 0
  for datasetid in range(1, 21):
    groundtruth_file_path = annotation_dir + 'gt_' + str(datasetid) + '.txt'
    with open(groundtruth_file_path, 'r') as f:
      img_gts = [line.strip() for line in f.readlines()]
    for img_gt in img_gts:
      content = img_gt.split(',')
      img_path = data_dir + 'crop_img_' + str(datasetid) + '/' + content[0]
      gt = content[1][1:-1]  # strip the surrounding quotes
      if FLAGS.exclude_difficult and not char_check(gt):
        continue
      img = Image.open(img_path)
      img = img.convert('RGB')
      img_buff = io.BytesIO()
      img.save(img_buff, format='jpeg')
      word_crop_jpeg = img_buff.getvalue()
      crop_name = 'SynAdd_' + os.path.basename(img_path)
      example = tf.train.Example(features=tf.train.Features(feature={
        fields.TfExampleFields.image_encoded: \
          dataset_util.bytes_feature(word_crop_jpeg),
        fields.TfExampleFields.image_format: \
          dataset_util.bytes_feature('jpeg'.encode('utf-8')),
        fields.TfExampleFields.filename: \
          dataset_util.bytes_feature(crop_name.encode('utf-8')),
        fields.TfExampleFields.channels: \
          dataset_util.int64_feature(3),
        fields.TfExampleFields.colorspace: \
          dataset_util.bytes_feature('rgb'.encode('utf-8')),
        fields.TfExampleFields.transcript: \
          dataset_util.bytes_feature(gt.encode('utf-8')),
      }))
      writer.write(example.SerializeToString())
      count += 1
      if count % 2000 == 0:
        print(count)
  writer.close()
  print('{} examples created'.format(count))


def create_ic15(output_path):
  writer = tf.python_io.TFRecordWriter(output_path)
  groundtruth_file_path = os.path.join(FLAGS.data_dir,
                                       'test_groundtruth_all.txt')
  count = 0
  with open(groundtruth_file_path, 'r') as f:
    lines = f.readlines()
  img_gts = [line.strip() for line in lines]
  for img_gt in img_gts:
    img_rel_path, gt = img_gt.split(' ', 1)
    if FLAGS.exclude_difficult and not char_check(gt):
      continue
    img_path = os.path.join(FLAGS.data_dir, img_rel_path)
    img = Image.open(img_path)
    img_buff = io.BytesIO()
    img.save(img_buff, format='jpeg')
    word_crop_jpeg = img_buff.getvalue()
    crop_name = os.path.basename(img_path)
    example = tf.train.Example(features=tf.train.Features(feature={
      fields.TfExampleFields.image_encoded: \
        dataset_util.bytes_feature(word_crop_jpeg),
      fields.TfExampleFields.image_format: \
        dataset_util.bytes_feature('jpeg'.encode('utf-8')),
      fields.TfExampleFields.filename: \
        dataset_util.bytes_feature(crop_name.encode('utf-8')),
      fields.TfExampleFields.channels: \
        dataset_util.int64_feature(3),
      fields.TfExampleFields.colorspace: \
        dataset_util.bytes_feature('rgb'.encode('utf-8')),
      fields.TfExampleFields.transcript: \
        dataset_util.bytes_feature(gt.encode('utf-8')),
    }))
    writer.write(example.SerializeToString())
    count += 1
  writer.close()
  print('{} examples created'.format(count))


def create_svt_perspective(output_path):
  writer = tf.python_io.TFRecordWriter(output_path)
  image_list_file = os.path.join(FLAGS.data_dir, 'imagelist.txt')
  with open(image_list_file, 'r') as f:
    tlines = [tline.rstrip('\n') for tline in f.readlines()]
  count = 0
  for tline in tlines:
    image_rel_path, groundtruth_text, lexicon_length, lexicon = \
      tline.split(' ')
    groundtruth_text = groundtruth_text.lower()
    lexicon_length = int(lexicon_length)
    lexicon_list = [w.lower() for w in lexicon.split(',')]
    image_path = os.path.join(FLAGS.data_dir, image_rel_path)
    with open(image_path, 'rb') as f:
      image_jpeg = f.read()
    example = tf.train.Example(features=tf.train.Features(feature={
      fields.TfExampleFields.image_encoded: \
        dataset_util.bytes_feature(image_jpeg),
      fields.TfExampleFields.image_format: \
        dataset_util.bytes_feature('jpeg'.encode('utf-8')),
      fields.TfExampleFields.filename: \
        dataset_util.bytes_feature(image_rel_path.encode('utf-8')),
      fields.TfExampleFields.channels: \
        dataset_util.int64_feature(3),
      fields.TfExampleFields.colorspace: \
        dataset_util.bytes_feature('rgb'.encode('utf-8')),
      fields.TfExampleFields.transcript: \
        dataset_util.bytes_feature(groundtruth_text.encode('utf-8')),
      fields.TfExampleFields.lexicon: \
        dataset_util.bytes_feature(('\t'.join(lexicon_list)).encode('utf-8')),
    }))
    writer.write(example.SerializeToString())
    count += 1
  writer.close()
  print('{} examples created'.format(count))


def create_iiit5k_subset(output_path, train_subset=True, lexicon_index=None):
  """Creates tfrecords from IIIT5K; test entries carry the lexicon column of
  testdata.mat selected by lexicon_index."""
  writer = tf.python_io.TFRecordWriter(output_path)
  mat_file_name = 'traindata.mat' if train_subset else 'testdata.mat'
  data_key = 'traindata' if train_subset else 'testdata'
  groundtruth_mat_path = os.path.join(FLAGS.data_dir, mat_file_name)
  mat_dict = sio.loadmat(groundtruth_mat_path)
  entries = mat_dict[data_key].flatten()
  for entry in tqdm(entries):
    image_rel_path = str(entry[0][0])
    groundtruth_text = str(entry[1][0])
    lexicon = []  # the training subset has no lexicon
    if not train_subset:
      lexicon = [str(t[0]) for t in entry[lexicon_index].flatten()]
    image_path = os.path.join(FLAGS.data_dir, image_rel_path)
    with open(image_path, 'rb') as f:
      image_jpeg = f.read()
    example = tf.train.Example(features=tf.train.Features(feature={
      fields.TfExampleFields.image_encoded: \
        dataset_util.bytes_feature(image_jpeg),
      fields.TfExampleFields.image_format: \
        dataset_util.bytes_feature('jpeg'.encode('utf-8')),
      fields.TfExampleFields.filename: \
        dataset_util.bytes_feature(image_rel_path.encode('utf-8')),
      fields.TfExampleFields.channels: \
        dataset_util.int64_feature(3),
      fields.TfExampleFields.colorspace: \
        dataset_util.bytes_feature('rgb'.encode('utf-8')),
      fields.TfExampleFields.transcript: \
        dataset_util.bytes_feature(groundtruth_text.encode('utf-8')),
      fields.TfExampleFields.lexicon: \
        dataset_util.bytes_feature(('\t'.join(lexicon)).encode('utf-8'))
    }))
    writer.write(example.SerializeToString())
  writer.close()
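
# Usage sketch for the test subsets (assumption: in IIIT5K's testdata.mat,
# entry index 2 holds the 50-word lexicon and index 3 the 1k-word lexicon):
#   create_iiit5k_subset('iiit5k_test_50.tfrecord', train_subset=False, lexicon_index=2)
#   create_iiit5k_subset('iiit5k_test_1k.tfrecord', train_subset=False, lexicon_index=3)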


def create_ic13_ic15_coco(output_path):
  """Creates tfrecords from the IC13, IC15, and COCO-Text word crops."""
  writer = tf.python_io.TFRecordWriter(output_path)
  data_dirs = [
    '/data/wangyz/Projects/Dataset/IC13_WordRecognition/Challenge2_Training_Task3_Images_GT/',
    '/data/wangyz/Projects/Dataset/IC15Inc_WordRecognition/ch4_training_word_images_gt/',
    '/data/wangyz/Projects/Dataset/COCO_WordRecognition/train_words/',
    '/data/wangyz/Projects/Dataset/COCO_WordRecognition/val_words/',
  ]
  groundtruth_file_paths = [
    '/data/wangyz/Projects/Dataset/IC13_WordRecognition/Challenge2_Training_Task3_Images_GT/gt.txt',
    '/data/wangyz/Projects/Dataset/IC15Inc_WordRecognition/ch4_training_word_images_gt/gt.txt',
    '/data/wangyz/Projects/Dataset/COCO_WordRecognition/train_words_gt.txt',
    '/data/wangyz/Projects/Dataset/COCO_WordRecognition/val_words_gt.txt',
  ]
  dataset_names = ['IC13', 'IC15', 'COCO-train', 'COCO-val']
  count = 0
  for datasetid in range(4):
    groundtruth_file_path = groundtruth_file_paths[datasetid]
    with open(groundtruth_file_path, 'r') as f:
      img_gts = [line.strip() for line in f.readlines()]
    for img_gt in img_gts:
      content = img_gt.split(',')
      if datasetid in (0, 1):
        # IC13/IC15 lines look like: word_1.png, "Transcript"
        img_path = data_dirs[datasetid] + content[0]
        gt = content[1][2:-1]  # strip the leading space and the quotes
      else:
        # COCO lines look like: <image id>,<transcript>; the transcript may
        # itself contain commas, so take everything past the first one.
        img_path = data_dirs[datasetid] + content[0] + '.jpg'
        gt = img_gt[len(content[0]) + 1:]
      if FLAGS.exclude_difficult and not char_check(gt):
        continue
      img = Image.open(img_path)
      img = img.convert('RGB')
      img_buff = io.BytesIO()
      img.save(img_buff, format='jpeg')
      word_crop_jpeg = img_buff.getvalue()
      crop_name = dataset_names[datasetid] + '_' + os.path.basename(img_path)
      gt = gt.replace(' ', '')
      example = tf.train.Example(features=tf.train.Features(feature={
        fields.TfExampleFields.image_encoded: \
          dataset_util.bytes_feature(word_crop_jpeg),
        fields.TfExampleFields.image_format: \
          dataset_util.bytes_feature('jpeg'.encode('utf-8')),
        fields.TfExampleFields.filename: \
          dataset_util.bytes_feature(crop_name.encode('utf-8')),
        fields.TfExampleFields.channels: \
          dataset_util.int64_feature(3),
        fields.TfExampleFields.colorspace: \
          dataset_util.bytes_feature('rgb'.encode('utf-8')),
        fields.TfExampleFields.transcript: \
          dataset_util.bytes_feature(gt.encode('utf-8')),
      }))
      writer.write(example.SerializeToString())
      count += 1
      if count % 1000 == 0:
        print(count)
  writer.close()
  print('{} examples created'.format(count))
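

# _is_difficult is used by the IC13 and IC03 creators below but is not
# defined in this file. A minimal sketch under a common convention (an
# assumption, not the original): words containing any non-alphanumeric
# character count as difficult.
def _is_difficult(word):
  return not re.match(r'^[\w]+$', word)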


def create_ic13(output_path):
  writer = tf.python_io.TFRecordWriter(output_path)
  groundtruth_dir = os.path.join(FLAGS.data_dir, 'Challenge2_Test_Task1_GT')
  groundtruth_files = glob.glob(os.path.join(groundtruth_dir, '*.txt'))
  count = 0
  for groundtruth_file in groundtruth_files:
    image_id = re.match(r'.*gt_img_(\d+).txt$', groundtruth_file).group(1)
    image_rel_path = 'img_{}.jpg'.format(image_id)
    image_path = os.path.join(FLAGS.data_dir, 'Challenge2_Test_Task12_Images',
                              image_rel_path)
    image = Image.open(image_path)
    image_w, image_h = image.size
    with open(groundtruth_file, 'r') as f:
      groundtruth = f.read()
    matches = re.finditer(r'^(\d+),\s(\d+),\s(\d+),\s(\d+),\s\"(.+)\"$',
                          groundtruth, re.MULTILINE)
    for i, match in enumerate(matches):
      bbox_xmin = float(match.group(1))
      bbox_ymin = float(match.group(2))
      bbox_xmax = float(match.group(3))
      bbox_ymax = float(match.group(4))
      groundtruth_text = match.group(5)
      if _is_difficult(groundtruth_text):
        continue
      if FLAGS.crop_margin > 0:
        bbox_h = bbox_ymax - bbox_ymin
        margin = bbox_h * FLAGS.crop_margin
        bbox_xmin = bbox_xmin - margin
        bbox_ymin = bbox_ymin - margin
        bbox_xmax = bbox_xmax + margin
        bbox_ymax = bbox_ymax + margin
      bbox_xmin = int(round(max(0, bbox_xmin)))
      bbox_ymin = int(round(max(0, bbox_ymin)))
      bbox_xmax = int(round(min(image_w - 1, bbox_xmax)))
      bbox_ymax = int(round(min(image_h - 1, bbox_ymax)))
      word_crop_im = image.crop((bbox_xmin, bbox_ymin, bbox_xmax, bbox_ymax))
      im_buff = io.BytesIO()
      word_crop_im.save(im_buff, format='jpeg')
      word_crop_jpeg = im_buff.getvalue()
      crop_name = '{}:{}'.format(image_rel_path, i)
      example = tf.train.Example(features=tf.train.Features(feature={
        fields.TfExampleFields.image_encoded: \
          dataset_util.bytes_feature(word_crop_jpeg),
        fields.TfExampleFields.image_format: \
          dataset_util.bytes_feature('jpeg'.encode('utf-8')),
        fields.TfExampleFields.filename: \
          dataset_util.bytes_feature(crop_name.encode('utf-8')),
        fields.TfExampleFields.channels: \
          dataset_util.int64_feature(3),
        fields.TfExampleFields.colorspace: \
          dataset_util.bytes_feature('rgb'.encode('utf-8')),
        fields.TfExampleFields.transcript: \
          dataset_util.bytes_feature(groundtruth_text.encode('utf-8')),
      }))
      writer.write(example.SerializeToString())
      count += 1
  writer.close()
  print('{} examples created'.format(count))
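

# _random_lexicon is used by create_ic03 below but is not defined in this
# file. A plausible sketch (an assumption, not the original implementation):
# sample distractors uniformly and ensure the groundtruth word is included.
def _random_lexicon(lexicon_list, groundtruth_text, lexicon_size):
  lexicon = random.sample(lexicon_list, lexicon_size)
  groundtruth_text = groundtruth_text.lower()
  if groundtruth_text not in lexicon:
    lexicon[random.randrange(lexicon_size)] = groundtruth_text
  return lexicon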


def create_ic03(output_path):
  writer = tf.python_io.TFRecordWriter(output_path)
  lexicon_file = os.path.join(FLAGS.data_dir, 'lexicon_full')
  with open(lexicon_file, 'r') as f:
    lexicon_list = [tline.rstrip('\n').lower() for tline in f.readlines()]
  # lexicon_size is not defined anywhere in this file; 50 is assumed here,
  # matching the standard 50-word per-image IC03 lexicons.
  lexicon_size = 50
  xml_path = os.path.join(FLAGS.data_dir, 'words.xml')
  xml_root = ET.parse(xml_path).getroot()
  count = 0
  for image_node in xml_root.findall('image'):
    image_rel_path = image_node.find('imageName').text
    image_path = os.path.join(FLAGS.data_dir, image_rel_path)
    image = Image.open(image_path)
    image_w, image_h = image.size
    for i, rect in enumerate(image_node.find('taggedRectangles')):
      groundtruth_text = rect.find('tag').text.lower()
      if FLAGS.ignore_difficult and _is_difficult(groundtruth_text):
        continue
      bbox_x = float(rect.get('x'))
      bbox_y = float(rect.get('y'))
      bbox_w = float(rect.get('width'))
      bbox_h = float(rect.get('height'))
      if FLAGS.crop_margin > 0:
        margin = bbox_h * FLAGS.crop_margin
        bbox_x = bbox_x - margin
        bbox_y = bbox_y - margin
        bbox_w = bbox_w + 2 * margin
        bbox_h = bbox_h + 2 * margin
      bbox_xmin = int(round(max(0, bbox_x)))
      bbox_ymin = int(round(max(0, bbox_y)))
      bbox_xmax = int(round(min(image_w - 1, bbox_x + bbox_w)))
      bbox_ymax = int(round(min(image_h - 1, bbox_y + bbox_h)))
      word_crop_im = image.crop((bbox_xmin, bbox_ymin, bbox_xmax, bbox_ymax))
      im_buff = io.BytesIO()
      word_crop_im.save(im_buff, format='jpeg')
      word_crop_jpeg = im_buff.getvalue()
      crop_name = '{}:{}'.format(image_rel_path, i)
      lexicon = _random_lexicon(lexicon_list, groundtruth_text, lexicon_size)
      example = tf.train.Example(features=tf.train.Features(feature={
        fields.TfExampleFields.image_encoded: \
          dataset_util.bytes_feature(word_crop_jpeg),
        fields.TfExampleFields.image_format: \
          dataset_util.bytes_feature('jpeg'.encode('utf-8')),
        fields.TfExampleFields.filename: \
          dataset_util.bytes_feature(crop_name.encode('utf-8')),
        fields.TfExampleFields.channels: \
          dataset_util.int64_feature(3),
        fields.TfExampleFields.colorspace: \
          dataset_util.bytes_feature('rgb'.encode('utf-8')),
        fields.TfExampleFields.transcript: \
          dataset_util.bytes_feature(groundtruth_text.encode('utf-8')),
        fields.TfExampleFields.lexicon: \
          dataset_util.bytes_feature(('\t'.join(lexicon)).encode('utf-8')),
      }))
      writer.write(example.SerializeToString())
      count += 1
  writer.close()
  print('{} examples created'.format(count))


def create_mjsynth(output_path):
  """Creates tfrecords from MJSynth (Synth90k); invalid JPEGs are skipped."""
  writer = tf.python_io.TFRecordWriter(output_path)

  # load groundtruth file
  groundtruth_file = os.path.join(FLAGS.data_dir, 'annotation.txt')
  with open(groundtruth_file, 'r') as f:
    groundtruth_lines = f.readlines()

  num_images = len(groundtruth_lines) - FLAGS.start_index
  if FLAGS.num_images > 0:
    num_images = min(num_images, FLAGS.num_images)
  indices = list(range(FLAGS.start_index, FLAGS.start_index + num_images))
  if FLAGS.shuffle:
    random.shuffle(indices)

  # a test decode pipeline for validating images
  image_jpeg_input = tf.placeholder(dtype=tf.string, shape=[])
  image = tf.image.decode_jpeg(image_jpeg_input, channels=3,
                               try_recover_truncated=False,
                               acceptable_fraction=1)

  with tf.Session() as sess:
    for index in tqdm(indices):
      image_rel_path = groundtruth_lines[index].split(' ')[0]
      image_path = os.path.join(FLAGS.data_dir, image_rel_path)

      # validate image
      valid = True
      image_jpeg = None
      try:
        with open(image_path, 'rb') as f:
          image_jpeg = f.read()
        image_output = sess.run(image,
                                feed_dict={image_jpeg_input: image_jpeg})
        if (image_output.ndim != 3 or
            image_output.shape[0] == 0 or
            image_output.shape[1] == 0 or
            image_output.shape[2] != 3):
          valid = False
      except Exception:
        valid = False
      if not valid:
        logging.warning('Skip invalid image {}'.format(image_rel_path))
        continue

      # extract groundtruth from the filename
      groundtruth_text = image_rel_path.split('_')[1]

      # write example
      example = tf.train.Example(features=tf.train.Features(feature={
        fields.TfExampleFields.image_encoded: \
          dataset_util.bytes_feature(image_jpeg),
        fields.TfExampleFields.image_format: \
          dataset_util.bytes_feature('jpeg'.encode('utf-8')),
        fields.TfExampleFields.filename: \
          dataset_util.bytes_feature(image_rel_path.encode('utf-8')),
        fields.TfExampleFields.channels: \
          dataset_util.int64_feature(3),
        fields.TfExampleFields.colorspace: \
          dataset_util.bytes_feature('rgb'.encode('utf-8')),
        fields.TfExampleFields.transcript: \
          dataset_util.bytes_feature(groundtruth_text.encode('utf-8'))
      }))
      writer.write(example.SerializeToString())
  writer.close()
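
# MJSynth filenames encode the transcript as the middle underscore-delimited
# token, e.g. './2194/2/334_EFFLORESCENT_24742.jpg' -> 'EFFLORESCENT', which
# is what the split('_')[1] above relies on.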


def create_synthtext(output_path):
  """Creates tfrecords of word crops, with curve keypoints, from SynthText."""
  writer = tf.python_io.TFRecordWriter(output_path)

  # load groundtruth file
  groundtruth_path = os.path.join(FLAGS.data_dir, 'gt.mat')
  if not os.path.exists(groundtruth_path):
    raise ValueError(
      'Could not find groundtruth file: {}'.format(groundtruth_path))
  print('Loading groundtruth...')
  groundtruth = sio.loadmat(groundtruth_path)

  num_images = groundtruth['wordBB'].shape[1] - FLAGS.start_index
  if FLAGS.num_images > 0:
    num_images = min(num_images, FLAGS.num_images)
  indices = list(range(FLAGS.start_index, FLAGS.start_index + num_images))
  if FLAGS.shuffle:
    random.shuffle(indices)

  # margin_ratio and num_keypoints are not defined elsewhere in this file;
  # the values below are assumptions.
  margin_ratio = 0.1
  num_keypoints = 20

  count = 0
  skipped = 0
  dump_images_count = 0
  for index in tqdm(indices):
    image_rel_path = str(groundtruth['imnames'][0, index][0])
    image_path = os.path.join(FLAGS.data_dir, image_rel_path)

    # load image
    im = Image.open(image_path)
    im_width, im_height = im.size

    # word polygons
    word_polygons = groundtruth['wordBB'][0, index]
    if word_polygons.ndim == 2:
      word_polygons = np.expand_dims(word_polygons, axis=2)
    word_polygons = np.transpose(word_polygons, axes=[2, 1, 0])
    bbox_xymin = np.min(word_polygons, axis=1)
    bbox_xymax = np.max(word_polygons, axis=1)
    bbox_wh = bbox_xymax - bbox_xymin
    bbox_margin = np.expand_dims(
      margin_ratio * np.sqrt(bbox_wh[:, 0] * bbox_wh[:, 1]), axis=1)
    enlarged_bbox_xymin = np.maximum(
      bbox_xymin - bbox_margin,
      np.asarray([[0, 0]], dtype=np.float32))
    enlarged_bbox_xymax = np.minimum(
      bbox_xymax + bbox_margin,
      np.asarray([[im_width - 1, im_height - 1]], dtype=np.float32))
    bbox_array = np.concatenate(
      [enlarged_bbox_xymin, enlarged_bbox_xymax], axis=1)
    bbox_array = np.round(bbox_array)
    num_bboxes = bbox_array.shape[0]

    # words
    text = groundtruth['txt'][0, index]
    words = []
    for text_line in text:
      text_line = str(text_line)
      line_words = ('\n'.join(text_line.split())).split('\n')
      words.extend(line_words)
    if len(words) != num_bboxes:
      raise ValueError(
        'Number of words and bboxes mismatch: {} vs {}'.format(
          len(words), num_bboxes))

    # char polygons for every word
    all_char_polygons = np.transpose(groundtruth['charBB'][0, index],
                                     axes=[2, 1, 0])
    char_polygons_list = []
    offset = 0
    for word in words:
      word_len = len(word)
      char_polygons_list.append(all_char_polygons[offset:offset + word_len])
      offset += word_len
    if offset != all_char_polygons.shape[0]:
      raise ValueError('Offset mismatch: {} vs {}'.format(
        offset, all_char_polygons.shape[0]))

    def _fit_and_divide(points):
      # points: [num_points, 2]
      degree = 2 if points.shape[0] > 2 else 1
      coeffs = np.polyfit(points[:, 0], points[:, 1], degree)
      poly_fn = np.poly1d(coeffs)
      xmin, xmax = np.min(points[:, 0]), np.max(points[:, 0])
      xs = np.linspace(xmin, xmax, num=(num_keypoints // 2))
      ys = poly_fn(xs)
      return np.stack([xs, ys], axis=1)

    image_w, image_h = im.size
    for i, bbox in enumerate(bbox_array):
      try:
        # crop image and encode to jpeg
        crop_coordinates = tuple(bbox.astype(int))
        crop_xmin, crop_ymin, crop_xmax, crop_ymax = crop_coordinates
        crop_w, crop_h = crop_xmax - crop_xmin, crop_ymax - crop_ymin
        if (crop_xmin < 0 or crop_ymin < 0 or
            crop_xmax >= image_w or crop_ymax >= image_h or
            crop_w <= 0 or crop_h <= 0):
          raise ValueError('Invalid crop box {}'.format(crop_coordinates))
        if crop_w * crop_h < 20:
          raise ValueError('Crop area too small: {}x{}'.format(
            crop_w, crop_h))
        word_crop_im = im.crop(crop_coordinates)
        im_buff = io.BytesIO()
        word_crop_im.save(im_buff, format='jpeg')
        word_crop_jpeg = im_buff.getvalue()
        crop_name = '{}:{}'.format(image_rel_path, i)
        word_crop_w, word_crop_h = word_crop_im.size

        # fit curves to char polygon points and divide the curves into
        # evenly spaced keypoints
        char_polygons = char_polygons_list[i]
        crop_xymin = [crop_xmin, crop_ymin]
        rel_char_polygons = char_polygons - [[crop_xymin]]
        with warnings.catch_warnings():
          warnings.simplefilter('error', np.RankWarning)
          try:
            top_curve_points = _fit_and_divide(
              rel_char_polygons[:, :2, :].reshape([-1, 2]))
            bottom_curve_points = _fit_and_divide(
              rel_char_polygons[:, 2:, :].reshape([-1, 2]))
          except np.RankWarning:
            raise ValueError('Bad polyfit.')
        curve_points = np.concatenate(
          [top_curve_points, bottom_curve_points], axis=0)
        flat_curve_points = curve_points.flatten().tolist()

        if (FLAGS.num_dump_images > 0 and
            dump_images_count < FLAGS.num_dump_images):
          def _draw_cross(draw, center, size=2):
            left_pt = tuple(center - [size, 0])
            right_pt = tuple(center + [size, 0])
            top_pt = tuple(center - [0, size])
            bottom_pt = tuple(center + [0, size])
            draw.line([top_pt, bottom_pt], width=1, fill='#ffffff')
            draw.line([left_pt, right_pt], width=1, fill='#ffffff')
          save_fname = 'aster/vis/{}_{}.jpg'.format(count, words[i])
          draw = ImageDraw.Draw(word_crop_im)
          for pts in curve_points:
            _draw_cross(draw, pts)
          word_crop_im.save(save_fname)
          dump_images_count += 1

        # write an example
        example = tf.train.Example(features=tf.train.Features(feature={
          fields.TfExampleFields.image_encoded: \
            dataset_util.bytes_feature(word_crop_jpeg),
          fields.TfExampleFields.image_format: \
            dataset_util.bytes_feature('jpeg'.encode('utf-8')),
          fields.TfExampleFields.filename: \
            dataset_util.bytes_feature(crop_name.encode('utf-8')),
          fields.TfExampleFields.channels: \
            dataset_util.int64_feature(3),
          fields.TfExampleFields.colorspace: \
            dataset_util.bytes_feature('rgb'.encode('utf-8')),
          fields.TfExampleFields.width: \
            dataset_util.int64_feature(word_crop_w),
          fields.TfExampleFields.height: \
            dataset_util.int64_feature(word_crop_h),
          fields.TfExampleFields.transcript: \
            dataset_util.bytes_feature(words[i].encode('utf-8')),
          fields.TfExampleFields.keypoints: \
            dataset_util.float_list_feature(flat_curve_points),
        }))
        writer.write(example.SerializeToString())
        count += 1
      except ValueError as err:
        print('ValueError: {}'.format(err))
        skipped += 1
        continue

  print('{} samples created, {} skipped'.format(count, skipped))
  writer.close()
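

# A minimal read-back sketch for the records written above, assuming the same
# TF 1.x API family as the tf.python_io writers used in this file:
def _parse_example(serialized):
  keys_to_features = {
    fields.TfExampleFields.image_encoded:
      tf.FixedLenFeature([], tf.string),
    fields.TfExampleFields.transcript:
      tf.FixedLenFeature([], tf.string),
  }
  parsed = tf.parse_single_example(serialized, keys_to_features)
  image = tf.image.decode_jpeg(
    parsed[fields.TfExampleFields.image_encoded], channels=3)
  return image, parsed[fields.TfExampleFields.transcript]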