def create_ic15(output_path):
  # NOTE: despite the name, this variant converts SynthText_Add word crops.
  writer = tf.python_io.TFRecordWriter(output_path)
  data_dir = '/data/wangyz/Projects/Dataset/SynthText_Add/'
  groundtruth_dir = '/data/wangyz/Projects/Dataset/SynthText_Add/annotationlist/'
  count = 0
  for datasetid in range(1, 21):
    groundtruth_file_path = groundtruth_dir + 'gt_' + str(datasetid) + '.txt'

    with open(groundtruth_file_path, 'r') as f:
      lines = f.readlines()
      img_gts = [line.strip() for line in lines]
      for img_gt in img_gts:

        content = img_gt.split(',')
        img_path = data_dir + 'crop_img_' + str(datasetid) + '/' + content[0]
        gt = content[1][1:-1]
        if FLAGS.exclude_difficult and not char_check(gt):
          continue
        img = Image.open(img_path)
        img = img.convert('RGB')
        img_buff = io.BytesIO()
        img.save(img_buff, format='jpeg')
        word_crop_jpeg = img_buff.getvalue()
        crop_name = 'SynAdd_' + os.path.basename(img_path)
        example = tf.train.Example(features=tf.train.Features(feature={
          fields.TfExampleFields.image_encoded: \
            dataset_util.bytes_feature(word_crop_jpeg),
          fields.TfExampleFields.image_format: \
            dataset_util.bytes_feature('jpeg'.encode('utf-8')),
          fields.TfExampleFields.filename: \
            dataset_util.bytes_feature(crop_name.encode('utf-8')),
          fields.TfExampleFields.channels: \
            dataset_util.int64_feature(3),
          fields.TfExampleFields.colorspace: \
            dataset_util.bytes_feature('rgb'.encode('utf-8')),
          fields.TfExampleFields.transcript: \
            dataset_util.bytes_feature(gt.encode('utf-8')),
        }))
        writer.write(example.SerializeToString())
        count += 1
        if count % 2000 == 0:
          print(count)
  writer.close()
  print('{} examples created'.format(count))
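char_check is used by several converters in this listing but is not defined here. A minimal sketch of a plausible implementation, assuming "difficult" means the transcript contains anything outside ASCII letters and digits (the alphabet choice is an assumption, not the original code):

import re

def char_check(text):
    # Accept transcripts made only of ASCII letters and digits; empty
    # strings, punctuation and non-ASCII characters are rejected.
    return re.match(r'^[0-9A-Za-z]+$', text) is not None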
def create_ic15(output_path):
    # Converts the IC15 word-recognition set listed in test_groundtruth_all.txt.
    writer = tf.python_io.TFRecordWriter(output_path)

    groundtruth_file_path = os.path.join(FLAGS.data_dir,
                                         'test_groundtruth_all.txt')

    count = 0
    with open(groundtruth_file_path, 'r') as f:
        lines = f.readlines()
        img_gts = [line.strip() for line in lines]
        for img_gt in img_gts:
            img_rel_path, gt = img_gt.split(' ', 1)
            if FLAGS.exclude_difficult and not char_check(gt):
                continue
            img_path = os.path.join(FLAGS.data_dir, img_rel_path)
            img = Image.open(img_path)
            img_buff = io.BytesIO()
            img.save(img_buff, format='jpeg')
            word_crop_jpeg = img_buff.getvalue()
            crop_name = os.path.basename(img_path)

            example = tf.train.Example(features=tf.train.Features(feature={
              fields.TfExampleFields.image_encoded: \
                dataset_util.bytes_feature(word_crop_jpeg),
              fields.TfExampleFields.image_format: \
                dataset_util.bytes_feature('jpeg'.encode('utf-8')),
              fields.TfExampleFields.filename: \
                dataset_util.bytes_feature(crop_name.encode('utf-8')),
              fields.TfExampleFields.channels: \
                dataset_util.int64_feature(3),
              fields.TfExampleFields.colorspace: \
                dataset_util.bytes_feature('rgb'.encode('utf-8')),
              fields.TfExampleFields.transcript: \
                dataset_util.bytes_feature(gt.encode('utf-8')),
            }))
            writer.write(example.SerializeToString())
            count += 1

    writer.close()
    print('{} examples created'.format(count))
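A quick way to validate the output file is to read one record back and print the stored features. A minimal sketch using the same TF1 API and the script's existing fields import:

def inspect_first_record(tfrecord_path):
    # Decode the first serialized tf.train.Example and print two features.
    for serialized in tf.python_io.tf_record_iterator(tfrecord_path):
        example = tf.train.Example()
        example.ParseFromString(serialized)
        feature = example.features.feature
        name = feature[fields.TfExampleFields.filename].bytes_list.value[0]
        gt = feature[fields.TfExampleFields.transcript].bytes_list.value[0]
        print(name.decode('utf-8'), gt.decode('utf-8'))
        break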
Example #3
def create_svt_subset(output_path):
    writer = tf.python_io.TFRecordWriter(output_path)
    test_xml_path = os.path.join(FLAGS.data_dir, 'test.xml')
    count = 0
    xml_root = ET.parse(test_xml_path).getroot()
    for image_node in xml_root.findall('image'):
        image_rel_path = image_node.find('imageName').text
        lexicon = image_node.find('lex').text.lower()
        lexicon = lexicon.split(',')
        image_path = os.path.join(FLAGS.data_dir, image_rel_path)
        image = Image.open(image_path)
        image_w, image_h = image.size

        for i, rect in enumerate(image_node.find('taggedRectangles')):
            bbox_x = float(rect.get('x'))
            bbox_y = float(rect.get('y'))
            bbox_w = float(rect.get('width'))
            bbox_h = float(rect.get('height'))
            if FLAGS.crop_margin > 0:
                margin = bbox_h * FLAGS.crop_margin
                bbox_x = bbox_x - margin
                bbox_y = bbox_y - margin
                bbox_w = bbox_w + 2 * margin
                bbox_h = bbox_h + 2 * margin
            bbox_xmin = int(round(max(0, bbox_x)))
            bbox_ymin = int(round(max(0, bbox_y)))
            bbox_xmax = int(round(min(image_w - 1, bbox_x + bbox_w)))
            bbox_ymax = int(round(min(image_h - 1, bbox_y + bbox_h)))

            word_crop_im = image.crop(
                (bbox_xmin, bbox_ymin, bbox_xmax, bbox_ymax))
            im_buff = io.BytesIO()
            word_crop_im.save(im_buff, format='jpeg')
            word_crop_jpeg = im_buff.getvalue()
            crop_name = '{}:{}'.format(image_rel_path, i)

            groundtruth_text = rect.find('tag').text.lower()

            example = tf.train.Example(features=tf.train.Features(feature={
              fields.TfExampleFields.image_encoded: \
                dataset_util.bytes_feature(word_crop_jpeg),
              fields.TfExampleFields.image_format: \
                dataset_util.bytes_feature('jpeg'.encode('utf-8')),
              fields.TfExampleFields.filename: \
                dataset_util.bytes_feature(crop_name.encode('utf-8')),
              fields.TfExampleFields.channels: \
                dataset_util.int64_feature(3),
              fields.TfExampleFields.colorspace: \
                dataset_util.bytes_feature('rgb'.encode('utf-8')),
              fields.TfExampleFields.transcript: \
                dataset_util.bytes_feature(groundtruth_text.encode('utf-8')),
              fields.TfExampleFields.lexicon: \
                dataset_util.bytes_feature(('\t'.join(lexicon)).encode('utf-8')),
            }))
            writer.write(example.SerializeToString())
            count += 1

    writer.close()
    print('{} examples created'.format(count))
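Note that the per-image lexicon is flattened into a single tab-joined bytes feature, so a reader has to split it back out, e.g. (reader-side sketch, reusing the feature-map access from the read-back example above):

lex_bytes = feature[fields.TfExampleFields.lexicon].bytes_list.value[0]
lexicon = lex_bytes.decode('utf-8').split('\t')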
def create_cute80(output_path):
    writer = tf.python_io.TFRecordWriter(output_path)
    image_list_file = os.path.join(FLAGS.data_dir, 'imagelist.txt')
    with open(image_list_file, 'r') as f:
        tlines = [tline.rstrip('\n') for tline in f.readlines()]

    count = 0

    for tline in tlines:
        image_rel_path, groundtruth_text = tline.split(' ')

        image_path = os.path.join(FLAGS.data_dir, image_rel_path)
        with open(image_path, 'rb') as f:
            image_jpeg = f.read()

        example = tf.train.Example(features=tf.train.Features(feature={
          fields.TfExampleFields.image_encoded: \
            dataset_util.bytes_feature(image_jpeg),
          fields.TfExampleFields.image_format: \
            dataset_util.bytes_feature('jpeg'.encode('utf-8')),
          fields.TfExampleFields.filename: \
            dataset_util.bytes_feature(image_rel_path.encode('utf-8')),
          fields.TfExampleFields.channels: \
            dataset_util.int64_feature(3),
          fields.TfExampleFields.colorspace: \
            dataset_util.bytes_feature('rgb'.encode('utf-8')),
          fields.TfExampleFields.transcript: \
            dataset_util.bytes_feature(groundtruth_text.encode('utf-8')),
        }))
        writer.write(example.SerializeToString())
        count += 1

    writer.close()
    print('{} examples created'.format(count))
Example #5
def create_iiit5k_subset(output_path, train_subset=True, lexicon_index=None):
    writer = tf.python_io.TFRecordWriter(output_path)

    mat_file_name = 'traindata.mat' if train_subset else 'testdata.mat'
    data_key = 'traindata' if train_subset else 'testdata'
    groundtruth_mat_path = os.path.join(FLAGS.data_dir, mat_file_name)

    mat_dict = sio.loadmat(groundtruth_mat_path)
    entries = mat_dict[data_key].flatten()
    for entry in tqdm(entries):
        image_rel_path = str(entry[0][0])
        groundtruth_text = str(entry[1][0])
        lexicon = []  # train subset has no lexicon; keep the feature empty
        if not train_subset:
            lexicon = [str(t[0]) for t in entry[lexicon_index].flatten()]

        image_path = os.path.join(FLAGS.data_dir, image_rel_path)
        with open(image_path, 'rb') as f:
            image_jpeg = f.read()

        example = tf.train.Example(features=tf.train.Features(feature={
          fields.TfExampleFields.image_encoded: \
            dataset_util.bytes_feature(image_jpeg),
          fields.TfExampleFields.image_format: \
            dataset_util.bytes_feature('jpeg'.encode('utf-8')),
          fields.TfExampleFields.filename: \
            dataset_util.bytes_feature(image_rel_path.encode('utf-8')),
          fields.TfExampleFields.channels: \
            dataset_util.int64_feature(3),
          fields.TfExampleFields.colorspace: \
            dataset_util.bytes_feature('rgb'.encode('utf-8')),
          fields.TfExampleFields.transcript: \
            dataset_util.bytes_feature(groundtruth_text.encode('utf-8')),
          fields.TfExampleFields.lexicon: \
            dataset_util.bytes_feature(('\t'.join(lexicon)).encode('utf-8'))
        }))
        writer.write(example.SerializeToString())

    writer.close()
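IIIT5K ships two test-time lexicons in separate fields of testdata.mat, which is why lexicon_index is a parameter. A hedged usage sketch; the indices 2 (50-word) and 3 (1k-word) are assumptions about the .mat layout, so verify them against your copy:

create_iiit5k_subset('iiit5k_train.tfrecord', train_subset=True)
create_iiit5k_subset('iiit5k_test_50.tfrecord', train_subset=False, lexicon_index=2)
create_iiit5k_subset('iiit5k_test_1k.tfrecord', train_subset=False, lexicon_index=3)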
def create_ic15(output_path):
  # NOTE: this variant merges IC13, IC15 and COCO-Text word crops.
  writer = tf.python_io.TFRecordWriter(output_path)
  data_dirs = [
    '/data/wangyz/Projects/Dataset/IC13_WordRecognition/Challenge2_Training_Task3_Images_GT/',
    '/data/wangyz/Projects/Dataset/IC15Inc_WordRecognition/ch4_training_word_images_gt/',
    '/data/wangyz/Projects/Dataset/COCO_WordRecognition/train_words/',
    '/data/wangyz/Projects/Dataset/COCO_WordRecognition/val_words/',
  ]
  groundtruth_file_paths = [
    '/data/wangyz/Projects/Dataset/IC13_WordRecognition/Challenge2_Training_Task3_Images_GT/gt.txt',
    '/data/wangyz/Projects/Dataset/IC15Inc_WordRecognition/ch4_training_word_images_gt/gt.txt',
    '/data/wangyz/Projects/Dataset/COCO_WordRecognition/train_words_gt.txt',
    '/data/wangyz/Projects/Dataset/COCO_WordRecognition/val_words_gt.txt',
  ]
  dataset_names = ['IC13', 'IC15', 'COCO-train', 'COCO-val']
  count = 0
  for datasetid in range(4):
    groundtruth_file_path = groundtruth_file_paths[datasetid]

    with open(groundtruth_file_path, 'r') as f:
      lines = f.readlines()
      img_gts = [line.strip() for line in lines]
      for img_gt in img_gts:
        content = img_gt.split(',')
        if datasetid in (0, 1):
          # IC13/IC15 lines look like: word_1.png, "transcript"
          img_path = data_dirs[datasetid] + content[0]
          gt = content[1][2:-1]
        else:
          # COCO lines look like: image_id,transcript (transcript may contain commas)
          img_path = data_dirs[datasetid] + content[0] + '.jpg'
          gt = img_gt[len(content[0]) + 1:]
        if FLAGS.exclude_difficult and not char_check(gt):
          continue

        img = Image.open(img_path)
        img = img.convert('RGB')
        img_buff = io.BytesIO()
        img.save(img_buff, format='jpeg')
        word_crop_jpeg = img_buff.getvalue()
        crop_name = dataset_names[datasetid] + '_' + os.path.basename(img_path)
        gt = gt.replace(' ', '')
        example = tf.train.Example(features=tf.train.Features(feature={
          fields.TfExampleFields.image_encoded: \
            dataset_util.bytes_feature(word_crop_jpeg),
          fields.TfExampleFields.image_format: \
            dataset_util.bytes_feature('jpeg'.encode('utf-8')),
          fields.TfExampleFields.filename: \
            dataset_util.bytes_feature(crop_name.encode('utf-8')),
          fields.TfExampleFields.channels: \
            dataset_util.int64_feature(3),
          fields.TfExampleFields.colorspace: \
            dataset_util.bytes_feature('rgb'.encode('utf-8')),
          fields.TfExampleFields.transcript: \
            dataset_util.bytes_feature(gt.encode('utf-8')),
        }))
        writer.write(example.SerializeToString())
        count += 1
        if count % 1000 == 0:
          print(count)
  writer.close()
  print('{} examples created'.format(count))
Example #7
def create_ic13(output_path):
    writer = tf.python_io.TFRecordWriter(output_path)

    groundtruth_dir = os.path.join(FLAGS.data_dir, 'Challenge2_Test_Task1_GT')
    groundtruth_files = glob.glob(os.path.join(groundtruth_dir, '*.txt'))

    count = 0
    for groundtruth_file in groundtruth_files:
        image_id = re.match(r'.*gt_img_(\d+).txt$', groundtruth_file).group(1)
        image_rel_path = 'img_{}.jpg'.format(image_id)
        image_path = os.path.join(FLAGS.data_dir,
                                  'Challenge2_Test_Task12_Images',
                                  image_rel_path)
        image = Image.open(image_path)
        image_w, image_h = image.size

        with open(groundtruth_file, 'r') as f:
            groundtruth = f.read()

        matches = re.finditer(r'^(\d+),\s(\d+),\s(\d+),\s(\d+),\s\"(.+)\"$',
                              groundtruth, re.MULTILINE)
        for i, match in enumerate(matches):
            bbox_xmin = float(match.group(1))
            bbox_ymin = float(match.group(2))
            bbox_xmax = float(match.group(3))
            bbox_ymax = float(match.group(4))
            groundtruth_text = match.group(5)

            if _is_difficult(groundtruth_text):
                continue

            if FLAGS.crop_margin > 0:
                bbox_h = bbox_ymax - bbox_ymin
                margin = bbox_h * FLAGS.crop_margin
                bbox_xmin = bbox_xmin - margin
                bbox_ymin = bbox_ymin - margin
                bbox_xmax = bbox_xmax + margin
                bbox_ymax = bbox_ymax + margin
            bbox_xmin = int(round(max(0, bbox_xmin)))
            bbox_ymin = int(round(max(0, bbox_ymin)))
            bbox_xmax = int(round(min(image_w - 1, bbox_xmax)))
            bbox_ymax = int(round(min(image_h - 1, bbox_ymax)))

            word_crop_im = image.crop(
                (bbox_xmin, bbox_ymin, bbox_xmax, bbox_ymax))
            im_buff = io.BytesIO()
            word_crop_im.save(im_buff, format='jpeg')
            word_crop_jpeg = im_buff.getvalue()
            crop_name = '{}:{}'.format(image_rel_path, i)

            example = tf.train.Example(features=tf.train.Features(feature={
              fields.TfExampleFields.image_encoded: \
                dataset_util.bytes_feature(word_crop_jpeg),
              fields.TfExampleFields.image_format: \
                dataset_util.bytes_feature('jpeg'.encode('utf-8')),
              fields.TfExampleFields.filename: \
                dataset_util.bytes_feature(crop_name.encode('utf-8')),
              fields.TfExampleFields.channels: \
                dataset_util.int64_feature(3),
              fields.TfExampleFields.colorspace: \
                dataset_util.bytes_feature('rgb'.encode('utf-8')),
              fields.TfExampleFields.transcript: \
                dataset_util.bytes_feature(groundtruth_text.encode('utf-8')),
            }))
            writer.write(example.SerializeToString())
            count += 1

    writer.close()
    print('{} examples created'.format(count))
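_is_difficult is also external to this listing. A plausible minimal version (an assumption, not the original code): a transcript is difficult when it contains any non-word character:

import re

def _is_difficult(word):
    # Reject words containing punctuation, whitespace or other symbols.
    return not re.match(r'^[\w]+$', word)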
Example #8
def create_ic03(output_path):
    writer = tf.python_io.TFRecordWriter(output_path)

    lexicon_file = os.path.join(FLAGS.data_dir, 'lexicon_full')
    with open(lexicon_file, 'r') as f:
        lexicon_list = [tline.rstrip('\n').lower() for tline in f.readlines()]

    xml_path = os.path.join(FLAGS.data_dir, 'words.xml')
    xml_root = ET.parse(xml_path).getroot()
    count = 0
    for image_node in xml_root.findall('image'):
        image_rel_path = image_node.find('imageName').text
        image_path = os.path.join(FLAGS.data_dir, image_rel_path)
        image = Image.open(image_path)
        image_w, image_h = image.size

        for i, rect in enumerate(image_node.find('taggedRectangles')):
            groundtruth_text = rect.find('tag').text.lower()
            if FLAGS.ignore_difficult and _is_difficult(groundtruth_text):
                continue

            bbox_x = float(rect.get('x'))
            bbox_y = float(rect.get('y'))
            bbox_w = float(rect.get('width'))
            bbox_h = float(rect.get('height'))
            if FLAGS.crop_margin > 0:
                margin = bbox_h * FLAGS.crop_margin
                bbox_x = bbox_x - margin
                bbox_y = bbox_y - margin
                bbox_w = bbox_w + 2 * margin
                bbox_h = bbox_h + 2 * margin
            bbox_xmin = int(round(max(0, bbox_x)))
            bbox_ymin = int(round(max(0, bbox_y)))
            bbox_xmax = int(round(min(image_w - 1, bbox_x + bbox_w)))
            bbox_ymax = int(round(min(image_h - 1, bbox_y + bbox_h)))

            word_crop_im = image.crop(
                (bbox_xmin, bbox_ymin, bbox_xmax, bbox_ymax))
            im_buff = io.BytesIO()
            word_crop_im.save(im_buff, format='jpeg')
            word_crop_jpeg = im_buff.getvalue()
            crop_name = '{}:{}'.format(image_rel_path, i)

            # lexicon_size is assumed to be defined at module level (e.g. 50).
            lexicon = _random_lexicon(lexicon_list, groundtruth_text,
                                      lexicon_size)

            example = tf.train.Example(features=tf.train.Features(feature={
              fields.TfExampleFields.image_encoded: \
                dataset_util.bytes_feature(word_crop_jpeg),
              fields.TfExampleFields.image_format: \
                dataset_util.bytes_feature('jpeg'.encode('utf-8')),
              fields.TfExampleFields.filename: \
                dataset_util.bytes_feature(crop_name.encode('utf-8')),
              fields.TfExampleFields.channels: \
                dataset_util.int64_feature(3),
              fields.TfExampleFields.colorspace: \
                dataset_util.bytes_feature('rgb'.encode('utf-8')),
              fields.TfExampleFields.transcript: \
                dataset_util.bytes_feature(groundtruth_text.encode('utf-8')),
              fields.TfExampleFields.lexicon: \
                dataset_util.bytes_feature(('\t'.join(lexicon)).encode('utf-8')),
            }))
            writer.write(example.SerializeToString())
            count += 1

    writer.close()
    print('{} examples created'.format(count))
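_random_lexicon and lexicon_size are defined elsewhere in the original script. A minimal sketch of the intended behavior, sampling distractors from the full lexicon and ensuring the groundtruth word is present (the replacement policy is an assumption):

import random

def _random_lexicon(lexicon_list, groundtruth_text, lexicon_size):
    # Draw lexicon_size candidates; if the true word is missing, swap it in
    # for one of them, then shuffle so its position is uninformative.
    lexicon = random.sample(lexicon_list, lexicon_size)
    if groundtruth_text not in lexicon:
        lexicon[0] = groundtruth_text
    random.shuffle(lexicon)
    return lexicon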
Example #9
def main(_):
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)

    # load groundtruth file
    groundtruth_file = os.path.join(FLAGS.data_dir, 'annotation.txt')
    with open(groundtruth_file, 'r') as f:
        groundtruth_lines = f.readlines()

    num_images = len(groundtruth_lines) - FLAGS.start_index
    if FLAGS.num_images > 0:
        num_images = min(num_images, FLAGS.num_images)

    indices = list(range(FLAGS.start_index, FLAGS.start_index + num_images))
    if FLAGS.shuffle:
        random.shuffle(indices)

    # a test decode pipeline for validating images
    image_jpeg_input = tf.placeholder(dtype=tf.string, shape=[])
    image = tf.image.decode_jpeg(image_jpeg_input,
                                 channels=3,
                                 try_recover_truncated=False,
                                 acceptable_fraction=1)

    with tf.Session() as sess:
        for index in tqdm(indices):
            image_rel_path = groundtruth_lines[index].split(' ')[0]
            image_path = os.path.join(FLAGS.data_dir, image_rel_path)

            # validate image
            valid = True
            image_jpeg = None
            try:
                with open(image_path, 'rb') as f:
                    image_jpeg = f.read()
                    image_output = sess.run(
                        image, feed_dict={image_jpeg_input: image_jpeg})
                    if (image_output.ndim != 3 or image_output.shape[0] == 0
                            or image_output.shape[1] == 0
                            or image_output.shape[2] != 3):
                        valid = False
            except Exception:
                valid = False

            if not valid:
                logging.warning('Skip invalid image {}'.format(image_rel_path))
                continue

            # extract groundtruth
            groundtruth_text = image_rel_path.split('_')[1]

            # write example
            example = tf.train.Example(features=tf.train.Features(feature={
              fields.TfExampleFields.image_encoded: \
                dataset_util.bytes_feature(image_jpeg),
              fields.TfExampleFields.image_format: \
                dataset_util.bytes_feature('jpeg'.encode('utf-8')),
              fields.TfExampleFields.filename: \
                dataset_util.bytes_feature(image_rel_path.encode('utf-8')),
              fields.TfExampleFields.channels: \
                dataset_util.int64_feature(3),
              fields.TfExampleFields.colorspace: \
                dataset_util.bytes_feature('rgb'.encode('utf-8')),
              fields.TfExampleFields.transcript: \
                dataset_util.bytes_feature(groundtruth_text.encode('utf-8'))
            }))
            writer.write(example.SerializeToString())

    writer.close()
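Here the transcript comes straight from the MJSynth filename convention, where the label sits between the first and second underscore of the annotated path. For example:

# Annotation lines look like './2911/6/77_heretical_35885.jpg 35885';
# the word itself is embedded in the path.
image_rel_path = './2911/6/77_heretical_35885.jpg'
groundtruth_text = image_rel_path.split('_')[1]  # -> 'heretical'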
Example #10
def main(_):
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)

    # load groundtruth file
    groundtruth_path = os.path.join(FLAGS.data_dir, 'gt.mat')
    if not os.path.exists(groundtruth_path):
        raise ValueError(
            'Could not find groundtruth file: {}'.format(groundtruth_path))
    print('Loading groundtruth...')
    groundtruth = sio.loadmat(groundtruth_path)

    num_images = groundtruth['wordBB'].shape[1] - FLAGS.start_index

    if FLAGS.num_images > 0:
        num_images = min(num_images, FLAGS.num_images)

    indices = list(range(FLAGS.start_index, FLAGS.start_index + num_images))
    if FLAGS.shuffle:
        random.shuffle(indices)

    count = 0
    skipped = 0
    dump_images_count = 0

    for index in tqdm(indices):
        image_rel_path = str(groundtruth['imnames'][0, index][0])
        image_path = os.path.join(FLAGS.data_dir, image_rel_path)

        # load image jpeg data
        im = Image.open(image_path)
        im_width, im_height = im.size

        # word polygons
        word_polygons = groundtruth['wordBB'][0, index]
        if word_polygons.ndim == 2:
            word_polygons = np.expand_dims(word_polygons, axis=2)
        word_polygons = np.transpose(word_polygons, axes=[2, 1, 0])
        bbox_xymin = np.min(word_polygons, axis=1)
        bbox_xymax = np.max(word_polygons, axis=1)
        bbox_wh = bbox_xymax - bbox_xymin
        # margin_ratio is assumed to be a module-level constant (e.g. 0.05).
        bbox_margin = np.expand_dims(margin_ratio *
                                     np.sqrt(bbox_wh[:, 0] * bbox_wh[:, 1]),
                                     axis=1)
        enlarged_bbox_xymin = np.maximum(
            bbox_xymin - bbox_margin, np.asarray([[0, 0]], dtype=np.float32))
        enlarged_bbox_xymax = np.minimum(
            bbox_xymax + bbox_margin,
            np.asarray([[im_width - 1, im_height - 1]], dtype=np.float32))
        bbox_array = np.concatenate([enlarged_bbox_xymin, enlarged_bbox_xymax],
                                    axis=1)
        bbox_array = np.round(bbox_array)
        num_bboxes = bbox_array.shape[0]

        # words
        text = groundtruth['txt'][0, index]
        words = []
        for text_line in text:
            text_line = str(text_line)
            line_words = ('\n'.join(text_line.split())).split('\n')
            words.extend(line_words)
        if len(words) != num_bboxes:
            raise ValueError(
                'Number of words and bboxes mismatch: {} vs {}'.format(
                    len(words), num_bboxes))

        # char polygons for every word
        all_char_polygons = np.transpose(groundtruth['charBB'][0, index],
                                         axes=[2, 1, 0])
        char_polygons_list = []
        offset = 0
        for word in words:
            word_len = len(word)
            char_polygons_list.append(all_char_polygons[offset:offset +
                                                        word_len])
            offset += word_len
        if offset != all_char_polygons.shape[0]:
            raise ValueError('Offset mismatch: {} vs {}'.format(
                offset, all_char_polygons.shape[0]))

        def _fit_and_divide(points):
            # points: [num_points, 2]
            degree = 2 if points.shape[0] > 2 else 1
            coeffs = np.polyfit(points[:, 0], points[:, 1], degree)
            poly_fn = np.poly1d(coeffs)
            xmin, xmax = np.min(points[:, 0]), np.max(points[:, 0])
            # num_keypoints is assumed to be a module-level constant.
            xs = np.linspace(xmin, xmax, num=(num_keypoints // 2))
            ys = poly_fn(xs)
            return np.stack([xs, ys], axis=1)

        image_w, image_h = im.size

        for i, bbox in enumerate(bbox_array):
            try:
                # crop image and encode to jpeg
                crop_coordinates = tuple(bbox.astype(int))
                crop_xmin, crop_ymin, crop_xmax, crop_ymax = crop_coordinates
                crop_w, crop_h = crop_xmax - crop_xmin, crop_ymax - crop_ymin
                if (crop_xmin < 0 or crop_ymin < 0 or crop_xmax >= image_w
                        or crop_ymax >= image_h or crop_w <= 0 or crop_h <= 0):
                    raise ValueError(
                        'Invalid crop box {}'.format(crop_coordinates))
                if crop_w * crop_h < 20:
                    raise ValueError('Crop area too small: {}x{}'.format(
                        crop_w, crop_h))

                word_crop_im = im.crop(crop_coordinates)
                im_buff = io.BytesIO()
                word_crop_im.save(im_buff, format='jpeg')
                word_crop_jpeg = im_buff.getvalue()
                crop_name = '{}:{}'.format(image_rel_path, i)
                word_crop_w, word_crop_h = word_crop_im.size

                # fit curves to chars polygon points and divide the curve
                char_polygons = char_polygons_list[i]
                crop_xymin = [crop_xmin, crop_ymin]
                rel_char_polygons = char_polygons - [[crop_xymin]]
                with warnings.catch_warnings():
                    warnings.simplefilter('error', np.RankWarning)
                    try:
                        top_curve_points = _fit_and_divide(
                            rel_char_polygons[:, :2, :].reshape([-1, 2]))
                        bottom_curve_points = _fit_and_divide(
                            rel_char_polygons[:, 2:, :].reshape([-1, 2]))
                    except np.RankWarning:
                        raise ValueError('Bad polyfit.')

                curve_points = np.concatenate(
                    [top_curve_points, bottom_curve_points], axis=0)
                flat_curve_points = curve_points.flatten().tolist()

                if FLAGS.num_dump_images > 0 and dump_images_count < FLAGS.num_dump_images:

                    def _draw_cross(draw, center, size=2):
                        left_pt = tuple(center - [size, 0])
                        right_pt = tuple(center + [size, 0])
                        top_pt = tuple(center - [0, size])
                        bottom_pt = tuple(center + [0, size])
                        draw.line([top_pt, bottom_pt], width=1, fill='#ffffff')
                        draw.line([left_pt, right_pt], width=1, fill='#ffffff')

                    save_fname = 'aster/vis/{}_{}.jpg'.format(count, words[i])
                    draw = ImageDraw.Draw(word_crop_im)
                    for pts in curve_points:
                        _draw_cross(draw, pts)
                    word_crop_im.save(save_fname)
                    dump_images_count += 1

                # write an example
                example = tf.train.Example(features=tf.train.Features(feature={
                    fields.TfExampleFields.image_encoded: \
                        dataset_util.bytes_feature(word_crop_jpeg),
                    fields.TfExampleFields.image_format: \
                        dataset_util.bytes_feature('jpeg'.encode('utf-8')),
                    fields.TfExampleFields.filename: \
                        dataset_util.bytes_feature(crop_name.encode('utf-8')),
                    fields.TfExampleFields.channels: \
                        dataset_util.int64_feature(3),
                    fields.TfExampleFields.colorspace: \
                        dataset_util.bytes_feature('rgb'.encode('utf-8')),
                    fields.TfExampleFields.width: \
                        dataset_util.int64_feature(word_crop_w),
                    fields.TfExampleFields.height: \
                        dataset_util.int64_feature(word_crop_h),
                    fields.TfExampleFields.transcript: \
                        dataset_util.bytes_feature(words[i].encode('utf-8')),
                    fields.TfExampleFields.keypoints: \
                        dataset_util.float_list_feature(flat_curve_points),
                }))

                writer.write(example.SerializeToString())
                count += 1
            except ValueError as err:
                print("ValueError: {}".format(err))
                skipped += 1
                continue

    print('{} samples created, {} skipped'.format(count, skipped))
    writer.close()
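On the reader side, the flattened keypoints can be restored to the two fitted curves. A minimal sketch, assuming the same num_keypoints value used at creation time:

import numpy as np

def unflatten_keypoints(flat_curve_points, num_keypoints):
    # The writer stores the top curve followed by the bottom curve, each
    # with num_keypoints // 2 (x, y) pairs, flattened row-major.
    points = np.asarray(flat_curve_points, dtype=np.float32).reshape([-1, 2])
    half = num_keypoints // 2
    return points[:half], points[half:]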