def cvt_to_tfrecords(output_path, data_path, gt_path, records_per_file=50000):
    """Convert the SynthText dataset into sharded TFRecord files.

    Args:
        output_path: Shard path pattern; must contain a '%d'-style placeholder
            that receives the integer shard index.
        data_path: Root directory of the SynthText images.
        gt_path: Path to the SynthText ground-truth .mat file.
        records_per_file: Number of examples written into each shard.
    """
    fetcher = SynthTextDataFetcher(root_path=data_path, mat_path=gt_path)
    # range() is not shuffleable in Python 3; materialize to a list first.
    image_idxes = list(range(fetcher.num_images))
    np.random.shuffle(image_idxes)
    record_count = 0
    tfrecord_writer = None
    try:
        for image_idx in image_idxes:
            if record_count % records_per_file == 0:
                # Close the finished shard before opening the next one; the
                # original leaked every writer but the last.
                if tfrecord_writer is not None:
                    tfrecord_writer.close()
                fid = record_count // records_per_file  # integer shard index
                tfrecord_writer = tf.python_io.TFRecordWriter(output_path % (fid))
            print("converting image %d/%d" % (record_count, fetcher.num_images))
            record = fetcher.fetch_record(image_idx)
            if record is None:
                print('\nimage %d does not exist' % (image_idx + 1))
                continue
            record_count += 1
            image_path, image, txts, rect_bboxes, oriented_bboxes = record
            # Very short words (< 3 chars) are marked as ignorable.
            labels = []
            for txt in txts:
                if len(txt) < 3:
                    labels.append(config.ignore_label)
                else:
                    labels.append(config.text_label)
            image_data = tf.gfile.FastGFile(image_path, 'r').read()
            shape = image.shape
            image_name = str(util.io.get_filename(image_path).split('.')[0])
            example = convert_to_example(image_data, image_name, labels, txts,
                                         rect_bboxes, oriented_bboxes, shape)
            tfrecord_writer.write(example.SerializeToString())
    finally:
        if tfrecord_writer is not None:
            tfrecord_writer.close()
def cvt_to_tfrecords(output_path, data_path, gt_path, records_per_file=50000):
    """Dump SynthText examples into sharded TFRecord files.

    Args:
        output_path: Shard path pattern containing a '%d'-style placeholder.
        data_path: Root directory of the SynthText images.
        gt_path: Path to the SynthText ground-truth .mat file.
        records_per_file: Maximum number of examples per shard.
    """
    fetcher = SynthTextDataFetcher(root_path=data_path, mat_path=gt_path)
    fid = 0
    image_idx = -1
    while image_idx < fetcher.num_images:
        with tf.python_io.TFRecordWriter(output_path % (fid)) as tfrecord_writer:
            written = 0
            while written != records_per_file:
                image_idx += 1
                if image_idx >= fetcher.num_images:
                    break
                print("loading image %d/%d" % (image_idx + 1, fetcher.num_images))
                record = fetcher.fetch_record(image_idx)
                if record is None:
                    print('\nimage %d does not exist' % (image_idx + 1))
                    continue
                image_path, image, txts, rect_bboxes, oriented_bboxes = record
                # Every SynthText box is text (label 1) and none is ignored (0).
                labels = [1] * len(rect_bboxes)
                ignored = [0] * len(rect_bboxes)
                image_data = tf.gfile.FastGFile(image_path, 'r').read()
                shape = image.shape
                image_name = str(util.io.get_filename(image_path).split('.')[0])
                example = convert_to_example(image_data, image_name, labels,
                                             ignored, txts, rect_bboxes,
                                             oriented_bboxes, shape)
                tfrecord_writer.write(example.SerializeToString())
                written += 1
        fid += 1
def cvt_to_tfrecords(output_path, data_path, gt_path, records_per_file=50000):
    """Shard SynthText examples across TFRecord files, 'records_per_file' each."""
    fetcher = SynthTextDataFetcher(root_path=data_path, mat_path=gt_path)
    shard_idx = 0
    image_idx = -1
    while image_idx < fetcher.num_images:
        with tf.python_io.TFRecordWriter(output_path % (shard_idx)) as writer:
            n_in_shard = 0
            while n_in_shard != records_per_file:
                image_idx += 1
                if image_idx >= fetcher.num_images:
                    break
                print("loading image %d/%d" % (image_idx + 1, fetcher.num_images))
                record = fetcher.fetch_record(image_idx)
                if record is None:
                    print('\nimage %d does not exist' % (image_idx + 1))
                    continue
                image_path, image, txts, rect_bboxes, oriented_bboxes = record
                n_boxes = len(rect_bboxes)
                labels = n_boxes * [1]   # every box is text
                ignored = n_boxes * [0]  # nothing is ignored in SynthText
                image_data = tf.gfile.FastGFile(image_path, 'r').read()
                shape = image.shape
                image_name = str(util.io.get_filename(image_path).split('.')[0])
                example = convert_to_example(image_data, image_name, labels,
                                             ignored, txts, rect_bboxes,
                                             oriented_bboxes, shape)
                writer.write(example.SerializeToString())
                n_in_shard += 1
        shard_idx += 1
def cvt_to_tfrecords(output_path, data_path, gt_path):
    """Convert jpgs with 'gt_<name>.txt' 8-point polygons to one TFRecord file.

    Args:
        output_path: Path of the TFRecord file to write.
        data_path: Directory containing the .jpg images.
        gt_path: Directory containing the 'gt_<name>.txt' annotation files.
    """
    def _clip01(v):
        # Clamp a normalized coordinate into [0, 1].
        if v < 0:
            return 0
        if v > 1:
            return 1
        return v

    image_names = util.io.ls(data_path, '.jpg')  #[0:10];
    print("%d images found in %s" % (len(image_names), data_path))
    with tf.python_io.TFRecordWriter(output_path) as tfrecord_writer:
        for idx, image_name in enumerate(image_names):
            oriented_bboxes = []
            bboxes = []
            labels = []
            labels_text = []
            path = util.io.join_path(data_path, image_name)
            print("\tconverting image: %d/%d %s" % (idx, len(image_names),
                                                    image_name))
            # JPEG data is binary: open in 'rb' ('r' breaks under Python 3 and
            # mangles data on Windows). Matches the other converters here.
            image_data = tf.gfile.FastGFile(path, 'rb').read()
            image = util.img.imread(path, rgb=True)
            shape = image.shape
            h, w = shape[0:2]
            h *= 1.0
            w *= 1.0
            image_name = util.str.split(image_name, '.')[0]
            gt_name = 'gt_' + image_name + '.txt'
            gt_filepath = util.io.join_path(gt_path, gt_name)
            lines = util.io.read_lines(gt_filepath)
            for line in lines:
                line = util.str.remove_all(line, '\xef\xbb\xbf')  # strip UTF-8 BOM
                gt = util.str.split(line, ',')
                oriented_box = [int(gt[i]) for i in range(8)]
                # Normalize the four (x, y) corners by image width/height.
                oriented_box = np.asarray(oriented_box) / ([w, h] * 4)
                oriented_bboxes.append(oriented_box)
                xs = oriented_box.reshape(4, 2)[:, 0]
                ys = oriented_box.reshape(4, 2)[:, 1]
                xmin = _clip01(xs.min())
                xmax = _clip01(xs.max())
                ymin = _clip01(ys.min())
                ymax = _clip01(ys.max())
                bboxes.append([xmin, ymin, xmax, ymax])
                # Might be wrong for transcriptions containing commas, but the
                # label text is not used in detection.
                labels_text.append(gt[-1])
                if util.str.contains(gt[-1], '###'):
                    labels.append(config.ignore_label)
                else:
                    labels.append(config.text_label)
            example = convert_to_example(image_data, image_name, labels,
                                         labels_text, bboxes, oriented_bboxes,
                                         shape)
            tfrecord_writer.write(example.SerializeToString())
def cvt_to_tfrecords(output_path , data_path, gt_path): image_names = util.io.ls(data_path, '.jpg') print "{0} images found in {1}".format(len(image_names), data_path) with tf.python_io.TFRecordWriter(output_path) as tfrecord_writer: for idx, image_name in enumerate(image_names): oriented_bboxes = [] bboxes = [] labels = [] labels_text = [] ignored = [] path = util.io.join_path(data_path, image_name) print "\tconverting image: {0}/{1} {2}".format(idx, len(image_names), image_name) image_data = tf.gfile.FastGFile(path, 'r').read() image = util.img.imread(path, rgb=True) shape = image.shape h, w = shape[0:2] h *= 1.0 w *= 1.0 image_name = util.str.split(image_name, '.')[0] # without .jpg print image_name # read ground-truth gt_name = 'res_' + image_name + '.txt' gt_filepath = util.io.join_path(gt_path, gt_name) lines = util.io.read_lines(gt_filepath) for line in lines: line = util.str.remove_all(line, '\xef\xbb\xbf') gt = util.str.split(line, ',') oriented_box = [int(gt[i]) for i in range(8)] # oriented_box = np.asarray(oriented_box) / ([w, h] * 4) oriented_box = np.asarray(oriented_box) / ([w, h, w, h, w, h, w, h]) oriented_bboxes.append(oriented_box) xs = oriented_box.reshape(4, 2)[:, 0] ys = oriented_box.reshape(4, 2)[:, 1] xmin = xs.min() xmax = xs.max() ymin = ys.min() ymax = ys.max() bboxes.append([xmin, ymin, xmax, ymax]) ignored.append(util.str.contains(gt[-1], '###')) # might be wrong here, but it doesn't matter because the label is not going to be used in detection labels_text.append(gt[-1]) labels.append(1) example = convert_to_example(image_data, image_name, labels, ignored, labels_text, bboxes, oriented_bboxes, shape) tfrecord_writer.write(example.SerializeToString()) """
def _add_to_tfrecord(dataset_dir, name, tfrecord_writer):
    """Read one image with its annotations and append it to a TFRecord.

    Args:
        dataset_dir: Dataset directory.
        name: Image name to add to the TFRecord.
        tfrecord_writer: The TFRecord writer used for writing.
    """
    processed = _process_image(dataset_dir, name)
    image_data, filename, labels, labels_text, bboxes, oriented_bboxes, shape = processed
    example = convert_to_example(image_data, filename, labels, labels_text,
                                 bboxes, oriented_bboxes, shape)
    tfrecord_writer.write(example.SerializeToString())
def convert_to_tfrecords(file_path, text_path, out_path):
    """Convert images listed in ``file_path``, with per-image .txt polygons, to TFRecords."""
    output_dir = os.path.dirname(out_path)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    img_paths = load_file(file_path)
    print("the total img nums is:{}".format(len(img_paths)))
    # NOTE(review): out_path acts as a filename *prefix* (no separator added)
    # — confirm callers pass a trailing '/' when a directory is intended.
    with tf.python_io.TFRecordWriter(out_path + "train_tfrecords") as tfrecord_writer:
        for idx, path in enumerate(img_paths):
            with tf.gfile.FastGFile(path, 'rb') as f:
                image_data = f.read()
            # Decode the image to recover its height/width.
            img, h, w = read_img(path)
            if img is None:
                print("the current img is empty")
                continue
            oriented_bboxes = []
            bboxes = []
            labels = []
            labels_text = []
            ignored = []
            img_name = str(path.split("/")[-1][:-4])  # basename, extension dropped
            txt_path = os.path.join(text_path, img_name + ".txt")
            with open(txt_path, "r", encoding="utf-8") as f:
                lines = f.readlines()
            for line in lines:
                coords = np.array(line.strip().split(","))[:8]
                coords = coords.astype(float)
                oriented_box = coords / ([w, h] * 4)  # normalize corners
                oriented_bboxes.append(oriented_box)
                xy_list = np.reshape(oriented_box, (4, 2))
                xmin = xy_list[:, 0].min()
                xmax = xy_list[:, 0].max()
                ymin = xy_list[:, 1].min()
                ymax = xy_list[:, 1].max()
                # Clamp the axis-aligned box into [0, 1].
                bboxes.append([max(0., xmin), max(0., ymin),
                               min(xmax, 1.), min(ymax, 1.)])
                ignored.append(0)
                labels_text.append(b"text")
                labels.append(1)
            img_name = str.encode(str(img_name))
            example = convert_to_example(image_data, img_name, labels, ignored,
                                         labels_text, bboxes, oriented_bboxes,
                                         img.shape)
            tfrecord_writer.write(example.SerializeToString())
            if idx % 100 == 0:
                print(idx)
def cvt_to_tfrecords(image_names, output_path, data_path, gt_path, begin, end):
    """Write images[begin:end] with their '<name>.txt' polygons to one TFRecord file.

    Args:
        image_names: List of image file names.
        output_path: Path of the TFRecord file to write.
        data_path: Directory containing the images.
        gt_path: Directory containing the '<name>.txt' annotation files.
        begin, end: Slice bounds selecting which images to convert.
    """
    with tf.python_io.TFRecordWriter(output_path) as tfrecord_writer:
        ii = 0  # count of successfully decoded images
        for idx, image_name in enumerate(image_names[begin:end]):
            oriented_bboxes = []
            bboxes = []
            labels = []
            labels_text = []
            path = util.io.join_path(data_path, image_name)
            image_data = tf.gfile.FastGFile(path, 'r').read()
            try:
                image = util.img.imread(path, rgb=True)
                ii = ii + 1
            except Exception:
                # Skip unreadable/corrupt images. The original bare 'except:'
                # also swallowed KeyboardInterrupt/SystemExit.
                continue
            print("\tconverting image: %d/%d %s" % (ii, len(image_names),
                                                    image_name))
            shape = image.shape
            h, w = shape[0:2]
            h *= 1.0
            w *= 1.0
            image_name = image_name[0:-4]  # strip the 4-char '.jpg' extension
            gt_name = image_name + '.txt'
            gt_filepath = util.io.join_path(gt_path, gt_name)
            lines = util.io.read_lines(gt_filepath)
            for line in lines:
                line = util.str.remove_all(line, '\xef\xbb\xbf')  # strip UTF-8 BOM
                gt = util.str.split(line, ',')
                oriented_box = [int(float(gt[i])) for i in range(8)]
                # Normalize the four (x, y) corners by image width/height.
                oriented_box = np.asarray(oriented_box) / ([w, h] * 4)
                oriented_bboxes.append(oriented_box)
                xs = oriented_box.reshape(4, 2)[:, 0]
                ys = oriented_box.reshape(4, 2)[:, 1]
                xmin = to_valid(xs.min())
                xmax = to_valid(xs.max())
                ymin = to_valid(ys.min())
                ymax = to_valid(ys.max())
                bboxes.append([xmin, ymin, xmax, ymax])
                labels_text.append(gt[-1])
                if util.str.contains(gt[-1], '###'):
                    labels.append(config.ignore_label)
                else:
                    labels.append(config.text_label)
            example = convert_to_example(image_data, image_name, labels,
                                         labels_text, bboxes, oriented_bboxes,
                                         shape)
            tfrecord_writer.write(example.SerializeToString())
def cvt_to_tfrecords(output_path, data_path, gt_path):
    """Convert jpgs with 'gt_<name>.txt' 8-point polygons to one TFRecord file."""
    image_names = util.io.ls(data_path, '.jpg')  #[0:10];
    print("%d images found in %s" % (len(image_names), data_path))
    with tf.python_io.TFRecordWriter(output_path) as tfrecord_writer:
        for idx, image_name in enumerate(image_names):
            oriented_bboxes = []
            bboxes = []
            labels = []
            labels_text = []
            path = util.io.join_path(data_path, image_name)
            print("\tconverting image: %d/%d %s" % (idx, len(image_names),
                                                    image_name))
            image_data = tf.gfile.FastGFile(path, 'r').read()
            image = util.img.imread(path, rgb=True)
            shape = image.shape
            h, w = shape[0:2]
            h *= 1.0
            w *= 1.0
            image_name = util.str.split(image_name, '.')[0]
            gt_filepath = util.io.join_path(gt_path, 'gt_' + image_name + '.txt')
            for line in util.io.read_lines(gt_filepath):
                line = util.str.remove_all(line, '\xef\xbb\xbf')  # strip UTF-8 BOM
                gt = util.str.split(line, ',')
                # Normalize the four (x, y) corners by image width/height.
                corners = np.asarray([int(gt[i]) for i in range(8)]) / ([w, h] * 4)
                oriented_bboxes.append(corners)
                xs = corners.reshape(4, 2)[:, 0]
                ys = corners.reshape(4, 2)[:, 1]
                bboxes.append([xs.min(), ys.min(), xs.max(), ys.max()])
                # Might be wrong for transcriptions containing commas, but the
                # label text is not used in detection.
                labels_text.append(gt[-1])
                if util.str.contains(gt[-1], '###'):
                    labels.append(config.ignore_label)
                else:
                    labels.append(config.text_label)
            example = convert_to_example(image_data, image_name, labels,
                                         labels_text, bboxes, oriented_bboxes,
                                         shape)
            tfrecord_writer.write(example.SerializeToString())
def cvt_to_tfrecords(output_path, images_dir, annotations_dir, files_txt):
    """Convert XML-annotated images (one name per line in files_txt) to a TFRecord file.

    Args:
        output_path: Path of the TFRecord file to write.
        images_dir: Directory containing the images.
        annotations_dir: Directory containing '<name>.xml' annotation files.
        files_txt: Text file listing one annotation base name per line.
    """
    with tf.python_io.TFRecordWriter(output_path) as tfrecord_writer:
        lines = util.io.read_lines(files_txt)
        count = len(lines)
        for idx, filename in enumerate(lines):
            oriented_bboxes = []
            bboxes = []
            labels = []
            labels_text = []
            filename = filename.replace('\n', '')
            print('{} / {} {}'.format(idx, count, filename))
            annotation_path = util.io.join_path(annotations_dir, filename + '.xml')
            annotation_xml = ET.ElementTree(file=annotation_path)
            image_filename = annotation_xml.find('filename').text
            image_path = util.io.join_path(images_dir, image_filename)
            image_data = tf.gfile.FastGFile(image_path, 'rb').read()
            image = util.img.imread(image_path, rgb=True)
            shape = image.shape
            h, w = shape[0:2]
            h *= 1.0
            w *= 1.0
            for obj_ele in annotation_xml.iterfind('object'):
                content = str.encode(obj_ele.find('content').text)
                labels_text.append(content)
                labels.append(config.text_label)
                bndbox = obj_ele.find('bndbox')
                # Normalize pixel coordinates to [0, 1].
                xmin = int(bndbox.find('xmin').text) / w
                ymin = int(bndbox.find('ymin').text) / h
                xmax = int(bndbox.find('xmax').text) / w
                ymax = int(bndbox.find('ymax').text) / h
                bboxes.append([xmin, ymin, xmax, ymax])
                if xmin < 0 or ymin < 0 or xmax > 1 or ymax > 1:
                    # Out-of-range annotation: report it and abort the run.
                    print(filename)
                    return
                # Corners of the axis-aligned box. The original ended with
                # (xmin, ymin) — repeating the first corner instead of the
                # fourth — fixed to (xmin, ymax) to match the other
                # converters in this file.
                oriented_box = [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax]
                oriented_bboxes.append(oriented_box)
            filename = str.encode(filename)
            example = convert_to_example(image_data, filename, labels,
                                         labels_text, bboxes, oriented_bboxes,
                                         shape)
            tfrecord_writer.write(example.SerializeToString())
def cvt_to_tfrecords(output_path, data_path, gt_path):
    """Convert jpgs with 'gt_<name>.txt' polygons to a TFRecord, with an 'ignored' mask for '###' boxes."""
    image_names = util.io.ls(data_path, '.jpg')  #[0:10];
    print("%d images found in %s" % (len(image_names), data_path))
    with tf.python_io.TFRecordWriter(output_path) as tfrecord_writer:
        for idx, image_name in enumerate(image_names):
            oriented_bboxes = []
            bboxes = []
            labels = []
            labels_text = []
            ignored = []
            path = util.io.join_path(data_path, image_name)
            print("\tconverting image: %d/%d %s" % (idx, len(image_names),
                                                    image_name))
            image_data = tf.gfile.FastGFile(path, 'r').read()
            image = util.img.imread(path, rgb=True)
            shape = image.shape
            h, w = shape[0:2]
            h *= 1.0
            w *= 1.0
            image_name = util.str.split(image_name, '.')[0]
            gt_filepath = util.io.join_path(gt_path, 'gt_' + image_name + '.txt')
            for line in util.io.read_lines(gt_filepath):
                line = util.str.remove_all(line, '\xef\xbb\xbf')  # strip UTF-8 BOM
                gt = util.str.split(line, ',')
                # Normalize the four (x, y) corners by image width/height.
                corners = np.asarray([int(gt[i]) for i in range(8)]) / ([w, h] * 4)
                oriented_bboxes.append(corners)
                xs = corners.reshape(4, 2)[:, 0]
                ys = corners.reshape(4, 2)[:, 1]
                bboxes.append([xs.min(), ys.min(), xs.max(), ys.max()])
                ignored.append(util.str.contains(gt[-1], '###'))
                # Might be wrong for transcriptions containing commas, but the
                # label text is not used in detection.
                labels_text.append(gt[-1])
                labels.append(1)
            example = convert_to_example(image_data, image_name, labels, ignored,
                                         labels_text, bboxes, oriented_bboxes,
                                         shape)
            tfrecord_writer.write(example.SerializeToString())
def cvt_to_tfrecords(output_path, data_path, gt_path):
    """Convert jpgs with '<name>.txt' (x, y, w, h, label) lines into one TFRecord file."""
    image_names = util.io.ls(data_path, '.jpg')  #[0:10];
    print("%d images found in %s" % (len(image_names), data_path))
    with tf.python_io.TFRecordWriter(output_path) as tfrecord_writer:
        for idx, image_name in enumerate(image_names):
            bboxes = []
            oriented_bboxes = []
            labels = []
            labels_text = []
            ignored = []
            path = util.io.join_path(data_path, image_name)
            # Skip files that are not decodable JPEGs.
            if not util.img.is_valid_jpg(path):
                continue
            image = util.img.imread(path)
            print("\tconverting image:%s, %d/%d" % (image_name, idx,
                                                    len(image_names)))
            image_data = tf.gfile.FastGFile(path, 'r').read()
            #image = util.img.imread(path, rgb = True);
            shape = image.shape
            h, w = shape[0:2]
            h *= 1.0
            w *= 1.0
            image_name = util.str.split(image_name, '.')[0]
            gt_filepath = util.io.join_path(gt_path, image_name + '.txt')
            for line in util.io.read_lines(gt_filepath):
                spt = line.split(',')
                xmin, ymin, bw, bh = [int(v) for v in spt[0:-1]]
                # Convert (x, y, w, h) to inclusive corners, then normalize.
                xmax = xmin + bw - 1
                ymax = ymin + bh - 1
                xmin, ymin, xmax, ymax = xmin / w, ymin / h, xmax / w, ymax / h
                bboxes.append([xmin, ymin, xmax, ymax])
                oriented_bboxes.append([xmin, ymin, xmax, ymin,
                                        xmax, ymax, xmin, ymax])
                labels_text.append(str(spt[-1]))
                labels.append(1)
                ignored.append(0)
            example = convert_to_example(image_data, image_name, labels, ignored,
                                         labels_text, bboxes, oriented_bboxes,
                                         shape)
            tfrecord_writer.write(example.SerializeToString())
def cvt_to_tfrecords(output_path, data_path, gt_path):
    """Convert jpgs with 'gt_<name>.txt' (space-separated box + quoted transcript) to one TFRecord file."""
    image_names = util.io.ls(data_path, '.jpg')  #[0:10];
    print("%d images found in %s" % (len(image_names), data_path))
    with tf.python_io.TFRecordWriter(output_path) as tfrecord_writer:
        for idx, image_name in enumerate(image_names):
            oriented_bboxes = []
            bboxes = []
            labels = []
            labels_text = []
            ignored = []
            path = util.io.join_path(data_path, image_name)
            print("\tconverting image: %d/%d %s" % (idx, len(image_names),
                                                    image_name))
            image_data = tf.gfile.FastGFile(path, 'r').read()
            image = util.img.imread(path, rgb=True)
            shape = image.shape
            h, w = shape[0:2]
            h *= 1.0
            w *= 1.0
            image_name = util.str.split(image_name, '.')[0]
            gt_filepath = util.io.join_path(gt_path, 'gt_' + image_name + '.txt')
            for line in util.io.read_lines(gt_filepath):
                # Lines look like: x1 y1 x2 y2 "transcription" (commas dropped).
                fields = util.str.split(util.str.remove_all(line, ','), ' ')
                box = [int(fields[i]) for i in range(4)]
                # Normalize the corner coordinates by image width/height.
                xmin, ymin, xmax, ymax = np.asarray(box) / [w, h, w, h]
                oriented_bboxes.append([xmin, ymin, xmax, ymin,
                                        xmax, ymax, xmin, ymax])
                bboxes.append([xmin, ymin, xmax, ymax])
                ignored.append(0)
                labels_text.append(line.split('"')[1])  # text between the quotes
                labels.append(1)
            example = convert_to_example(image_data, image_name, labels, ignored,
                                         labels_text, bboxes, oriented_bboxes,
                                         shape)
            tfrecord_writer.write(example.SerializeToString())
def cvt_to_tfrecords(output_path, data_path, gt_path):
    """Convert jpgs annotated as comma-separated (x, y, w, h, label) into one TFRecord file."""
    image_names = util.io.ls(data_path, '.jpg')  #[0:10];
    print("%d images found in %s" % (len(image_names), data_path))
    with tf.python_io.TFRecordWriter(output_path) as tfrecord_writer:
        for idx, image_name in enumerate(image_names):
            bboxes = []
            oriented_bboxes = []
            labels = []
            labels_text = []
            ignored = []
            path = util.io.join_path(data_path, image_name)
            if not util.img.is_valid_jpg(path):
                continue  # not a decodable JPEG
            image = util.img.imread(path)
            print("\tconverting image:%s, %d/%d" % (image_name, idx,
                                                    len(image_names)))
            image_data = tf.gfile.FastGFile(path, 'r').read()
            #image = util.img.imread(path, rgb = True);
            shape = image.shape
            h, w = shape[0:2]
            h *= 1.0
            w *= 1.0
            image_name = util.str.split(image_name, '.')[0]
            gt_name = image_name + '.txt'
            gt_filepath = util.io.join_path(gt_path, gt_name)
            lines = util.io.read_lines(gt_filepath)
            for line in lines:
                spt = line.split(',')
                locs = spt[0:-1]
                xmin, ymin, bw, bh = [int(v) for v in locs]
                # (x, y, w, h) -> inclusive corner coordinates.
                xmax = xmin + bw - 1
                ymax = ymin + bh - 1
                # Normalize by image width/height.
                xmin, ymin, xmax, ymax = xmin / w, ymin / h, xmax / w, ymax / h
                bboxes.append([xmin, ymin, xmax, ymax])
                oriented_bboxes.append([xmin, ymin, xmax, ymin,
                                        xmax, ymax, xmin, ymax])
                labels_text.append(str(spt[-1]))
                labels.append(1)
                ignored.append(0)
            example = convert_to_example(image_data, image_name, labels, ignored,
                                         labels_text, bboxes, oriented_bboxes,
                                         shape)
            tfrecord_writer.write(example.SerializeToString())
def cvt_to_tfrecords(output_path, data_path, gt_path): image_names = util.io.ls(data_path, '.jpg') #[0:10]; print "%d images found in %s" % (len(image_names), data_path) with tf.python_io.TFRecordWriter(output_path) as tfrecord_writer: for idx, image_name in enumerate(image_names): oriented_bboxes = [] bboxes = [] labels = [] labels_text = [] path = util.io.join_path(data_path, image_name) print "\tconverting image: %d/%d %s" % (idx, len(image_names), image_name) image_data = tf.gfile.FastGFile(path, 'r').read() image = util.img.imread(path, rgb=True) shape = image.shape h, w = shape[0:2] h *= 1.0 w *= 1.0 image_name = util.str.split(image_name, '.')[0] ## Annotations gt_name = image_name + '.json' gt_filepath = util.io.join_path(gt_path, gt_name) if not os.path.exists(gt_filepath): continue # print 'gt_name', gt_name, gt_filepath ### json file with open(gt_filepath) as jsonbuffer: jsondict = json.loads(jsonbuffer.read()) print " >>>>>> ", image_name, w, jsondict[ 'imageWidth'], h, jsondict['imageHeight'] for obj in jsondict['shapes']: gt = [] for pt in obj['points']: ptx, pty = pt ptx = check_out_boundary(ptx, 0, w) pty = check_out_boundary(pty, 0, h) gt.append(ptx) gt.append(pty) num_pts = len(obj['points']) oriented_box = [int(gt[i]) for i in range(len(gt))] oriented_box = np.asarray(oriented_box) / ([w, h] * num_pts) oriented_bboxes.append(oriented_box) xs = oriented_box.reshape(num_pts, 2)[:, 0] ys = oriented_box.reshape(num_pts, 2)[:, 1] # for pt in obj['points']: # ptx, pty = pt # ptx = check_out_boundary(ptx, 0, w) # pty = check_out_boundary(pty, 0 ,h) # gt.append(pty) # gt.append(ptx) # num_pts = len(obj['points']) # oriented_box = [int(gt[i]) for i in range(len(gt))] # oriented_box = np.asarray(oriented_box) / ([h, w] * num_pts) # oriented_bboxes.append(oriented_box) # ys = oriented_box.reshape(num_pts, 2)[:, 0] # xs = oriented_box.reshape(num_pts, 2)[:, 1] xmin = xs.min() xmax = xs.max() ymin = ys.min() ymax = ys.max() bboxes.append([ymin, xmin, ymax, 
xmax]) print " >>>>> ", num_pts, len( gt), '\t', xmin, ymin, xmax, ymax labels_text.append( str(obj['label'].encode('utf8')) ) ## unicode to byte, should convert back to unicode in result ignored = util.str.contains(obj['label'], '#') if ignored: labels.append(config.ignore_label) else: labels.append(config.text_label) example = convert_to_example(image_data, image_name, labels, labels_text, bboxes, oriented_bboxes, shape) tfrecord_writer.write(example.SerializeToString())