def _convert_to_example(image_data, labels, labels_text, bboxes, shape,
                        difficult, truncated):
    """Build an Example proto for an image example.

    :return: Example proto.
    """
    xmin = []
    ymin = []
    xmax = []
    ymax = []
    for b in bboxes:
        assert len(b) == 4
        # Split each [ymin, xmin, ymax, xmax] box into the per-coordinate lists.
        for l, point in zip([ymin, xmin, ymax, xmax], b):
            l.append(point)

    image_format = b'JPEG'
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': int64_feature(shape[0]),
        'image/width': int64_feature(shape[1]),
        'image/channels': int64_feature(shape[2]),
        'image/shape': int64_feature(shape),
        'image/object/bbox/xmin': float_feature(xmin),
        'image/object/bbox/xmax': float_feature(xmax),
        'image/object/bbox/ymin': float_feature(ymin),
        'image/object/bbox/ymax': float_feature(ymax),
        'image/object/bbox/label': int64_feature(labels),
        'image/object/bbox/label_text': bytes_feature(labels_text),
        'image/object/bbox/difficult': int64_feature(difficult),
        'image/object/bbox/truncated': int64_feature(truncated),
        'image/format': bytes_feature(image_format),
        'image/encoded': bytes_feature(image_data)}))
    return example
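# The snippets in this collection all assume small feature-wrapping helpers
# (int64_feature, float_feature, bytes_feature) whose definitions are not shown
# here. A minimal sketch, assuming the usual TF-slim / SSD-TensorFlow style
# wrappers that accept either a scalar or a list, might look like this:
import tensorflow as tf


def int64_feature(value):
    """Wrap an int or a list of ints in a tf.train.Feature (assumed helper)."""
    if not isinstance(value, list):
        value = [value]
    return tf.train.Feature(int64_list=tf.train.Int64List(value=value))


def float_feature(value):
    """Wrap a float or a list of floats in a tf.train.Feature (assumed helper)."""
    if not isinstance(value, list):
        value = [value]
    return tf.train.Feature(float_list=tf.train.FloatList(value=value))


def bytes_feature(value):
    """Wrap a bytes string or a list of bytes strings in a tf.train.Feature (assumed helper)."""
    if not isinstance(value, list):
        value = [value]
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))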
def _convert_to_example(data_example):
    """Build an Example proto for an image example.

    Args:
        data_example: dict with the keys
            'image': string, JPEG encoding of the RGB image;
            'name': string, image filename;
            'shape': 3 integers, image shape in pixels;
            'bboxes': list of bounding boxes; each box is a dict with keys
                'xmin', 'ymin', 'xmax', 'ymax' given in pixels;
            'labels': list of integers, identifiers for the ground truth.
    Returns:
        Example proto
    """
    xmin = []
    ymin = []
    xmax = []
    ymax = []
    shape = []
    difficult = []
    truncated = []
    label_text = []
    for i in range(len(data_example['bboxes'])):
        difficult.append(0)
        truncated.append(0)
        label_text.append(b'face')
    for s in data_example['shape']:
        shape.append(s)
    # print(shape)
    image_data = data_example['image']
    filename = data_example['name']
    for bbox in data_example['bboxes']:
        assert len(bbox) == 4
        # Normalize pixel coordinates to [0, 1] by the image width/height.
        xmin.append(bbox['xmin'] / shape[1])
        ymin.append(bbox['ymin'] / shape[0])
        xmax.append(bbox['xmax'] / shape[1])
        ymax.append(bbox['ymax'] / shape[0])
    # print(xmin)

    image_format = b'JPEG'
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': int64_feature(shape[0]),
        'image/width': int64_feature(shape[1]),
        'image/channels': int64_feature(shape[2]),
        'image/shape': int64_feature(shape),
        'image/object/bbox/xmin': float_feature(xmin),
        'image/object/bbox/xmax': float_feature(xmax),
        'image/object/bbox/ymin': float_feature(ymin),
        'image/object/bbox/ymax': float_feature(ymax),
        'image/object/bbox/label': int64_feature(data_example['labels']),
        'image/object/bbox/label_text': bytes_feature(label_text),
        'image/object/bbox/difficult': int64_feature(difficult),
        'image/object/bbox/truncated': int64_feature(truncated),
        'image/format': bytes_feature(image_format),
        'image/filename': bytes_feature(filename.encode('utf-8')),
        'image/encoded': bytes_feature(image_data)}))
    return example
def _convert2example(image_data, shape, bboxes, labels, labels_text,
                     difficult, truncated):
    y_min = []
    x_min = []
    y_max = []
    x_max = []
    for b in bboxes:
        assert len(b) == 4
        # Split each [ymin, xmin, ymax, xmax] box into the per-coordinate lists.
        for xy, point in zip([y_min, x_min, y_max, x_max], b):
            xy.append(point)

    image_format = b'JPEG'
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': int64_feature(shape[0]),
        'image/width': int64_feature(shape[1]),
        'image/channels': int64_feature(shape[2]),
        'image/shape': int64_feature(shape),
        'image/object/bbox/xmin': float_feature(x_min),
        'image/object/bbox/xmax': float_feature(x_max),
        'image/object/bbox/ymin': float_feature(y_min),
        'image/object/bbox/ymax': float_feature(y_max),
        'image/object/bbox/label': int64_feature(labels),
        'image/object/bbox/label_text': bytes_feature(labels_text),
        'image/object/bbox/difficult': int64_feature(difficult),
        'image/object/bbox/truncated': int64_feature(truncated),
        'image/format': bytes_feature(image_format),
        'image/encoded': bytes_feature(image_data)}))
    return example
def _convert_to_example(image_data, shape, bboxes, labels, labels_text):
    xmin = []
    ymin = []
    xmax = []
    ymax = []
    for b in bboxes:
        assert len(b) == 4
        # Split each [ymin, xmin, ymax, xmax] box into the per-coordinate lists.
        for l, point in zip([ymin, xmin, ymax, xmax], b):
            l.append(point)

    image_format = b'PNG'
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': int64_feature(shape[0]),
        'image/width': int64_feature(shape[1]),
        'image/channels': int64_feature(shape[2]),
        'image/shape': int64_feature(shape),
        'image/object/bbox/xmin': float_feature(xmin),
        'image/object/bbox/xmax': float_feature(xmax),
        'image/object/bbox/ymin': float_feature(ymin),
        'image/object/bbox/ymax': float_feature(ymax),
        'image/object/bbox/label': int64_feature(labels),
        'image/object/bbox/label_text': bytes_feature(labels_text),
        'image/format': bytes_feature(image_format),
        'image/encoded': bytes_feature(image_data)}))
    return example
def _process_image(directory, split, name):
    # Read the image file in binary mode.
    filename = os.path.join(directory, 'image_2', name + '.png')
    image_data = tf.gfile.FastGFile(filename, 'rb').read()

    # Get shape.
    img = cv2.imread(filename)
    shape = np.shape(img)

    label_list = []
    type_list = []
    bbox_x1_list = []
    bbox_y1_list = []
    bbox_x2_list = []
    bbox_y2_list = []

    # If 'test' split, skip annotations.
    if re.findall(r'train', split):
        # Read the txt annotation file.
        filename = os.path.join(directory, 'label_2', name + '.txt')
        with open(filename) as anno_file:
            objects = anno_file.readlines()
        for obj in objects:
            obj_anno = obj.split(' ')
            type_txt = obj_anno[0].encode('ascii')
            if type_txt in CLASSES:
                label_list.append(CLASSES[type_txt])
                type_list.append(type_txt)
                # Bounding box.
                bbox_x1 = float(obj_anno[4])
                bbox_y1 = float(obj_anno[5])
                bbox_x2 = float(obj_anno[6])
                bbox_y2 = float(obj_anno[7])
                bbox_x1_list.append(bbox_x1)
                bbox_y1_list.append(bbox_y1)
                bbox_x2_list.append(bbox_x2)
                bbox_y2_list.append(bbox_y2)

    image_format = b'PNG'
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/encoded': bytes_feature(image_data),
        'image/format': bytes_feature(image_format),
        'image/height': int64_feature(shape[0]),
        'image/width': int64_feature(shape[1]),
        'image/channels': int64_feature(shape[2]),
        'image/shape': int64_feature(shape),
        'image/object/bbox/xmin': float_feature(bbox_x1_list),
        'image/object/bbox/xmax': float_feature(bbox_x2_list),
        'image/object/bbox/ymin': float_feature(bbox_y1_list),
        'image/object/bbox/ymax': float_feature(bbox_y2_list),
        'image/object/bbox/label': int64_feature(label_list),
        'image/object/bbox/label_text': bytes_feature(type_list),
    }))
    return example
def _convert_to_example_multiphase_multislice_mask(
        nc_image_data, art_image_data, pv_image_data, mask_image_data,
        labels, labels_text, bboxes, shape, difficult, truncated):
    """Build an Example proto for a multi-phase image example.

    Args:
        nc_image_data, art_image_data, pv_image_data: strings, encoded NC/ART/PV phase images;
        mask_image_data: string, encoded mask image;
        labels: list of integers, identifier for the ground truth;
        labels_text: list of strings, human-readable labels;
        bboxes: list of bounding boxes; each box is a list of floats
            specifying [ymin, xmin, ymax, xmax]. All boxes are assumed
            to belong to the same label as the image label.
        shape: 3 integers, image shape in pixels.
    Returns:
        Example proto
    """
    xmin = []
    ymin = []
    xmax = []
    ymax = []
    for b in bboxes:
        assert len(b) == 4
        for l, point in zip([ymin, xmin, ymax, xmax], b):
            l.append(point)

    image_format = suffix_type  # module-level global naming the encoding type
    print('image_format is ', image_format)
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': int64_feature(shape[0]),
        'image/width': int64_feature(shape[1]),
        'image/channels': int64_feature(shape[2]),
        'image/shape': int64_feature(shape),
        'image/object/bbox/xmin': float_feature(xmin),
        'image/object/bbox/xmax': float_feature(xmax),
        'image/object/bbox/ymin': float_feature(ymin),
        'image/object/bbox/ymax': float_feature(ymax),
        'image/object/bbox/label': int64_feature(labels),
        'image/object/bbox/label_text': bytes_feature(labels_text),
        'image/object/bbox/difficult': int64_feature(difficult),
        'image/object/bbox/truncated': int64_feature(truncated),
        'image/format': bytes_feature(image_format),
        'image/mask/encoded': bytes_feature(mask_image_data),
        'image/nc/encoded': bytes_feature(nc_image_data),
        'image/art/encoded': bytes_feature(art_image_data),
        'image/pv/encoded': bytes_feature(pv_image_data)}))
    return example
def _convert_to_example(image_data_np, tumor_fully_mask_np, mask_np,
                        liver_mask_np, bbox_np):
    """Build an Example proto for an image example.

    Args:
        image_data_np: float array, the image data;
        tumor_fully_mask_np: integer array, fully annotated tumor mask;
        mask_np: integer array, mask image;
        liver_mask_np: integer array, liver mask;
        bbox_np: array of bounding boxes; each box is
            [ymin, xmin, ymax, xmax] in pixels, normalized below by 512.
    Returns:
        Example proto
    """
    # Normalize pixel coordinates to [0, 1] assuming 512 x 512 images.
    bbox_np = np.asarray(np.asarray(bbox_np, np.float32) / 512., np.float32)
    xmin = []
    ymin = []
    xmax = []
    ymax = []
    labels = []
    for b in bbox_np:
        assert len(b) == 4
        for l, point in zip([ymin, xmin, ymax, xmax], b):
            l.append(point)
        labels.append(1)

    image_format = b'PNG'
    example = tf.train.Example(features=tf.train.Features(feature={
        # 'image/height': int64_feature(512),
        # 'image/width': int64_feature(512),
        # 'image/channels': int64_feature(3),
        # 'image/shape': int64_feature([512, 512, 3]),
        'image/format': bytes_feature(image_format),
        # 'image/encoded': bytes_feature(np.asarray(image_data_np, np.float32).tostring()),
        # 'livermask/encoded': bytes_feature(np.asarray(liver_mask_np, np.uint8).tostring()),
        # 'fullAnnTumorMask/encoded': bytes_feature(np.asarray(tumor_fully_mask_np, np.uint8).tostring()),
        # 'maskimage/encoded': bytes_feature(np.asarray(mask_np, np.uint8).tostring()),
        'image/encoded': EncodedFloatFeature(np.asarray(image_data_np, np.float32)),
        'livermask/encoded': EncodedInt64Feature(np.asarray(liver_mask_np, np.int64)),
        'fullAnnTumorMask/encoded': EncodedInt64Feature(np.asarray(tumor_fully_mask_np, np.int64)),
        'maskimage/encoded': EncodedInt64Feature(np.asarray(mask_np, np.int64)),
        'image/object/bbox/xmin': float_feature(xmin),
        'image/object/bbox/xmax': float_feature(xmax),
        'image/object/bbox/ymin': float_feature(ymin),
        'image/object/bbox/ymax': float_feature(ymax),
        'image/object/bbox/label': int64_feature(labels)}))
    return example
def _convert_to_example(image_data, shape, bboxes, labels, labels_text,
                        difficult, truncated):
    """Build an Example proto for an image example.

    :param image_data: string, JPEG encoding of the RGB image
    :param shape: list of 3 integers, image shape in pixels
    :param bboxes: list of bounding boxes; each box is a tuple
        (ymin, xmin, ymax, xmax)
    :param labels: list of integers, identifier for the ground truth
    :param labels_text: list of strings, human-readable labels
    :param difficult: list of integers, difficult flags
    :param truncated: list of integers, truncated flags
    :return: Example proto
    """
    xmin = []
    ymin = []
    xmax = []
    ymax = []
    # Convert the list of bounding-box tuples into four per-coordinate lists.
    # e.g. [(1, 2, 3, 4), (5, 6, 7, 8)] is converted to
    # ymin=[1, 5], xmin=[2, 6], ymax=[3, 7] and xmax=[4, 8].
    for b in bboxes:
        assert len(b) == 4
        for l, point in zip([ymin, xmin, ymax, xmax], b):
            l.append(point)

    image_format = b'JPEG'
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': int64_feature(shape[0]),
        'image/width': int64_feature(shape[1]),
        'image/channels': int64_feature(shape[2]),
        'image/shape': int64_feature(shape),
        'image/object/bbox/xmin': float_feature(xmin),
        'image/object/bbox/ymin': float_feature(ymin),
        'image/object/bbox/xmax': float_feature(xmax),
        'image/object/bbox/ymax': float_feature(ymax),
        'image/object/bbox/label': int64_feature(labels),
        'image/object/bbox/label_text': bytes_feature(labels_text),
        'image/object/bbox/difficult': int64_feature(difficult),
        'image/object/bbox/truncated': int64_feature(truncated),
        'image/format': bytes_feature(image_format),
        'image/encoded': bytes_feature(image_data)}))
    return example
def build_example(user, image, text, label, file):
    return tf.train.Example(features=tf.train.Features(feature={
        user_key: dataset_utils.bytes_feature(user),
        image_key: dataset_utils.float_feature(image),
        text_key: dataset_utils.int64_feature(text),
        label_key: dataset_utils.int64_feature(label),
        file_key: dataset_utils.bytes_feature(file),
    }))
def _convert_to_example(image_data, labels, labels_text, bboxes, shape,
                        difficult, truncated):
    """Build an Example proto for an image example.

    Args:
        image_data: string, JPEG encoding of RGB image;
        labels: list of integers, identifier for the ground truth;
        labels_text: list of strings, human-readable labels;
        bboxes: list of bounding boxes; each box is a list of floats
            specifying [ymin, xmin, ymax, xmax]. All boxes are assumed
            to belong to the same label as the image label.
        shape: 3 integers, image shape in pixels.
    Returns:
        Example proto
    """
    xmin = []
    ymin = []
    xmax = []
    ymax = []
    # Pack the bbox values into the four per-coordinate lists above.
    for b in bboxes:
        assert len(b) == 4
        for l, point in zip([ymin, xmin, ymax, xmax], b):
            l.append(point)

    image_format = b'JPEG'
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': int64_feature(shape[0]),
        'image/width': int64_feature(shape[1]),
        'image/channels': int64_feature(shape[2]),
        'image/shape': int64_feature(shape),
        'image/object/bbox/xmin': float_feature(xmin),
        'image/object/bbox/xmax': float_feature(xmax),
        'image/object/bbox/ymin': float_feature(ymin),
        'image/object/bbox/ymax': float_feature(ymax),
        'image/object/bbox/label': int64_feature(labels),
        'image/object/bbox/label_text': bytes_feature(labels_text),
        'image/object/bbox/difficult': int64_feature(difficult),
        'image/object/bbox/truncated': int64_feature(truncated),
        'image/format': bytes_feature(image_format),
        'image/encoded': bytes_feature(image_data)}))
    return example
def _convert_to_example(image_data, labels, bboxes, mask_data):
    """Build an Example proto for an image example.

    Args:
        image_data: string, PNG encoding of the RGB image;
        labels: list of integers, identifier for the ground truth;
        bboxes: list of bounding boxes; each box is a list of floats
            specifying [ymin, xmin, ymax, xmax]. All boxes are assumed
            to belong to the same label as the image label.
        mask_data: string, PNG encoding of the mask image, or None.
    Returns:
        Example proto
    """
    xmin = []
    ymin = []
    xmax = []
    ymax = []
    for b in bboxes:
        assert len(b) == 4
        for l, point in zip([ymin, xmin, ymax, xmax], b):
            l.append(point)

    # image_format = b'JPEG'
    image_format = b'PNG'
    if mask_data is None:
        example = tf.train.Example(features=tf.train.Features(feature={
            'image/height': int64_feature(512),
            'image/width': int64_feature(512),
            'image/channels': int64_feature(3),
            'image/shape': int64_feature([512, 512, 3]),
            'image/object/bbox/xmin': float_feature(xmin),
            'image/object/bbox/xmax': float_feature(xmax),
            'image/object/bbox/ymin': float_feature(ymin),
            'image/object/bbox/ymax': float_feature(ymax),
            'image/object/bbox/label': int64_feature(labels),
            'image/format': bytes_feature(image_format),
            'image/encoded': bytes_feature(image_data)}))
    else:
        example = tf.train.Example(features=tf.train.Features(feature={
            'image/height': int64_feature(512),
            'image/width': int64_feature(512),
            'image/channels': int64_feature(3),
            'image/shape': int64_feature([512, 512, 3]),
            'image/object/bbox/xmin': float_feature(xmin),
            'image/object/bbox/xmax': float_feature(xmax),
            'image/object/bbox/ymin': float_feature(ymin),
            'image/object/bbox/ymax': float_feature(ymax),
            'image/object/bbox/label': int64_feature(labels),
            'image/format': bytes_feature(image_format),
            'image/encoded': bytes_feature(image_data),
            'maskimage/encoded': bytes_feature(mask_data)}))
    return example
def _convert_to_example(image_data, shape, bbox, label):
    nbbox = np.array(bbox)
    ymin = list(nbbox[:, 0])
    xmin = list(nbbox[:, 1])
    ymax = list(nbbox[:, 2])
    xmax = list(nbbox[:, 3])

    # print('shape: {}, height:{}, width:{}'.format(shape, shape[0], shape[1]))
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/shape': int64_feature(list(shape)),
        'image/object/bbox/ymin': float_feature(ymin),
        'image/object/bbox/xmin': float_feature(xmin),
        'image/object/bbox/ymax': float_feature(ymax),
        'image/object/bbox/xmax': float_feature(xmax),
        'image/object/bbox/label': int64_feature(label),
        'image/format': bytes_feature(b'jpeg'),
        'image/encoded': bytes_feature(image_data),
    }))
    return example
def _convert_to_example(image_data, shape, bboxes, labels, labels_text,
                        difficult, truncated):
    """Build an Example proto for an image example.

    :param image_data: string, JPEG encoding of the RGB image;
    :param shape: list of 3 integers, image shape in pixels
    :param bboxes: list of tuples, each tuple is the bounding box of an object
        specifying [ymin, xmin, ymax, xmax]. All boxes are assumed to belong
        to the same label as the image label.
    :param labels: list of integers, identifier for the ground truth
    :param labels_text: list of strings, human-readable labels.
    :return: An Example proto.
    """
    xmin = []
    ymin = []
    xmax = []
    ymax = []
    for b in bboxes:
        assert len(b) == 4
        for l, point in zip([ymin, xmin, ymax, xmax], b):
            l.append(point)

    image_format = b'JPEG'
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': int64_feature(shape[0]),
        'image/width': int64_feature(shape[1]),
        'image/channels': int64_feature(shape[2]),
        'image/shape': int64_feature(shape),
        'image/object/bbox/xmin': float_feature(xmin),
        'image/object/bbox/xmax': float_feature(xmax),
        'image/object/bbox/ymin': float_feature(ymin),
        'image/object/bbox/ymax': float_feature(ymax),
        'image/object/bbox/label': int64_feature(labels),
        'image/object/bbox/label_text': bytes_feature(labels_text),
        'image/object/bbox/difficult': int64_feature(difficult),
        'image/object/bbox/truncated': int64_feature(truncated),
        'image/format': bytes_feature(image_format),
        'image/encoded': bytes_feature(image_data)}))
    return example
def _convert_to_example(image_data, labels, labels_text, bboxes, shape,
                        difficult, truncated):
    """Build an Example proto for an image example.

    Args:
        image_data: string, JPEG encoding of RGB image;
        labels: list of integers, identifier for the ground truth;
        labels_text: list of strings, human-readable labels;
        bboxes: list of bounding boxes; each box is a list of floats
            specifying [ymin, xmin, ymax, xmax]. All boxes are assumed
            to belong to the same label as the image label.
        shape: 3 integers, image shape in pixels.
    Returns:
        Example proto
    """
    xmin = []
    ymin = []
    xmax = []
    ymax = []
    for b in bboxes:
        assert len(b) == 4
        for l, point in zip([ymin, xmin, ymax, xmax], b):
            l.append(point)

    image_format = b'JPEG'
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': int64_feature(shape[0]),
        'image/width': int64_feature(shape[1]),
        'image/channels': int64_feature(shape[2]),
        'image/shape': int64_feature(shape),
        'image/object/bbox/xmin': float_feature(xmin),
        'image/object/bbox/xmax': float_feature(xmax),
        'image/object/bbox/ymin': float_feature(ymin),
        'image/object/bbox/ymax': float_feature(ymax),
        'image/object/bbox/label': int64_feature(labels),
        'image/object/bbox/label_text': bytes_feature(labels_text),
        'image/object/bbox/difficult': int64_feature(difficult),
        'image/object/bbox/truncated': int64_feature(truncated),
        'image/format': bytes_feature(image_format),
        'image/encoded': bytes_feature(image_data)}))
    return example
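# For reference, an Example serialized with the SSD-style keys used above can
# be decoded again with tf.io.parse_single_example. This is a minimal sketch,
# assuming the key layout of the snippet immediately above; key names on the
# read side must match the write side exactly, and a tf.data pipeline would
# typically map a function like this over a TFRecordDataset of the output files.
import tensorflow as tf


def parse_ssd_example(serialized_example):
    """Decode one serialized Example written by a converter like the one above (sketch)."""
    keys_to_features = {
        'image/encoded': tf.io.FixedLenFeature((), tf.string, default_value=''),
        'image/format': tf.io.FixedLenFeature((), tf.string, default_value='jpeg'),
        'image/shape': tf.io.FixedLenFeature([3], tf.int64),
        'image/object/bbox/xmin': tf.io.VarLenFeature(tf.float32),
        'image/object/bbox/ymin': tf.io.VarLenFeature(tf.float32),
        'image/object/bbox/xmax': tf.io.VarLenFeature(tf.float32),
        'image/object/bbox/ymax': tf.io.VarLenFeature(tf.float32),
        'image/object/bbox/label': tf.io.VarLenFeature(tf.int64),
    }
    parsed = tf.io.parse_single_example(serialized_example, keys_to_features)
    # Decode the image bytes and densify the variable-length box features.
    image = tf.io.decode_image(parsed['image/encoded'], channels=3)
    labels = tf.sparse.to_dense(parsed['image/object/bbox/label'])
    bboxes = tf.stack(
        [tf.sparse.to_dense(parsed['image/object/bbox/%s' % k])
         for k in ('ymin', 'xmin', 'ymax', 'xmax')], axis=-1)
    return image, labels, bboxes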
def _convert_to_example(image_data, shape, labels, labels_text, bboxes):
    """Build an Example proto for an image example.

    Args:
        image_data: string, JPEG encoding of RGB image;
        labels: list of integers, identifier for the ground truth;
        labels_text: list of strings, human-readable labels;
        bboxes: list of bounding boxes; each box is a list of floats
            specifying [xmin, ymin, xmax, ymax]. All boxes are assumed
            to belong to the same label as the image label.
        shape: 3 integers, image shape in pixels.
    Returns:
        Example proto
    """
    # Transpose bboxes so each coordinate becomes its own list.
    bboxes = list(map(list, zip(*bboxes)))
    # Iterator over the transposed coordinate lists.
    it_bboxes = iter(bboxes)

    image_format = b'JPEG'
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': int64_feature(shape[0]),
        'image/width': int64_feature(shape[1]),
        'image/channels': int64_feature(shape[2]),
        'image/shape': int64_feature(shape),
        'image/format': bytes_feature(image_format),
        'image/encoded': bytes_feature(image_data),
        'object/label': int64_feature(labels),
        'object/label_text': bytes_feature(labels_text),
        'object/bbox/xmin': float_feature(next(it_bboxes, [])),
        'object/bbox/ymin': float_feature(next(it_bboxes, [])),
        'object/bbox/xmax': float_feature(next(it_bboxes, [])),
        'object/bbox/ymax': float_feature(next(it_bboxes, [])),
    }))
    return example
def _convert_to_example(filename, image_data, height, width, current_file_info,
                        shared_info):
    colorspace = b'RGB'
    channels = 3
    image_format = b'JPEG'
    (x_expanded, y_expanded, w_expanded, h_expanded, image_w, image_h,
     tags_id, original_image, face_xywh) = current_file_info

    feature = {
        'image/x': dataset_utils.int64_feature(x_expanded),
        'image/y': dataset_utils.int64_feature(y_expanded),
        'image/height': dataset_utils.int64_feature(h_expanded),
        'image/width': dataset_utils.int64_feature(w_expanded),
        'image/face_xywh': dataset_utils.float_feature(face_xywh),
        # 'image/left_eye_xywh': dataset_utils.float_feature(left_eye_xywh),
        # 'image/right_eye_xywh': dataset_utils.float_feature(right_eye_xywh),
        # 'image/mouth_xywh': dataset_utils.float_feature(mouth_xywh),
        'image/colorspace': dataset_utils.bytes_feature(colorspace),
        'image/channels': dataset_utils.int64_feature(channels),
        'image/format': dataset_utils.bytes_feature(image_format),
        'image/filename': dataset_utils.bytes_feature(os.path.basename(filename).encode('utf-8')),
        'image/encoded': dataset_utils.bytes_feature(image_data),
        # Encoding the original image takes up too much space. Not recommended.
        # 'image/original': dataset_utils.bytes_feature(original_image),
    }
    example = tf.train.Example(features=tf.train.Features(feature=feature))
    return example
def _convert_to_example(image_data, gt_lanes, y_samples, shape):
    """Build an Example proto for an image example.

    Args:
        image_data: string, JPEG encoding of RGB image;
        gt_lanes: list of lanes; each lane is a list of x coordinates
            sampled at y_samples;
        y_samples: list of y coordinates the lanes are sampled at;
        shape: 3 integers, image shape in pixels.
    Returns:
        Example proto
    """
    # Pad to exactly five lanes; missing lanes are filled with -2 markers.
    if len(gt_lanes) < 5:
        padding_list = [-2] * 56
        padding = 5 - len(gt_lanes)
        for i in range(padding):
            gt_lanes.append(padding_list)

    image_format = b'JPG'
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': int64_feature(shape[0]),
        'image/width': int64_feature(shape[1]),
        'image/channels': int64_feature(shape[2]),
        'image/shape': int64_feature(shape),
        'image/line1': float_feature(gt_lanes[0]),
        'image/line2': float_feature(gt_lanes[1]),
        'image/line3': float_feature(gt_lanes[2]),
        'image/line4': float_feature(gt_lanes[3]),
        'image/line5': float_feature(gt_lanes[4]),
        'image/ysamples': float_feature(y_samples),
        'image/format': bytes_feature(image_format),
        'image/encoded': bytes_feature(image_data)}))
    return example
    xmax = []
    ymax = []
    for b in bboxes:
        assert len(b) == 4
        for l, point in zip([ymin, xmin, ymax, xmax], b):
            l.append(point)

    image_format = suffix_type
    print('image_format is ', image_format)
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': int64_feature(shape[0]),
        'image/width': int64_feature(shape[1]),
        'image/channels': int64_feature(shape[2]),
        'image/shape': int64_feature(shape),
        'image/object/bbox/xmin': float_feature(xmin),
        'image/object/bbox/xmax': float_feature(xmax),
        'image/object/bbox/ymin': float_feature(ymin),
        'image/object/bbox/ymax': float_feature(ymax),
        'image/object/bbox/label': int64_feature(labels),
        'image/object/bbox/label_text': bytes_feature(labels_text),
        'image/object/bbox/difficult': int64_feature(difficult),
        'image/object/bbox/truncated': int64_feature(truncated),
        'image/format': bytes_feature(image_format),
        'image/mask/encoded': bytes_feature(mask_image_data),
        'image/nc/encoded': bytes_feature(nc_image_data),
        'image/art/encoded': bytes_feature(art_image_data),
        'image/pv/encoded': bytes_feature(pv_image_data)}))
    return example


def _convert_to_example_multiphase_multislice(nc_image_data, art_image_data,
                                              pv_image_data, labels, labels_text,
def _process_image(directory, split, name):
    # Read the image file in binary mode.
    filename = os.path.join(directory, 'image_2', name + '.png')
    image_data = tf.gfile.FastGFile(filename, 'rb').read()

    # Get shape.
    img = cv2.imread(filename)
    shape = np.shape(img)

    label_list = []
    type_list = []
    trun_list = []
    occl_list = []
    alpha_list = []
    bbox_x1_list = []
    bbox_y1_list = []
    bbox_x2_list = []
    bbox_y2_list = []
    ddd_bbox_h_list = []
    ddd_bbox_w_list = []
    ddd_bbox_l_list = []
    ddd_bbox_x_list = []
    ddd_bbox_y_list = []
    ddd_bbox_z_list = []
    ddd_bbox_ry_list = []

    # If 'test' split, skip annotations.
    if re.findall(r'train', split):
        # Read the txt annotation file.
        filename = os.path.join(directory, 'label_2', name + '.txt')
        with open(filename) as anno_file:
            objects = anno_file.readlines()
        for obj in objects:
            obj_anno = obj.split(' ')
            type_txt = obj_anno[0].encode('ascii')
            truncation = float(obj_anno[1])  # [0..1] truncated pixel ratio
            occlusion = int(obj_anno[2])     # 0 = visible, 1 = partly occluded, 2 = fully occluded, 3 = unknown
            alpha = float(obj_anno[3])       # object observation angle ([-pi..pi])
            label_list.append(CLASSES[type_txt])
            type_list.append(type_txt)
            trun_list.append(truncation)
            occl_list.append(occlusion)
            alpha_list.append(alpha)
            # Bounding box.
            bbox_x1 = float(obj_anno[4])
            bbox_y1 = float(obj_anno[5])
            bbox_x2 = float(obj_anno[6])
            bbox_y2 = float(obj_anno[7])
            bbox_x1_list.append(bbox_x1)
            bbox_y1_list.append(bbox_y1)
            bbox_x2_list.append(bbox_x2)
            bbox_y2_list.append(bbox_y2)
            # 3D bounding box.
            ddd_bbox_h = float(obj_anno[8])
            ddd_bbox_w = float(obj_anno[9])
            ddd_bbox_l = float(obj_anno[10])
            ddd_bbox_x = float(obj_anno[11])
            ddd_bbox_y = float(obj_anno[12])
            ddd_bbox_z = float(obj_anno[13])
            ddd_bbox_ry = float(obj_anno[14])
            ddd_bbox_h_list.append(ddd_bbox_h)
            ddd_bbox_w_list.append(ddd_bbox_w)
            ddd_bbox_l_list.append(ddd_bbox_l)
            ddd_bbox_x_list.append(ddd_bbox_x)
            ddd_bbox_y_list.append(ddd_bbox_y)
            ddd_bbox_z_list.append(ddd_bbox_z)
            ddd_bbox_ry_list.append(ddd_bbox_ry)

    image_format = b'PNG'
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': int64_feature(shape[0]),
        'image/width': int64_feature(shape[1]),
        'image/channels': int64_feature(shape[2]),
        'image/shape': int64_feature(shape),
        'image/object/bbox/xmin': float_feature(bbox_x1_list),
        'image/object/bbox/xmax': float_feature(bbox_x2_list),
        'image/object/bbox/ymin': float_feature(bbox_y1_list),
        'image/object/bbox/ymax': float_feature(bbox_y2_list),
        'image/object/bbox/label': int64_feature(label_list),
        'image/object/bbox/label_text': bytes_feature(type_list),
        'image/object/bbox/occlusion': int64_feature(occl_list),
        'image/object/bbox/truncation': float_feature(trun_list),
        'image/object/observation/alpha': float_feature(alpha_list),
        'image/format': bytes_feature(image_format),
        'image/encoded': bytes_feature(image_data),
        'image/object/3Dbbox/h': float_feature(ddd_bbox_h_list),
        'image/object/3Dbbox/w': float_feature(ddd_bbox_w_list),
        'image/object/3Dbbox/l': float_feature(ddd_bbox_l_list),
        'image/object/3Dbbox/x': float_feature(ddd_bbox_x_list),
        'image/object/3Dbbox/y': float_feature(ddd_bbox_y_list),
        'image/object/3Dbbox/z': float_feature(ddd_bbox_z_list),
        'image/object/3Dbbox/ry': float_feature(ddd_bbox_ry_list)
    }))
    return example
def write_images_from_directory(set_directory_name, set_directory_path,
                                annotations_json, tfrecord_writer):
    sequences = sorted(os.listdir(set_directory_path))
    for sequence in sequences:
        annotations_frames = annotations_json[set_directory_name][sequence]['frames']
        image_path = os.path.join(set_directory_path, sequence + '/')
        images = sorted(os.listdir(image_path))

        input_height = 480
        input_width = 640
        input_depth = 3

        bboxes = []
        labels = []
        labels_text = []
        difficult = []
        truncated = []
        for frame in range(len(images)):
            sys.stdout.write('\r>> Annotating image %d/%d' % (frame + 1, len(images)))
            bboxes_f = []
            labels_f = []
            labels_text_f = []
            difficult_f = []
            truncated_f = []
            object_dicts_list = []
            if str(frame) in annotations_frames:
                object_dicts_list = annotations_frames[str(frame)]
            for object_dict in object_dicts_list:
                if object_dict['lbl'] == 'person':
                    # Classify further into person_full and person_occluded.
                    label_f = 'person'
                    labels_f.append(int(LABELS[label_f][0]))
                    labels_text_f.append(label_f.encode('ascii'))
                    pos = object_dict['pos']
                    ymin = float(pos[1]) / input_height
                    if ymin < 0.0:
                        ymin = 0.0
                    if float(pos[1]) + float(pos[3]) > input_height:
                        print("FRAME height:", frame, pos[1], pos[3])
                        ymax = 1.0
                    else:
                        ymax = (float(pos[1]) + float(pos[3])) / input_height
                    xmin = float(pos[0]) / input_width
                    if xmin < 0.0:
                        xmin = 0.0
                    if float(pos[0]) + float(pos[2]) > input_width:
                        print("FRAME width:", frame, pos[0], pos[2])
                        xmax = 1.0
                    else:
                        xmax = (float(pos[0]) + float(pos[2])) / input_width
                    bboxes_f.append((ymin, xmin, ymax, xmax))
                    if object_dict['occl'] == 1:
                        truncated_f.append(1)
                    else:
                        truncated_f.append(0)
                    difficult_f.append(0)
                # elif object_dict['lbl'] == 'person?':
                #     truncated_f.append(0)
                #     difficult_f.append(1)
                # else:
                #     truncated_f.append(0)
                #     difficult_f.append(0)
                # An object's occlusion can be checked via object_dict['occl'] == 1.
                # The bbox for the full (possibly occluded) object is
                # object_dict['pos']; the bbox for only the visible part is
                # object_dict['posv'].
            bboxes.append(bboxes_f)
            labels.append(labels_f)
            labels_text.append(labels_text_f)
            difficult.append(difficult_f)
            truncated.append(truncated_f)

        for i, imagename in enumerate(images):
            sys.stdout.write('\r>> Converting image %d/%d' % (i + 1, len(images)))
            sys.stdout.flush()
            image_file = image_path + imagename
            image_data = tf.gfile.FastGFile(image_file, 'rb').read()

            xmin = []
            ymin = []
            xmax = []
            ymax = []
            # Boxes were stored as (ymin, xmin, ymax, xmax), so unpack in that order.
            for b in bboxes[i]:
                for l, point in zip([ymin, xmin, ymax, xmax], b):
                    l.append(point)

            # if len(bboxes[i]) != 0:
            image_format = b'JPEG'
            example = tf.train.Example(features=tf.train.Features(feature={
                'image/height': int64_feature(input_height),
                'image/width': int64_feature(input_width),
                'image/channels': int64_feature(input_depth),
                'image/shape': int64_feature([input_height, input_width, input_depth]),
                'image/object/bbox/xmin': float_feature(xmin),
                'image/object/bbox/xmax': float_feature(xmax),
                'image/object/bbox/ymin': float_feature(ymin),
                'image/object/bbox/ymax': float_feature(ymax),
                'image/object/bbox/label': int64_feature(labels[i]),
                'image/object/bbox/label_text': bytes_feature(labels_text[i]),
                'image/object/bbox/difficult': int64_feature(difficult[i]),
                'image/object/bbox/truncated': int64_feature(truncated[i]),
                'image/format': bytes_feature(image_format),
                'image/encoded': bytes_feature(image_data)}))
            tfrecord_writer.write(example.SerializeToString())
def _convert_to_example(image_data, shape, labels, labels_text, truncated,
                        occluded, alpha, bboxes, dimensions, locations,
                        rotation_y):
    """Build an Example proto for an image example.

    Args:
        image_data: string, PNG encoding of RGB image;
        labels: list of integers, identifier for the ground truth;
        labels_text: list of strings, human-readable labels;
        bboxes: list of bounding boxes; each box is a list of floats
            specifying [xmin, ymin, xmax, ymax]. All boxes are assumed
            to belong to the same label as the image label.
        shape: 3 integers, image shape in pixels.
    Returns:
        Example proto
    """
    # Transpose bboxes, dimensions and locations.
    bboxes = list(map(list, zip(*bboxes)))
    dimensions = list(map(list, zip(*dimensions)))
    locations = list(map(list, zip(*locations)))
    # Iterators.
    it_bboxes = iter(bboxes)
    it_dims = iter(dimensions)
    its_locs = iter(locations)

    image_format = b'PNG'
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': int64_feature(shape[0]),
        'image/width': int64_feature(shape[1]),
        'image/channels': int64_feature(shape[2]),
        'image/shape': int64_feature(shape),
        'image/format': bytes_feature(image_format),
        'image/encoded': bytes_feature(image_data),
        'object/label': int64_feature(labels),
        'object/label_text': bytes_feature(labels_text),
        'object/truncated': float_feature(truncated),
        'object/occluded': int64_feature(occluded),
        'object/alpha': float_feature(alpha),
        'object/bbox/xmin': float_feature(next(it_bboxes, [])),
        'object/bbox/ymin': float_feature(next(it_bboxes, [])),
        'object/bbox/xmax': float_feature(next(it_bboxes, [])),
        'object/bbox/ymax': float_feature(next(it_bboxes, [])),
        'object/dimensions/height': float_feature(next(it_dims, [])),
        'object/dimensions/width': float_feature(next(it_dims, [])),
        'object/dimensions/length': float_feature(next(it_dims, [])),
        'object/location/x': float_feature(next(its_locs, [])),
        'object/location/y': float_feature(next(its_locs, [])),
        'object/location/z': float_feature(next(its_locs, [])),
        'object/rotation_y': float_feature(rotation_y),
    }))
    return example
def _convert_to_example(image_data, shape, charbb, bbox, label, imname):
    nbbox = np.array(bbox)
    ymin = list(nbbox[:, 0])
    xmin = list(nbbox[:, 1])
    ymax = list(nbbox[:, 2])
    xmax = list(nbbox[:, 3])

    # print('shape: {}, height:{}, width:{}'.format(shape, shape[0], shape[1]))
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': int64_feature(shape[0]),
        'image/width': int64_feature(shape[1]),
        'image/channels': int64_feature(shape[2]),
        'image/shape': int64_feature(shape),
        # Per-character quadrilateral corners (x0..x3, y0..y3).
        'image/object/bbox/x0': float_feature(charbb[0, 0, :].tolist()),
        'image/object/bbox/x1': float_feature(charbb[0, 1, :].tolist()),
        'image/object/bbox/x2': float_feature(charbb[0, 2, :].tolist()),
        'image/object/bbox/x3': float_feature(charbb[0, 3, :].tolist()),
        'image/object/bbox/y0': float_feature(charbb[1, 0, :].tolist()),
        'image/object/bbox/y1': float_feature(charbb[1, 1, :].tolist()),
        'image/object/bbox/y2': float_feature(charbb[1, 2, :].tolist()),
        'image/object/bbox/y3': float_feature(charbb[1, 3, :].tolist()),
        'image/object/bbox/ymin': float_feature(ymin),
        'image/object/bbox/xmin': float_feature(xmin),
        'image/object/bbox/ymax': float_feature(ymax),
        'image/object/bbox/xmax': float_feature(xmax),
        'image/object/bbox/label': int64_feature(label),
        'image/format': bytes_feature(b'jpeg'),
        'image/encoded': bytes_feature(image_data),
        'image/name': bytes_feature(imname.tostring()),
    }))
    return example
def main(_):
    print('Dataset directory: ./datasets')
    print('Output directory: ./datasets')
    print('Output name: caltech')

    tf_filename = './datasets/caltech.tfrecord'
    if tf.gfile.Exists(tf_filename):
        print('Dataset files already exist. Exiting without re-creating them.')
        return

    image_path = os.path.join('./datasets', 'JPEGImages/')
    annotations_path = os.path.join('./datasets', 'Annotations/')
    print('image path: ', image_path)
    print('annotations_path: ', annotations_path)

    images = sorted(os.listdir(image_path))
    annotations_file = annotations_path + 'annotations.json'
    annotations_text = open(annotations_file)
    annotations_json = json.load(annotations_text)
    annotations_frames = annotations_json['set01']['V000']['frames']

    input_height = 480
    input_width = 640
    input_depth = 3

    bboxes = []
    labels = []
    labels_text = []
    difficult = []
    truncated = []
    for i, frame in enumerate(sorted(map(int, list(annotations_frames.keys())))):
        sys.stdout.write('\r>> Annotating image %d/%d'
                         % (i + 1, len(list(annotations_frames.keys()))))
        bboxes_f = []
        labels_f = []
        labels_text_f = []
        difficult_f = []
        truncated_f = []
        object_dicts_list = annotations_frames[str(frame)]
        for object_dict in object_dicts_list:
            if object_dict['lbl'] == 'person':
                # Classify further into person_full and person_occluded.
                label_f = 'person'
            elif object_dict['lbl'] == 'people':
                label_f = 'people'
            else:
                label_f = 'none'
            labels_f.append(int(LABELS[label_f][0]))
            labels_text_f.append(label_f.encode('ascii'))
            # An object's occlusion can be checked via object_dict['occl'] == 1.
            # The bbox for the full (possibly occluded) object is
            # object_dict['pos']; the bbox for only the visible part is
            # object_dict['posv'].
            difficult_f.append(0)
            truncated_f.append(0)
            pos = object_dict['pos']
            bboxes_f.append((float(pos[1]) / input_height,
                             float(pos[0]) / input_width,
                             float(pos[1] + pos[3]) / input_height,
                             float(pos[0] + pos[2]) / input_width))
        bboxes.append(bboxes_f)
        labels.append(labels_f)
        labels_text.append(labels_text_f)
        difficult.append(difficult_f)
        truncated.append(truncated_f)

    with tf.python_io.TFRecordWriter(tf_filename) as tfrecord_writer:
        for i, imagename in enumerate(images):
            sys.stdout.write('\r>> Converting image %d/%d' % (i + 1, len(images)))
            sys.stdout.flush()
            image_file = image_path + imagename
            image_data = tf.gfile.FastGFile(image_file, 'rb').read()

            xmin = []
            ymin = []
            xmax = []
            ymax = []
            # Boxes were stored as (ymin, xmin, ymax, xmax), so unpack in that order.
            for b in bboxes[i]:
                for l, point in zip([ymin, xmin, ymax, xmax], b):
                    l.append(point)

            image_format = b'JPEG'
            example = tf.train.Example(features=tf.train.Features(feature={
                'image/height': int64_feature(input_height),
                'image/width': int64_feature(input_width),
                'image/channels': int64_feature(input_depth),
                'image/shape': int64_feature([input_height, input_width, input_depth]),
                'image/object/bbox/xmin': float_feature(xmin),
                'image/object/bbox/xmax': float_feature(xmax),
                'image/object/bbox/ymin': float_feature(ymin),
                'image/object/bbox/ymax': float_feature(ymax),
                'image/object/bbox/label': int64_feature(labels[i]),
                'image/object/bbox/label_text': bytes_feature(labels_text[i]),
                'image/object/bbox/difficult': int64_feature(difficult[i]),
                'image/object/bbox/truncated': int64_feature(truncated[i]),
                'image/format': bytes_feature(image_format),
                'image/encoded': bytes_feature(image_data)}))
            tfrecord_writer.write(example.SerializeToString())

    print('\nFinished converting the Caltech dataset!')
def _convert_to_example(image_data, labels, labels_text, bboxes, shape,
                        difficult, truncated, oriented_bbox, ignored, filename):
    """Build an Example proto for an image example.

    Args:
        image_data: string, JPEG encoding of RGB image;
        labels: list of integers, identifier for the ground truth;
        labels_text: list of strings, human-readable labels;
        bboxes: list of bounding boxes; each box is a list of floats
            specifying [ymin, xmin, ymax, xmax]. All boxes are assumed
            to belong to the same label as the image label.
        shape: 3 integers, image shape in pixels.
    Returns:
        Example proto
    """
    xmin = []
    ymin = []
    xmax = []
    ymax = []
    for b in bboxes:
        assert len(b) == 4
        for l, point in zip([ymin, xmin, ymax, xmax], b):
            l.append(point)

    x1 = []
    x2 = []
    x3 = []
    x4 = []
    y1 = []
    y2 = []
    y3 = []
    y4 = []
    # Unpack each oriented box's eight corner coordinates.
    for origin in oriented_bbox:
        assert len(origin) == 8
        for l, point in zip([x1, x2, x3, x4, y1, y2, y3, y4], origin):
            l.append(point)

    image_format = b'JPEG'
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': int64_feature(shape[0]),
        'image/width': int64_feature(shape[1]),
        'image/channels': int64_feature(shape[2]),
        'image/shape': int64_feature(shape),
        'image/filename': bytes_feature(filename.encode('utf-8')),
        'image/object/bbox/xmin': float_feature(xmin),
        'image/object/bbox/xmax': float_feature(xmax),
        'image/object/bbox/ymin': float_feature(ymin),
        'image/object/bbox/ymax': float_feature(ymax),
        'image/object/bbox/x1': float_feature(x1),
        'image/object/bbox/y1': float_feature(y1),
        'image/object/bbox/x2': float_feature(x2),
        'image/object/bbox/y2': float_feature(y2),
        'image/object/bbox/x3': float_feature(x3),
        'image/object/bbox/y3': float_feature(y3),
        'image/object/bbox/x4': float_feature(x4),
        'image/object/bbox/y4': float_feature(y4),
        'image/object/bbox/label': int64_feature(labels),
        'image/object/bbox/label_text': bytes_feature(labels_text),
        'image/object/bbox/difficult': int64_feature(difficult),
        'image/object/bbox/truncated': int64_feature(truncated),
        'image/object/bbox/ignored': int64_feature(ignored),
        'image/format': bytes_feature(image_format),
        'image/encoded': bytes_feature(image_data)}))
    return example