def to_tfexample(raw_data, mfcc_data, class_id):
    """Serialize raw waveform samples, MFCC features and a label as a tf.train.Example."""
    feature_map = {
        'audio/mfcc': dataset_utils.float_feature(mfcc_data),
        'audio/wav/data': dataset_utils.float_feature(raw_data),
        'audio/wav/length': dataset_utils.int64_feature(len(raw_data)),
        'audio/label': dataset_utils.int64_feature(class_id),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature_map))
def _process_image(directory, split, name):
    """Load one KITTI sample (image + optional annotations) as a tf.train.Example.

    Args:
        directory: dataset root containing the 'image_2' and 'label_2' subdirs.
        split: split name; annotation files are read only when it contains 'train'
            (test splits ship no labels).
        name: sample basename without extension.

    Returns:
        A tf.train.Example with the encoded PNG, its shape, and per-object
        bounding boxes / class labels (empty lists for non-train splits).
    """
    # Read the encoded PNG bytes.
    # BUG FIX: the file must be opened in binary mode ('rb'); text mode ('r')
    # fails or corrupts binary data under Python 3. Also use a context manager
    # so the handle is always closed.
    filename = os.path.join(directory, 'image_2', name + '.png')
    with tf.gfile.FastGFile(filename, 'rb') as f:
        image_data = f.read()

    # Decode only to recover (height, width, channels).
    img = cv2.imread(filename)
    shape = np.shape(img)

    label_list = []
    type_list = []
    bbox_x1_list = []
    bbox_y1_list = []
    bbox_x2_list = []
    bbox_y2_list = []

    # If 'test' split, skip annotations.
    if re.findall(r'train', split):
        anno_path = os.path.join(directory, 'label_2', name + '.txt')
        with open(anno_path) as anno_file:
            lines = anno_file.readlines()
        # Renamed loop variable from 'object' to avoid shadowing the builtin.
        for line in lines:
            obj_anno = line.split(' ')
            type_txt = obj_anno[0].encode('ascii')
            if type_txt in CLASSES:
                label_list.append(CLASSES[type_txt])
                type_list.append(type_txt)
                # KITTI label fields 4-7 are the 2D box: left, top, right, bottom.
                bbox_x1_list.append(float(obj_anno[4]))
                bbox_y1_list.append(float(obj_anno[5]))
                bbox_x2_list.append(float(obj_anno[6]))
                bbox_y2_list.append(float(obj_anno[7]))

    image_format = b'PNG'
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/encoded': bytes_feature(image_data),
        # BUG FIX: image_format was computed but never written; sibling
        # converters (e.g. _convert_to_example) record 'image/format'.
        'image/format': bytes_feature(image_format),
        'image/height': int64_feature(shape[0]),
        'image/width': int64_feature(shape[1]),
        'image/channels': int64_feature(shape[2]),
        'image/shape': int64_feature(shape),
        'image/object/bbox/xmin': float_feature(bbox_x1_list),
        'image/object/bbox/xmax': float_feature(bbox_x2_list),
        'image/object/bbox/ymin': float_feature(bbox_y1_list),
        'image/object/bbox/ymax': float_feature(bbox_y2_list),
        'image/object/bbox/label': int64_feature(label_list),
        'image/object/bbox/label_text': bytes_feature(type_list),
    }))
    return example
def to_tfexample(mfcc_data, video_data, class_id):
    """Serialize MFCC audio features, video data and a class label as a tf.train.Example."""
    features = tf.train.Features(feature={
        'audio/mfcc': dataset_utils.float_feature(mfcc_data),
        'video/data': dataset_utils.float_feature(video_data),
        'label': dataset_utils.int64_feature(class_id),
    })
    return tf.train.Example(features=features)
def to_tfexample(color_data, depth_data, color_format, depth_format, class_id):
    """Serialize paired encoded color/depth images (plus formats) and a label."""
    feature_map = {
        'image/color/encoded': dataset_utils.bytes_feature(color_data),
        'image/color/format': dataset_utils.bytes_feature(color_format),
        'image/depth/encoded': dataset_utils.bytes_feature(depth_data),
        'image/depth/format': dataset_utils.bytes_feature(depth_format),
        'image/class/label': dataset_utils.int64_feature(class_id),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature_map))
def to_tfexample(color_video, depth_video, class_id):
    """Serialize paired color/depth video arrays (data + shape) and a label.

    NOTE(review): the shapes are stored via float_feature even though they are
    integer dimensions — preserved as-is; the reader must decode them the same way.
    """
    feature_map = {
        'video/color/data': dataset_utils.float_feature(list(color_video)),
        'video/color/shape': dataset_utils.float_feature(color_video.shape),
        'video/depth/data': dataset_utils.float_feature(list(depth_video)),
        'video/depth/shape': dataset_utils.float_feature(depth_video.shape),
        'video/label': dataset_utils.int64_feature(class_id),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature_map))
def convert_to_tfrecord(batch, output_file):
    """Write an (images, labels) batch to a TFRecord file, one Example per sample."""
    images, labels = batch
    print('Generating %s' % output_file)
    images = images.astype(np.uint8)
    labels = labels.astype(np.int64)
    with tf.python_io.TFRecordWriter(output_file) as writer:
        for image, label in zip(images, labels):
            feature_map = {
                'image': bytes_feature(image.tobytes()),
                'label': int64_feature(label),
            }
            example = tf.train.Example(
                features=tf.train.Features(feature=feature_map))
            writer.write(example.SerializeToString())
def _convert_to_example(image_data, labels, labels_text, bboxes, shape,
                        difficult, truncated):
    """Build an Example proto for an image example.

    Args:
        image_data: string, JPEG encoding of RGB image.
        labels: list of integers, identifier for the ground truth.
        labels_text: list of strings, human-readable labels.
        bboxes: list of bounding boxes; each box is a list of 4 coordinates.
            NOTE(review): the unpacking below implies each box is ordered
            [ymin, xmin, ymax, xmax] — the old docstring claimed
            [xmin, ymin, xmax, ymax], which contradicts the code; confirm
            against the callers. All boxes are assumed to belong to the same
            label as the image label.
        shape: 3 integers, image shape in pixels (height, width, channels).
        difficult: list of integers, per-object 'difficult' flags.
        truncated: list of integers, per-object 'truncated' flags.

    Returns:
        Example proto.
    """
    xmin = []
    ymin = []
    xmax = []
    ymax = []
    # Plain loop instead of the old side-effect list comprehension
    # (which needed pylint suppressions).
    for box in bboxes:
        assert len(box) == 4
        ymin.append(box[0])
        xmin.append(box[1])
        ymax.append(box[2])
        xmax.append(box[3])

    image_format = b'JPEG'
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': int64_feature(shape[0]),
        'image/width': int64_feature(shape[1]),
        'image/channels': int64_feature(shape[2]),
        'image/shape': int64_feature(shape),
        'image/object/bbox/xmin': float_feature(xmin),
        'image/object/bbox/xmax': float_feature(xmax),
        'image/object/bbox/ymin': float_feature(ymin),
        'image/object/bbox/ymax': float_feature(ymax),
        'image/object/bbox/label': int64_feature(labels),
        'image/object/bbox/label_text': bytes_feature(labels_text),
        'image/object/bbox/difficult': int64_feature(difficult),
        'image/object/bbox/truncated': int64_feature(truncated),
        'image/format': bytes_feature(image_format),
        'image/encoded': bytes_feature(image_data)}))
    return example