def _convert_to_example(image_data, shape, bboxes, labels, difficult, truncated,
                        preprocessed_box, name):
    """Build an Example proto for an image with boxes, labels and a preprocessed box."""
    xmin = []
    ymin = []
    xmax = []
    ymax = []
    for b in bboxes:
        assert len(b) == 4
        # pylint: disable=expression-not-assigned
        [l.append(point) for l, point in zip([ymin, xmin, ymax, xmax], b)]
        # pylint: enable=expression-not-assigned

    image_format = b'JPEG'
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': int64_feature(shape[0]),
        'image/width': int64_feature(shape[1]),
        'image/channels': int64_feature(shape[2]),
        'image/shape': int64_feature(shape),
        'image/object/bbox/xmin': float_feature(xmin),
        'image/object/bbox/xmax': float_feature(xmax),
        'image/object/bbox/ymin': float_feature(ymin),
        'image/object/bbox/ymax': float_feature(ymax),
        'image/object/bbox/label': float_feature(labels),
        'image/object/bbox/difficult': int64_feature(difficult),
        'image/object/bbox/truncated': int64_feature(truncated),
        'image/format': bytes_feature(image_format),
        'image/preprocessed_box': int64_feature(preprocessed_box.tolist()),
        'image/encoded': bytes_feature(image_data),
        'image/name': bytes_feature(bytes(name, encoding='utf-8'))
    }))
    return example
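# All of the converters in this file lean on small feature-wrapper helpers
# (int64_feature, float_feature, bytes_feature, and the dataset_utils
# variants). A minimal sketch of such helpers is given below; it is an
# assumption modeled on what the converters pass in (ints, lists, tuples and
# numpy arrays), not code taken from any of the original repositories.
import numpy as np
import tensorflow as tf


def _as_list(value):
    """Normalize scalars, tuples and numpy arrays to a plain Python list."""
    if isinstance(value, np.ndarray):
        return value.tolist()
    if isinstance(value, (list, tuple)):
        return list(value)
    return [value]


def int64_feature(value):
    """Wrap an int (or list of ints) in a tf.train.Feature."""
    return tf.train.Feature(int64_list=tf.train.Int64List(value=_as_list(value)))


def float_feature(value):
    """Wrap a float (or list of floats) in a tf.train.Feature."""
    return tf.train.Feature(float_list=tf.train.FloatList(value=_as_list(value)))


def bytes_feature(value):
    """Wrap a bytes string (or list of bytes strings) in a tf.train.Feature."""
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=_as_list(value)))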
def _convert_to_example(image_data, image_shape, density_map, density_shape):
    """Build an Example proto for an image example.

    Args:
      image_data: image raw data (string)
      image_shape: shape of the image
      density_map: density map generated from the annotations
      density_shape: shape of the density map
    Returns:
      Example proto
    """
    assert image_shape[0] == RESIZED_IMAGE_SHAPE[0]
    assert image_shape[1] == RESIZED_IMAGE_SHAPE[1]
    assert density_shape[0] == RESIZED_IMAGE_SHAPE[0] / SHRINK_RATIO
    assert density_shape[1] == RESIZED_IMAGE_SHAPE[1] / SHRINK_RATIO

    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': int64_feature(image_shape[0]),
        'image/width': int64_feature(image_shape[1]),
        'image/channels': int64_feature(CHANNELS),
        'image/shape': int64_feature([image_shape[0], image_shape[1], CHANNELS]),
        'image/encoded': bytes_feature(image_data),
        'image/format': bytes_feature(b'RAW'),
        'image/density_map/shape': int64_feature([density_shape[0], density_shape[1]]),
        'image/density_map/data': float_feature(density_map.flatten())
    }))
    return example
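# A minimal sketch (assumed, not part of the original code) of how a record
# written by the density-map converter above could be parsed back. It assumes
# the 'RAW' image bytes are uint8 and reuses the record keys defined above.
def _parse_density_record(serialized):
    keys_to_features = {
        'image/encoded': tf.FixedLenFeature((), tf.string),
        'image/shape': tf.FixedLenFeature([3], tf.int64),
        'image/density_map/shape': tf.FixedLenFeature([2], tf.int64),
        'image/density_map/data': tf.VarLenFeature(tf.float32),
    }
    parsed = tf.parse_single_example(serialized, keys_to_features)
    # 'RAW' format: the image was stored as raw bytes rather than JPEG/PNG.
    image = tf.decode_raw(parsed['image/encoded'], tf.uint8)
    image = tf.reshape(image, parsed['image/shape'])
    density_map = tf.sparse_tensor_to_dense(parsed['image/density_map/data'])
    density_map = tf.reshape(density_map, parsed['image/density_map/shape'])
    return image, density_map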
def _convert_to_example(image_data, labels, labels_text, bboxes, shape,
                        difficult, truncated):
    xmin = []
    ymin = []
    xmax = []
    ymax = []
    for b in bboxes:
        assert len(b) == 4
        # pylint: disable=expression-not-assigned
        # Note the coordinate order here: each box is [xmin, ymin, xmax, ymax].
        [l.append(point) for l, point in zip([xmin, ymin, xmax, ymax], b)]
        # pylint: enable=expression-not-assigned

    image_format = b'JPEG'
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': int64_feature(shape[0]),
        'image/width': int64_feature(shape[1]),
        'image/channels': int64_feature(shape[2]),
        'image/shape': int64_feature(shape),
        'image/object/bbox/xmin': float_feature(xmin),
        'image/object/bbox/xmax': float_feature(xmax),
        'image/object/bbox/ymin': float_feature(ymin),
        'image/object/bbox/ymax': float_feature(ymax),
        'image/object/bbox/label': int64_feature(labels),
        'image/object/bbox/label_text': bytes_feature(labels_text),
        'image/object/bbox/difficult': int64_feature(difficult),
        'image/object/bbox/truncated': int64_feature(truncated),
        'image/format': bytes_feature(image_format),
        'image/encoded': bytes_feature(image_data)
    }))
    return example
def _format_data(sess, image_reader, idx, tmp_dir, pathlist_A, pathlist_B):
    ## Resize and random flip
    # if np.random.rand() > 0.5:
    #     IMG_FLIP = True
    # else:
    #     IMG_FLIP = False
    IMG_FLIP = False
    path_A = _img_resize_flip(pathlist_A[idx], IMG_FLIP, tmp_dir,
                              image_width=_IMG_WEIGHT, image_height=_IMG_HEIGHT)
    path_B = _img_resize_flip(pathlist_B[idx], IMG_FLIP, tmp_dir,
                              image_width=_IMG_WEIGHT, image_height=_IMG_HEIGHT)
    # Read as binary so the raw encoded bytes can be stored in the record.
    image_raw_A = tf.gfile.FastGFile(path_A, 'rb').read()
    image_raw_B = tf.gfile.FastGFile(path_B, 'rb').read()
    height, width = image_reader.read_image_dims(sess, image_raw_A)
    # pdb.set_trace()
    example = tf.train.Example(features=tf.train.Features(feature={
        'image_name_A': dataset_utils.bytes_feature(pathlist_A[idx].split('/')[-1]),
        'image_name_B': dataset_utils.bytes_feature(pathlist_B[idx].split('/')[-1]),
        'image_raw_A': dataset_utils.bytes_feature(image_raw_A),
        'image_raw_B': dataset_utils.bytes_feature(image_raw_B),
        'image_format': dataset_utils.bytes_feature('png'),
        'image_height': dataset_utils.int64_feature(height),
        'image_width': dataset_utils.int64_feature(width),
    }))
    return example
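# A minimal sketch (assumed, not part of the original code) of how the paired
# A/B records written by _format_data above could be read back and decoded.
def _parse_pair_record(serialized):
    keys_to_features = {
        'image_raw_A': tf.FixedLenFeature((), tf.string),
        'image_raw_B': tf.FixedLenFeature((), tf.string),
        'image_height': tf.FixedLenFeature((), tf.int64),
        'image_width': tf.FixedLenFeature((), tf.int64),
    }
    parsed = tf.parse_single_example(serialized, keys_to_features)
    # The records store PNG-encoded bytes ('image_format' is 'png').
    image_a = tf.image.decode_png(parsed['image_raw_A'], channels=3)
    image_b = tf.image.decode_png(parsed['image_raw_B'], channels=3)
    return image_a, image_b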
def convert_to_example(imgdata, shape, labels, labels_text, bboxes):
    """Convert one image and its annotations to an Example proto."""
    xmin = []
    ymin = []
    xmax = []
    ymax = []
    for b in bboxes:
        assert len(b) == 4
        [l.append(point) for l, point in zip([ymin, xmin, ymax, xmax], b)]

    image_format = b'JPEG'
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': int64_feature(shape[0]),
        'image/width': int64_feature(shape[1]),
        'image/channel': int64_feature(shape[2]),
        'image/shape': int64_feature(list(shape)),
        'image/object/bbox/xmin': float_feature(xmin),
        'image/object/bbox/xmax': float_feature(xmax),
        'image/object/bbox/ymin': float_feature(ymin),
        'image/object/bbox/ymax': float_feature(ymax),
        'image/object/bbox/label': int64_feature(labels),
        'image/object/bbox/label_text': bytes_feature(labels_text),
        'image/format': bytes_feature(image_format),
        'image/encoded': bytes_feature(imgdata)
    }))
    return example
def process_data1(image, tfrecord_write):
    image_format = b'PNG'
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/encoded': bytes_feature(image),
        'image/format': bytes_feature(image_format),
    }))
    tfrecord_write.write(example.SerializeToString())
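# Hypothetical usage of process_data1 (the file names below are assumptions,
# not taken from the original code): read a PNG's encoded bytes and append
# the resulting Example to an open TFRecord file.
with tf.python_io.TFRecordWriter('images.tfrecord') as tfrecord_write:
    with tf.gfile.GFile('example.png', 'rb') as f:
        png_bytes = f.read()
    process_data1(png_bytes, tfrecord_write)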
def dict_to_tf_example(encoded_jpg, label, theta):
    class_label = 0 if label == 'positive' else 1
    theta_label = theta
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/encoded': dataset_utils.bytes_feature(encoded_jpg),
        'image/format': dataset_utils.bytes_feature('jpeg'),
        'image/class/label': dataset_utils.int64_feature(class_label),
        'image/theta/label': dataset_utils.int64_feature(theta_label),
    }))
    return example
def create_tf_example(example):
    # Udacity real data image format from Carla.
    # Files can be downloaded from https://mega.nz/#F!ldJhzRhL!NWASXMs4cWegrYYNbJ7bEg
    height = 1096   # Image height
    width = 1368    # Image width
    filename = example['path']  # Filename of the image. Empty if image is not from file
    filename = filename.encode()

    with tf.gfile.GFile(example['path'], 'rb') as fid:
        encoded_image = fid.read()
    image_format = 'jpg'.encode()

    xmins = []         # List of normalized left x coordinates in bounding box (1 per box)
    xmaxs = []         # List of normalized right x coordinates in bounding box (1 per box)
    ymins = []         # List of normalized top y coordinates in bounding box (1 per box)
    ymaxs = []         # List of normalized bottom y coordinates in bounding box (1 per box)
    classes_text = []  # List of string class names of bounding box (1 per box)
    classes = []       # List of integer class ids of bounding box (1 per box)

    for box in example['boxes']:
        # if box['occluded'] is False:
        #     print("adding box")
        xmins.append(float(box['x_min'] / width))
        xmaxs.append(float(box['x_max'] / width))
        ymins.append(float(box['y_min'] / height))
        ymaxs.append(float(box['y_max'] / height))
        classes_text.append(box['label'].encode())
        classes.append(int(LABEL_DICT_4[box['label']]))

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_utils.int64_feature(height),
        'image/width': dataset_utils.int64_feature(width),
        'image/filename': dataset_utils.bytes_feature(filename),
        'image/source_id': dataset_utils.bytes_feature(filename),
        'image/encoded': dataset_utils.bytes_feature(encoded_image),
        'image/format': dataset_utils.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_utils.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_utils.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_utils.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_utils.float_list_feature(ymaxs),
        'image/object/class/text': dataset_utils.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_utils.int64_list_feature(classes),
    }))
    return tf_example
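# A minimal sketch (assumed, not part of the original code) of how the
# variable-length bounding-box features written above could be parsed back
# into an [N, 4] box tensor plus per-box class labels.
def _parse_detection_record(serialized):
    keys_to_features = {
        'image/encoded': tf.FixedLenFeature((), tf.string),
        'image/object/bbox/xmin': tf.VarLenFeature(tf.float32),
        'image/object/bbox/ymin': tf.VarLenFeature(tf.float32),
        'image/object/bbox/xmax': tf.VarLenFeature(tf.float32),
        'image/object/bbox/ymax': tf.VarLenFeature(tf.float32),
        'image/object/class/label': tf.VarLenFeature(tf.int64),
    }
    parsed = tf.parse_single_example(serialized, keys_to_features)
    image = tf.image.decode_jpeg(parsed['image/encoded'], channels=3)
    # Stack the per-coordinate lists into [ymin, xmin, ymax, xmax] rows.
    boxes = tf.stack([
        tf.sparse_tensor_to_dense(parsed['image/object/bbox/ymin']),
        tf.sparse_tensor_to_dense(parsed['image/object/bbox/xmin']),
        tf.sparse_tensor_to_dense(parsed['image/object/bbox/ymax']),
        tf.sparse_tensor_to_dense(parsed['image/object/bbox/xmax']),
    ], axis=1)
    labels = tf.sparse_tensor_to_dense(parsed['image/object/class/label'])
    return image, boxes, labels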
def process_data(image, image_mask, tfrecord_write):
    image_format = b'PNG'
    image_mask_format = b'PNG'
    xxx = random.random()
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/encoded': bytes_feature(image),
        'image/format': bytes_feature(image_format),
        # 'mask/encode': bytes_feature(image_mask),
        # 'mask/format': bytes_feature(image_mask_format),
        # 'mask/encode': _EncodedFloatFeature(image_mask),
    }))
    tfrecord_write.write(example.SerializeToString())
def _add_to_tfrecord(data_filename, labels_filename, num_images, tfrecord_writer):
    """Loads data from the binary MNIST files and writes files to a TFRecord.

    Args:
      data_filename: The filename of the MNIST images.
      labels_filename: The filename of the MNIST labels.
      num_images: The number of images in the dataset.
      tfrecord_writer: The TFRecord writer to use for writing.
    """
    images = _extract_images(data_filename, num_images)
    labels = _extract_labels(labels_filename, num_images)
    shape = (_IMAGE_SIZE, _IMAGE_SIZE, _NUM_CHANNELS)

    with tf.Graph().as_default():
        image = tf.placeholder(dtype=tf.uint8, shape=shape)
        encoded_png = tf.image.encode_png(image)
        with tf.Session('') as sess:
            for j in range(num_images):
                sys.stdout.write('\r>> Converting image %d/%d' % (j + 1, num_images))
                sys.stdout.flush()
                image_raw = images[j].tostring()
                feature = {
                    'label': dataset_utils.int64_feature(int(labels[j])),
                    'image_raw': dataset_utils.bytes_feature(image_raw)
                }
                features = tf.train.Features(feature=feature)
                example = tf.train.Example(features=features)
                # png_string = sess.run(encoded_png, feed_dict={image: images[j]})
                # example = dataset_utils.image_to_tfexample(
                #     png_string, 'png'.encode(), _IMAGE_SIZE, _IMAGE_SIZE, labels[j])
                tfrecord_writer.write(example.SerializeToString())
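# A minimal sketch (assumed, not part of the original code) of how the raw
# MNIST records written above could be parsed back; _IMAGE_SIZE and
# _NUM_CHANNELS are the same module constants used by the writer.
def _parse_mnist_record(serialized):
    keys_to_features = {
        'image_raw': tf.FixedLenFeature((), tf.string),
        'label': tf.FixedLenFeature((), tf.int64),
    }
    parsed = tf.parse_single_example(serialized, keys_to_features)
    # The image was stored with ndarray.tostring(), i.e. raw uint8 bytes.
    image = tf.decode_raw(parsed['image_raw'], tf.uint8)
    image = tf.reshape(image, [_IMAGE_SIZE, _IMAGE_SIZE, _NUM_CHANNELS])
    return image, parsed['label']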
def dict_to_tf_example(path, size, label, theta):
    with tf.gfile.GFile(path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')

    class_label = 1 if label == 'positive' else 0
    theta_label = convert_theta(float(theta))
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/encoded': dataset_utils.bytes_feature(encoded_jpg),
        'image/format': dataset_utils.bytes_feature('jpeg'),
        'image/class/label': dataset_utils.int64_feature(class_label),
        'image/theta/label': dataset_utils.int64_feature(theta_label),
    }))
    return example
def _convert_to_example(image_data, labels, labels_text, bboxes, shape,
                        difficult, truncated):
    """Build an Example proto for an image example.

    Args:
      image_data: string, JPEG encoding of RGB image;
      labels: list of integers, identifier for the ground truth;
      labels_text: list of strings, human-readable labels;
      bboxes: list of bounding boxes; each box is a list of integers
          specifying [ymin, xmin, ymax, xmax]. All boxes are assumed to belong
          to the same label as the image label.
      shape: 3 integers, image shapes in pixels.
    Returns:
      Example proto
    """
    xmin = []
    ymin = []
    xmax = []
    ymax = []
    for b in bboxes:
        assert len(b) == 4
        # pylint: disable=expression-not-assigned
        # Transpose [(ymin_0, xmin_0, ymax_0, xmax_0), (ymin_1, xmin_1, ymax_1, xmax_1), ...]
        # into [ymin_0, ymin_1, ...], [xmin_0, xmin_1, ...], [ymax_0, ymax_1, ...], [xmax_0, xmax_1, ...]
        [l.append(point) for l, point in zip([ymin, xmin, ymax, xmax], b)]
        # pylint: enable=expression-not-assigned

    image_format = b'JPEG'
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': int64_feature(shape[0]),
        'image/width': int64_feature(shape[1]),
        'image/channels': int64_feature(shape[2]),
        'image/shape': int64_feature(shape),
        'image/object/bbox/xmin': float_feature(xmin),
        'image/object/bbox/xmax': float_feature(xmax),
        'image/object/bbox/ymin': float_feature(ymin),
        'image/object/bbox/ymax': float_feature(ymax),
        'image/object/bbox/label': int64_feature(labels),
        'image/object/bbox/label_text': bytes_feature(labels_text),
        'image/object/bbox/difficult': int64_feature(difficult),
        'image/object/bbox/truncated': int64_feature(truncated),
        'image/format': bytes_feature(image_format),
        'image/encoded': bytes_feature(image_data)
    }))
    return example
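# A minimal sketch (assumed, not part of the original code) of how this
# record layout is commonly consumed with the slim TF-Example decoder, which
# reassembles the four per-coordinate lists into an [N, 4] box tensor.
slim = tf.contrib.slim

keys_to_features = {
    'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
    'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),
    'image/object/bbox/ymin': tf.VarLenFeature(tf.float32),
    'image/object/bbox/xmin': tf.VarLenFeature(tf.float32),
    'image/object/bbox/ymax': tf.VarLenFeature(tf.float32),
    'image/object/bbox/xmax': tf.VarLenFeature(tf.float32),
    'image/object/bbox/label': tf.VarLenFeature(tf.int64),
}
items_to_handlers = {
    'image': slim.tfexample_decoder.Image('image/encoded', 'image/format'),
    'object/bbox': slim.tfexample_decoder.BoundingBox(
        ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/'),
    'object/label': slim.tfexample_decoder.Tensor('image/object/bbox/label'),
}
decoder = slim.tfexample_decoder.TFExampleDecoder(keys_to_features, items_to_handlers)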
def _convert_to_example(image_data, labels, labels_text, bboxes, shape):
    """Build an Example proto for an image example.

    Args:
      image_data: string, PNG encoding of RGB image;
      labels: list of integers, identifier for the ground truth;
      labels_text: list of strings, human-readable labels;
      bboxes: list of bounding boxes; each box is a list of integers
          specifying [xmin, ymin, xmax, ymax]. All boxes are assumed to belong
          to the same label as the image label.
      shape: 3 integers, image shapes in pixels.
    Returns:
      Example proto
    """
    xmin = []
    ymin = []
    xmax = []
    ymax = []
    for b in bboxes:
        assert len(b) == 4
        # NB: points are appended in [xmin, xmax, ymin, ymax] order here.
        [l.append(point) for l, point in zip([xmin, xmax, ymin, ymax], b)]
    print('xmin:', xmin)
    print('xmax:', xmax)
    print('ymin:', ymin)
    print('ymax:', ymax)

    image_format = b'PNG'
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': int64_feature(shape[0]),
        'image/width': int64_feature(shape[1]),
        'image/channels': int64_feature(shape[2]),
        'image/shape': int64_feature(shape),
        'image/object/bbox/xmin': float_list_feature(xmin),
        'image/object/bbox/xmax': float_list_feature(xmax),
        'image/object/bbox/ymin': float_list_feature(ymin),
        'image/object/bbox/ymax': float_list_feature(ymax),
        'image/object/class/label': int64_feature(labels),
        'image/object/class/text': bytes_feature(labels_text),
        'image/format': bytes_feature(image_format),
        'image/encoded': bytes_feature(image_data)
    }))
    return example
def _add_to_tfrecord(filename, tfrecord_writer, offset=0):
    """Loads data from the cifar10 pickle files and writes files to a TFRecord.

    Args:
      filename: The filename of the cifar10 pickle file.
      tfrecord_writer: The TFRecord writer to use for writing.
      offset: An offset into the absolute number of images previously written.
    Returns:
      The new offset.
    """
    # The CIFAR-10 pickle files are binary, so open them in 'rb' mode.
    with tf.gfile.Open(filename, 'rb') as f:
        data = cPickle.load(f)

    images = data['data']
    num_images = images.shape[0]
    images = images.reshape((num_images, 3, 32, 32))
    labels = data['labels']

    with tf.Graph().as_default():
        image_placeholder = tf.placeholder(dtype=tf.uint8)
        encoded_image = tf.image.encode_png(image_placeholder)
        with tf.Session('') as sess:
            for j in range(num_images):
                sys.stdout.write('\r>> Reading file [%s] image %d/%d' % (
                    filename, offset + j + 1, offset + num_images))
                sys.stdout.flush()
                # image = np.squeeze(images[j]).transpose((1, 2, 0))
                image = images[j].tostring()
                label = labels[j]
                # png_string = sess.run(encoded_image,
                #                       feed_dict={image_placeholder: image})
                # example = dataset_utils.image_to_tfexample(
                #     png_string, 'png', _IMAGE_SIZE, _IMAGE_SIZE, label)
                feature = {
                    'label': dataset_utils.int64_feature(int(label)),
                    'image': dataset_utils.bytes_feature(image)
                }
                features = tf.train.Features(feature=feature)
                example = tf.train.Example(features=features)
                tfrecord_writer.write(example.SerializeToString())
    return offset + num_images
def dict_to_tf_example(file_name):
    with open(os.path.join(args.data_path, folders[0], file_name + '.png'), 'rb') as fid:
        encoded_color = fid.read()
    with open(os.path.join(args.data_path, folders[1], file_name + '.png'), 'rb') as fid:
        encoded_depth = fid.read()
    with open(os.path.join(args.data_path, folders[2], file_name + '.png'), 'rb') as fid:
        encoded_label_map = fid.read()
    with open(os.path.join(args.data_path, folders[3], file_name + '.png'), 'rb') as fid:
        encoded_label_aug_map = fid.read()

    example = tf.train.Example(features=tf.train.Features(feature={
        'image/color': dataset_utils.bytes_feature(encoded_color),
        'image/format': dataset_utils.bytes_feature('png'),
        'image/encoded_depth': dataset_utils.bytes_feature(encoded_depth),
        'image/label': dataset_utils.bytes_feature(encoded_label_map),
        'image/label_aug': dataset_utils.bytes_feature(encoded_label_aug_map),
    }))
    return example
def dict_to_tf_example(data, label):
    with open(data, 'rb') as inf:
        encoded_data = inf.read()
    img_label = cv2.imread(label)
    img_mask = image2label(img_label)
    encoded_label = img_mask.astype(np.uint8).tobytes()

    height, width = img_label.shape[0], img_label.shape[1]
    if height < vgg_16.default_image_size or width < vgg_16.default_image_size:
        # Ensure the image is large enough for the final random crop.
        return None
    fname = data[data.rfind('/') + 1:]
    print(fname)

    # Your code here, fill the dict
    feature_dict = {
        'image/height': dataset_utils.int64_feature(height),
        'image/width': dataset_utils.int64_feature(width),
        'image/filename': dataset_utils.bytes_feature(fname.encode('utf8')),
        'image/encoded': dataset_utils.bytes_feature(encoded_data),
        'image/label': dataset_utils.bytes_feature(encoded_label),
        'image/format': dataset_utils.bytes_feature('jpeg'.encode('utf8')),
    }
    example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
    return example
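# A minimal sketch (assumed, not part of the original code) of how the raw
# uint8 label mask written above could be recovered next to the JPEG image.
def _parse_segmentation_record(serialized):
    keys_to_features = {
        'image/encoded': tf.FixedLenFeature((), tf.string),
        'image/label': tf.FixedLenFeature((), tf.string),
        'image/height': tf.FixedLenFeature((), tf.int64),
        'image/width': tf.FixedLenFeature((), tf.int64),
    }
    parsed = tf.parse_single_example(serialized, keys_to_features)
    image = tf.image.decode_jpeg(parsed['image/encoded'], channels=3)
    # The label was stored with ndarray.tobytes(), i.e. raw uint8 values.
    label = tf.decode_raw(parsed['image/label'], tf.uint8)
    label = tf.reshape(label, tf.stack([parsed['image/height'], parsed['image/width']]))
    return image, label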
def _convert_to_example(image_data, shape, bbox, label, imname):
    nbbox = np.array(bbox)
    ymin = list(nbbox[:, 0])
    xmin = list(nbbox[:, 1])
    ymax = list(nbbox[:, 2])
    xmax = list(nbbox[:, 3])

    print('shape: {}, height:{}, width:{}'.format(shape, shape[0], shape[1]))
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': int64_feature(shape[0]),
        'image/width': int64_feature(shape[1]),
        'image/channels': int64_feature(shape[2]),
        'image/shape': int64_feature(shape),
        'image/object/bbox/xmin': float_feature(xmin),
        'image/object/bbox/xmax': float_feature(xmax),
        'image/object/bbox/ymin': float_feature(ymin),
        'image/object/bbox/ymax': float_feature(ymax),
        'image/object/bbox/label': int64_feature(label),
        'image/format': bytes_feature('jpeg'),
        'image/encoded': bytes_feature(image_data),
        'image/name': bytes_feature(imname),
    }))
    return example
def data_prep(raw_data, tfrecord_filename, _NUM_SHARDS, dataset_directory_address, split_name='Train'): # raw_data[i][0] image address for ith index # raw_data[i][1] boxes in image with ith index xMax = 0 #----------------------- yMax = 0 #----------------------- xMin = 0 #----------------------- yMin = 0 #----------------------- areaMax = 0 #----------------------- areaMin = 0 #----------------------- maxArea_coords = [0, 0, 0, 0] #----------------------- minArea_coords = [0, 0, 0, 0] #----------------------- resize_dir = 'resized_images_INRIA_%dx%d/' % (TARGET_W, TARGET_H) num__per_shard = math.ceil(len(raw_data) / float(_NUM_SHARDS)) with tf.Graph().as_default(): #image_reader = ImageReader() with tf.Session() as sess: for shard_id in range(_NUM_SHARDS): output_filename = _get_dataset_filename( dataset_directory_address, split_name, shard_id, tfrecord_filename=tfrecord_filename, _NUM_SHARDS=_NUM_SHARDS) with tf.python_io.TFRecordWriter( output_filename) as tfrecord_writer: start_index = shard_id * num__per_shard end_index = min((shard_id + 1) * num__per_shard, len(raw_data)) for i in range(start_index, end_index): image = Image.open(dataset_directory_address + raw_data[i][0]) image_address = os.path.join( resize_dir, raw_data[i][0] [10:]) #dataset_directory_address + raw_data[i][0] sys.stdout.write( '\r>>Converting image %d/%d shard %d' % (i + 1, len(raw_data), shard_id)) sys.stdout.flush() orig_w, orig_h = image.size image = image.convert('L') # 8-bit grayscale image = image.resize( (IMG_W, IMG_H), Image.LANCZOS) # high-quality downsampling filter if not os.path.exists(resize_dir): os.makedirs(resize_dir) image.save( os.path.join(resize_dir, raw_data[i][0][10:])) x_scale = IMG_W / orig_w y_scale = IMG_H / orig_h bboxes_coords = [] #for box in raw_data[i][1]: for i, box in enumerate(raw_data[i][1]): ul_x, ul_y, br_x, br_y = box new_box_coordinates = (ul_x * x_scale, ul_y * y_scale, br_x * x_scale, br_y * y_scale) new_box_coordinates = [ round(x) for x in new_box_coordinates ] bboxes_coords.append(new_box_coordinates) "raw_data[i][1][0][0] = new_box_coordinates[0]" "raw_data[i][1][0][1] = new_box_coordinates[1]" "raw_data[i][1][0][2] = new_box_coordinates[2]" "raw_data[i][1][0][3] = new_box_coordinates[3]" #--------------------- create tfexample ----------- b1_ulx = b1_uly = b1_brx = b1_bry = b2_ulx = b2_uly = b2_brx = b2_bry = b3_ulx = b3_uly = b3_brx = b3_bry = b4_ulx = b4_uly = b4_brx = b4_bry = 0 for i, box in enumerate(bboxes_coords): if i == 0: b1_ulx, b1_uly, b1_brx, b1_bry = box elif i == 1: b2_ulx, b2_uly, b2_brx, b2_bry = box elif i == 2: b3_ulx, b3_uly, b3_brx, b3_bry = box elif i == 3: b4_ulx, b4_uly, b4_brx, b4_bry = box '''# ----------- statistics ------------------ if(box[0]==0 and box[1]==0 and box[2]==0 and box[3]==0): continue xMax = max(xMax, box[2]) yMax = max(yMax, box[3]) if xMin==0: xMin = box[0] yMin = box[1] else: xMin = min(xMin, box[0]) yMin = min(yMin, box[1]) Area = (box[2] - box[0]) * (box[3] - box[1]) if areaMax==0: areaMax = Area maxArea_coords = box elif Area>areaMax: areaMax = Area maxArea_coords=box if areaMin==0: areaMin = Area minArea_coords=box elif Area<areaMin: areaMin = Area minArea_coords=box''' example = tf.train.Example(features=tf.train.Features( feature={ #'imagedata': bytes_feature(image_data.tostring()), 'image_address': bytes_feature(tf.compat.as_bytes( image_address)), 'tag': int64_feature(1), 'box1_x0': int64_feature(b1_ulx), 'box1_y0': int64_feature(b1_uly), 'box1_x1': int64_feature(b1_brx), 'box1_y1': int64_feature(b1_bry), 'box2_x0': 
int64_feature(b2_ulx), 'box2_y0': int64_feature(b2_uly), 'box2_x1': int64_feature(b2_brx), 'box2_y1': int64_feature(b2_bry), 'box3_x0': int64_feature(b3_ulx), 'box3_y0': int64_feature(b3_uly), 'box3_x1': int64_feature(b3_brx), 'box3_y1': int64_feature(b3_bry), 'box4_x0': int64_feature(b4_ulx), 'box4_y0': int64_feature(b4_uly), 'box4_x1': int64_feature(b4_brx), 'box4_y1': int64_feature(b4_bry), })) '''# Rotating resized image and its boxes #draw = ImageDraw.Draw(image) cx = int(((new_box_coordinates[2]-new_box_coordinates[0])/2)+new_box_coordinates[0]) cy = int(((new_box_coordinates[3]-new_box_coordinates[1])/2)+new_box_coordinates[1]) # x0y1-------x0y3 # | | # | cx,cy | # | | # x2,y1-----x2,y3 x0, y1, x2, y3 = new_box_coordinates[0], new_box_coordinates[1], new_box_coordinates[2], new_box_coordinates[3] angle=45 new_img = rotate(image, angle, reshape=False) pil_img = Image.fromarray(new_img) polygon = gd.Rectangle((x0,y1),(x2,y3)) polygon.rotate(angle*math.pi/180,center=(cx,cy)) p0, p1, p2, p3 = polygon.points[0], polygon.points[1], polygon.points[2], polygon.points[3] # x0y0-------x1y1=p1 # | | # | cx,cy | # | | # x3,y3-----x2,y2=p2 def bound_limitation(x,max_x): if(x<0): x=0 elif(x>max_x): x = max_x return x x0 = bound_limitation(p0[0],target_img_width) x1 = bound_limitation(p1[0],target_img_width) x2 = bound_limitation(p2[0],target_img_width) x3 = bound_limitation(p3[0],target_img_width) y0 = bound_limitation(p0[1],target_img_height) y1 = bound_limitation(p1[1],target_img_height) y2 = bound_limitation(p2[1],target_img_height) y3 = bound_limitation(p3[1],target_img_height) draw = ImageDraw.Draw(pil_img) draw.line(((x0,y0),(x1,y1))) draw.line(((x1,y1),(x2,y2))) draw.line(((x2,y2),(x3,y3))) draw.line(((x3,y3),(x0,y0))) draw.rectangle((cx-3,cy-3,(cx+3,cy+3)), fill='white') pil_img.show()''' "image = np.asarray(image)" "images = np.array([image])" "images = np.expand_dims(images, axis=-1)" ## need extra dimension of size 1 for grayscale # ROTATING IMAGE IN DIFFERENT ANGLES AND ADD TO THE LAST COLUMN "tiled = np.tile(np.expand_dims(images, 4), [len(ROTATIONS)])" #angles=[0] #for transformation_index, angle in enumerate(angles): # tiled[:,:,:,:, transformation_index] = rotate(tiled[:, :, :, :, transformation_index], angle, axes=[1, 2], reshape=False)''' "example = image_to_tfexample(tiled,new_box_coordinates,1)" # class_id = 1 is pedestrian #example = image_to_tfexample(image_address,new_box_coordinates,1) tfrecord_writer.write(example.SerializeToString()) tfrecord_writer.close() sys.stdout.write('\n') sys.stdout.flush()
def _format_data(sess, image_reader, folder_path, pairs, idx, labels, id_map, attr_onehot_mat, attr_w2v25_mat, attr_w2v50_mat, attr_w2v100_mat, attr_w2v150_mat, id_map_attr, all_peaks_dic, subsets_dic, seg_data_dir, FiltOutMissRegion=False, FLIP=False): # Read the filename: img_path_0 = os.path.join(folder_path, pairs[idx][0]) img_path_1 = os.path.join(folder_path, pairs[idx][1]) id_0 = pairs[idx][0][0:4] id_1 = pairs[idx][1][0:4] cam_0 = pairs[idx][0][6] cam_1 = pairs[idx][1][6] image_raw_0 = tf.gfile.FastGFile(img_path_0, 'r').read() image_raw_1 = tf.gfile.FastGFile(img_path_1, 'r').read() height, width = image_reader.read_image_dims(sess, image_raw_0) ########################## Attribute ########################## attrs_0 = [] attrs_1 = [] attrs_w2v25_0 = [] attrs_w2v25_1 = [] attrs_w2v50_0 = [] attrs_w2v50_1 = [] attrs_w2v100_0 = [] attrs_w2v100_1 = [] attrs_w2v150_0 = [] attrs_w2v150_1 = [] idx_0 = id_map_attr[id_0] idx_1 = id_map_attr[id_1] # pdb.set_trace() if attr_onehot_mat is not None: for name in attr_onehot_mat.dtype.names: attrs_0.append(attr_onehot_mat[(name)][0][0][0][idx_0]) attrs_1.append(attr_onehot_mat[(name)][0][0][0][idx_1]) if attr_w2v25_mat is not None: for i in xrange(attr_w2v25_mat[0].shape[0]): attrs_w2v25_0 = attrs_w2v25_0 + attr_w2v25_mat[0][i][idx_0].tolist() attrs_w2v25_1 = attrs_w2v25_1 + attr_w2v25_mat[0][i][idx_1].tolist() if attr_w2v50_mat is not None: for i in xrange(attr_w2v50_mat[0].shape[0]): attrs_w2v50_0 = attrs_w2v50_0 + attr_w2v50_mat[0][i][idx_0].tolist() attrs_w2v50_1 = attrs_w2v50_1 + attr_w2v50_mat[0][i][idx_1].tolist() if attr_w2v100_mat is not None: for i in xrange(attr_w2v100_mat[0].shape[0]): attrs_w2v100_0 = attrs_w2v100_0 + attr_w2v100_mat[0][i][idx_0].tolist() attrs_w2v100_1 = attrs_w2v100_1 + attr_w2v100_mat[0][i][idx_1].tolist() if attr_w2v150_mat is not None: for i in xrange(attr_w2v150_mat[0].shape[0]): attrs_w2v150_0 = attrs_w2v150_0 + attr_w2v150_mat[0][i][idx_0].tolist() attrs_w2v150_1 = attrs_w2v150_1 + attr_w2v150_mat[0][i][idx_1].tolist() ########################## Segment ########################## seg_0 = np.zeros([128,64]) seg_1 = np.zeros([128,64]) if seg_data_dir: path_0 = os.path.join(seg_data_dir, pairs[idx][0]) path_1 = os.path.join(seg_data_dir, pairs[idx][1]) if os.exists(path_0) and os.exists(path_1): seg_0 = scipy.misc.imread(path_0) seg_1 = scipy.misc.imread(path_1) if FLIP: # pdb.set_trace() seg_0 = np.fliplr(seg_0) seg_1 = np.fliplr(seg_1) else: return None ########################## Pose 16x8 & Pose coodinate (for 128x64(Solid) 128x64(Gaussian))########################## ## Pose 16x8 w_unit = width/8 h_unit = height/16 pose_peaks_0 = np.zeros([16,8,18]) pose_peaks_1 = np.zeros([16,8,18]) ## Pose coodinate pose_peaks_0_rcv = np.zeros([18,3]) ## Row, Column, Visibility pose_peaks_1_rcv = np.zeros([18,3]) # pose_subs_0 = [] pose_subs_1 = [] # pdb.set_trace() if (all_peaks_dic is not None) and (pairs[idx][0] in all_peaks_dic) and (pairs[idx][1] in all_peaks_dic): ###### Pose 0 ###### peaks = _get_valid_peaks(all_peaks_dic[pairs[idx][0]], subsets_dic[pairs[idx][0]]) indices_r4_0, values_r4_0, shape = _getSparsePose(peaks, height, width, 18, radius=4, mode='Solid') indices_r4_0, shape_0 = _oneDimSparsePose(indices_r4_0, shape) pose_mask_r4_0 = _getPoseMask(peaks, height, width, radius=4, mode='Solid') pose_mask_r7_0 = _getPoseMask(peaks, height, width, radius=7, mode='Solid') for ii in range(len(peaks)): p = peaks[ii] if 0!=len(p): pose_peaks_0[int(p[0][1]/h_unit), int(p[0][0]/w_unit), ii] = 1 pose_peaks_0_rcv[ii][0] 
= p[0][1] pose_peaks_0_rcv[ii][1] = p[0][0] pose_peaks_0_rcv[ii][2] = 1 ## Generate body region proposals # part_bbox_list_0, visibility_list_0 = get_part_bbox7(peaks, img_path_0, radius=6, idx=idx) part_bbox_list_0, visibility_list_0 = get_part_bbox37(peaks, img_path_0, radius=6) if FiltOutMissRegion and (0 in visibility_list_0): return None ###### Pose 1 ###### peaks = _get_valid_peaks(all_peaks_dic[pairs[idx][1]], subsets_dic[pairs[idx][1]]) indices_r4_1, values_r4_1, shape = _getSparsePose(peaks, height, width, 18, radius=4, mode='Solid') indices_r4_1, shape_1 = _oneDimSparsePose(indices_r4_1, shape) pose_mask_r4_1 = _getPoseMask(peaks, height, width, radius=4, mode='Solid') pose_mask_r7_1 = _getPoseMask(peaks, height, width, radius=7, mode='Solid') ## Generate body region proposals # part_bbox_list_1, visibility_list_1 = get_part_bbox7(peaks, img_path_1, radius=7) part_bbox_list_1, visibility_list_1 = get_part_bbox37(peaks, img_path_0, radius=6) if FiltOutMissRegion and (0 in visibility_list_1): return None ###### Visualize ###### # dense = _sparse2dense(indices_r4_0, values_r4_0, shape) # _visualizePose(pose_mask_r4_0, scipy.misc.imread(img_path_0)) # _visualizePose(pose_mask_r7_0, scipy.misc.imread(img_path_0)) # pdb.set_trace() for ii in range(len(peaks)): p = peaks[ii] if 0!=len(p): pose_peaks_1[int(p[0][1]/h_unit), int(p[0][0]/w_unit), ii] = 1 pose_peaks_1_rcv[ii][0] = p[0][1] pose_peaks_1_rcv[ii][1] = p[0][0] pose_peaks_1_rcv[ii][2] = 1 pose_subs_0 = subsets_dic[pairs[idx][0]][0].tolist() pose_subs_1 = subsets_dic[pairs[idx][1]][0].tolist() else: return None example = tf.train.Example(features=tf.train.Features(feature={ 'image_name_0': dataset_utils.bytes_feature(pairs[idx][0]), 'image_name_1': dataset_utils.bytes_feature(pairs[idx][1]), 'image_raw_0': dataset_utils.bytes_feature(image_raw_0), 'image_raw_1': dataset_utils.bytes_feature(image_raw_1), 'label': dataset_utils.int64_feature(labels[idx]), 'id_0': dataset_utils.int64_feature(id_map[id_0]), 'id_1': dataset_utils.int64_feature(id_map[id_1]), 'cam_0': dataset_utils.int64_feature(int(cam_0)), 'cam_1': dataset_utils.int64_feature(int(cam_1)), 'image_format': dataset_utils.bytes_feature('jpg'), 'image_height': dataset_utils.int64_feature(height), 'image_width': dataset_utils.int64_feature(width), 'real_data': dataset_utils.int64_feature(1), 'attrs_0': dataset_utils.int64_feature(attrs_0), 'attrs_1': dataset_utils.int64_feature(attrs_1), 'attrs_w2v25_0': dataset_utils.float_feature(attrs_w2v25_0), 'attrs_w2v25_1': dataset_utils.float_feature(attrs_w2v25_1), 'attrs_w2v50_0': dataset_utils.float_feature(attrs_w2v50_0), 'attrs_w2v50_1': dataset_utils.float_feature(attrs_w2v50_1), 'attrs_w2v100_0': dataset_utils.float_feature(attrs_w2v100_0), 'attrs_w2v100_1': dataset_utils.float_feature(attrs_w2v100_1), 'attrs_w2v150_0': dataset_utils.float_feature(attrs_w2v150_0), 'attrs_w2v150_1': dataset_utils.float_feature(attrs_w2v150_1), 'pose_peaks_0': dataset_utils.float_feature(pose_peaks_0.flatten().tolist()), 'pose_peaks_1': dataset_utils.float_feature(pose_peaks_1.flatten().tolist()), 'pose_peaks_0_rcv': dataset_utils.float_feature(pose_peaks_0_rcv.flatten().tolist()), 'pose_peaks_1_rcv': dataset_utils.float_feature(pose_peaks_1_rcv.flatten().tolist()), 'pose_mask_r4_0': dataset_utils.int64_feature(pose_mask_r4_0.astype(np.int64).flatten().tolist()), 'pose_mask_r4_1': dataset_utils.int64_feature(pose_mask_r4_1.astype(np.int64).flatten().tolist()), 'pose_mask_r6_0': 
dataset_utils.int64_feature(pose_mask_r7_0.astype(np.int64).flatten().tolist()), 'pose_mask_r6_1': dataset_utils.int64_feature(pose_mask_r7_1.astype(np.int64).flatten().tolist()), 'seg_0': dataset_utils.int64_feature(seg_0.astype(np.int64).flatten().tolist()), 'seg_1': dataset_utils.int64_feature(seg_1.astype(np.int64).flatten().tolist()), 'shape': dataset_utils.int64_feature(shape_0), 'indices_r4_0': dataset_utils.int64_feature(np.array(indices_r4_0).astype(np.int64).flatten().tolist()), 'values_r4_0': dataset_utils.float_feature(np.array(values_r4_0).astype(np.float).flatten().tolist()), 'indices_r4_1': dataset_utils.int64_feature(np.array(indices_r4_1).astype(np.int64).flatten().tolist()), 'values_r4_1': dataset_utils.float_feature(np.array(values_r4_1).astype(np.float).flatten().tolist()), 'pose_subs_0': dataset_utils.float_feature(pose_subs_0), 'pose_subs_1': dataset_utils.float_feature(pose_subs_1), 'part_bbox_0': dataset_utils.int64_feature(np.array(part_bbox_list_0).astype(np.int64).flatten().tolist()), 'part_bbox_1': dataset_utils.int64_feature(np.array(part_bbox_list_1).astype(np.int64).flatten().tolist()), 'part_vis_0': dataset_utils.int64_feature(np.array(visibility_list_0).astype(np.int64).flatten().tolist()), 'part_vis_1': dataset_utils.int64_feature(np.array(visibility_list_1).astype(np.int64).flatten().tolist()), })) return example
def _format_data(sess, image_reader, folder_path, pairs, i, labels, id_map, attr_mat, id_map_attr, all_peaks_dic, subsets_dic, FiltOutMissRegion=False): # Read the filename: img_path_0 = os.path.join(folder_path, pairs[i][0]) img_path_1 = os.path.join(folder_path, pairs[i][1]) id_0 = pairs[i][0].split('_')[0] id_1 = pairs[i][1].split('_')[0] image_raw_0 = tf.gfile.FastGFile(img_path_0, 'r').read() image_raw_1 = tf.gfile.FastGFile(img_path_1, 'r').read() height, width = image_reader.read_image_dims(sess, image_raw_0) attrs_0 = [] attrs_1 = [] if attr_mat is not None: idx_0 = id_map_attr[id_0] idx_1 = id_map_attr[id_1] for name in attr_mat.dtype.names: attrs_0.append(attr_mat[(name)][0][0][0][idx_0]) attrs_1.append(attr_mat[(name)][0][0][0][idx_1]) ########################## Pose 16x8 & Pose coodinate (for 128x64(Solid) 128x64(Gaussian))########################## ## Pose 16x8 w_unit = width / 16 h_unit = height / 16 pose_peaks_0 = np.zeros([16, 16, 18]) pose_peaks_1 = np.zeros([16, 16, 18]) ## Pose coodinate pose_peaks_0_rcv = np.zeros([18, 3]) pose_peaks_1_rcv = np.zeros([18, 3]) # pose_subs_0 = [] pose_subs_1 = [] # pdb.set_trace() if (all_peaks_dic is not None) and (pairs[i][0] in all_peaks_dic) and ( pairs[i][1] in all_peaks_dic): ## Pose 0 # peaks = all_peaks_dic[pairs[i][0]] peaks = _get_valid_peaks(all_peaks_dic[pairs[i][0]], subsets_dic[pairs[i][0]]) # print(peaks) indices_r4_0, values_r4_0, shape = _getSparsePose(peaks, height, width, 18, radius=4, mode='Solid') indices_r4_0, shape_0 = _oneDimSparsePose(indices_r4_0, shape) indices_r8_0, values_r8_0, shape = _getSparsePose(peaks, height, width, 18, radius=8, mode='Solid') indices_r8_0, _ = _oneDimSparsePose(indices_r8_0, shape) # pose_dense_r4_0 = _sparse2dense(indices_r4_0, values_r4_0, shape) pose_mask_r4_0 = _getPoseMask(peaks, height, width, radius=4, mode='Solid') pose_mask_r8_0 = _getPoseMask(peaks, height, width, radius=8, mode='Solid') # indices_r6_v4_0, values_r6_v4_0, shape = _getSparsePose(peaks, height, width, 18, radius=6, var=4, mode='Gaussian') for ii in range(len(peaks)): p = peaks[ii] if 0 != len(p): pose_peaks_0[int(p[0][1] / h_unit), int(p[0][0] / w_unit), ii] = 1 pose_peaks_0_rcv[ii][0] = p[0][1] pose_peaks_0_rcv[ii][1] = p[0][0] pose_peaks_0_rcv[ii][2] = 1 ## Generate body region proposals # part_bbox_list_0, visibility_list_0 = get_part_bbox(peaks, img_path_0, i) part_bbox_list_0, visibility_list_0 = get_part_bbox(peaks, img_path_0) if FiltOutMissRegion and (0 in visibility_list_0): return None roi_mask_list_0 = get_roi_mask(part_bbox_list_0, visibility_list_0) roi10_mask_0 = np.transpose(np.squeeze(np.array(roi_mask_list_0)), [1, 2, 0]) ## Pose 1 # peaks = all_peaks_dic[pairs[i][1]] peaks = _get_valid_peaks(all_peaks_dic[pairs[i][1]], subsets_dic[pairs[i][1]]) indices_r4_1, values_r4_1, shape = _getSparsePose(peaks, height, width, 18, radius=4, mode='Solid') indices_r4_1, shape_1 = _oneDimSparsePose(indices_r4_1, shape) indices_r8_1, values_r8_1, shape = _getSparsePose(peaks, height, width, 18, radius=8, mode='Solid') indices_r8_1, _ = _oneDimSparsePose(indices_r8_1, shape) # pose_dense_r4_1 = _sparse2dense(indices_r4_1, values_r4_1, shape) pose_mask_r4_1 = _getPoseMask(peaks, height, width, radius=4, mode='Solid') pose_mask_r8_1 = _getPoseMask(peaks, height, width, radius=8, mode='Solid') # indices_r6_v4_1, values_r6_v4_1, shape = _getSparsePose(peaks, height, width, 18, radius=6, var=4, mode='Gaussian') ## Generate body region proposals part_bbox_list_1, visibility_list_1 = get_part_bbox(peaks, img_path_1) 
if FiltOutMissRegion and (0 in visibility_list_1): return None roi_mask_list_1 = get_roi_mask(part_bbox_list_1, visibility_list_1) roi10_mask_1 = np.transpose(np.squeeze(np.array(roi_mask_list_1)), [1, 2, 0]) ###### Visualize ###### # dense = _sparse2dense(indices_r4, values_r4, shape) # _visualizePose(pose_mask_r4_0, scipy.misc.imread(img_path_0)) # _visualizePose(pose_mask_r4_1, scipy.misc.imread(img_path_1)) # if i in [0,5]: # _visualizePose(roi_mask_list_0[0], scipy.misc.imread(img_path_0)) # _visualizePose(roi_mask_list_0[1], scipy.misc.imread(img_path_0)) # _visualizePose(roi_mask_list_0[2], scipy.misc.imread(img_path_0)) # _visualizePose(roi_mask_list_0[3], scipy.misc.imread(img_path_0)) # _visualizePose(roi_mask_list_0[4], scipy.misc.imread(img_path_0)) # _visualizePose(roi_mask_list_0[5], scipy.misc.imread(img_path_0)) # _visualizePose(roi_mask_list_0[6], scipy.misc.imread(img_path_0)) # _visualizePose(roi_mask_list_0[7], scipy.misc.imread(img_path_0)) # _visualizePose(roi_mask_list_0[8], scipy.misc.imread(img_path_0)) # _visualizePose(roi_mask_list_0[9], scipy.misc.imread(img_path_0)) # pdb.set_trace() for ii in range(len(peaks)): p = peaks[ii] if 0 != len(p): pose_peaks_1[int(p[0][1] / h_unit), int(p[0][0] / w_unit), ii] = 1 pose_peaks_1_rcv[ii][0] = p[0][1] pose_peaks_1_rcv[ii][1] = p[0][0] pose_peaks_1_rcv[ii][2] = 1 pose_subs_0 = subsets_dic[pairs[i][0]][0].tolist() pose_subs_1 = subsets_dic[pairs[i][1]][0].tolist() else: return None example = tf.train.Example(features=tf.train.Features( feature={ 'image_name_0': dataset_utils.bytes_feature(pairs[i][0]), 'image_name_1': dataset_utils.bytes_feature(pairs[i][1]), 'image_raw_0': dataset_utils.bytes_feature(image_raw_0), 'image_raw_1': dataset_utils.bytes_feature(image_raw_1), 'label': dataset_utils.int64_feature(labels[i]), 'id_0': dataset_utils.int64_feature(id_map[id_0]), 'id_1': dataset_utils.int64_feature(id_map[id_1]), 'cam_0': dataset_utils.int64_feature(-1), 'cam_1': dataset_utils.int64_feature(-1), 'image_format': dataset_utils.bytes_feature('jpg'), 'image_height': dataset_utils.int64_feature(height), 'image_width': dataset_utils.int64_feature(width), 'real_data': dataset_utils.int64_feature(1), 'attrs_0': dataset_utils.int64_feature(attrs_0), 'attrs_1': dataset_utils.int64_feature(attrs_1), 'pose_peaks_0': dataset_utils.float_feature(pose_peaks_0.flatten().tolist()), 'pose_peaks_1': dataset_utils.float_feature(pose_peaks_1.flatten().tolist()), 'pose_peaks_0_rcv': dataset_utils.float_feature(pose_peaks_0_rcv.flatten().tolist()), 'pose_peaks_1_rcv': dataset_utils.float_feature(pose_peaks_1_rcv.flatten().tolist()), # 'pose_dense_r4_0': dataset_utils.int64_feature(pose_dense_r4_0.astype(np.int64).flatten().tolist()), # 'pose_dense_r4_1': dataset_utils.int64_feature(pose_dense_r4_1.astype(np.int64).flatten().tolist()), 'pose_mask_r4_0': dataset_utils.int64_feature( pose_mask_r4_0.astype(np.int64).flatten().tolist()), 'pose_mask_r4_1': dataset_utils.int64_feature( pose_mask_r4_1.astype(np.int64).flatten().tolist()), 'pose_mask_r8_0': dataset_utils.int64_feature( pose_mask_r8_0.astype(np.int64).flatten().tolist()), 'pose_mask_r8_1': dataset_utils.int64_feature( pose_mask_r8_1.astype(np.int64).flatten().tolist()), 'shape': dataset_utils.int64_feature(shape_0), # 'indices_r6_v4_0': dataset_utils.int64_feature(np.array(indices_r6_v4_0).astype(np.int64).flatten().tolist()), # 'values_r6_v4_0': dataset_utils.float_feature(np.array(values_r6_v4_0).astype(np.float).flatten().tolist()), # 'indices_r6_v4_1': 
dataset_utils.int64_feature(np.array(indices_r6_v4_1).astype(np.int64).flatten().tolist()), # 'values_r6_v4_1': dataset_utils.float_feature(np.array(values_r6_v4_1).astype(np.float).flatten().tolist()), 'indices_r4_0': dataset_utils.int64_feature( np.array(indices_r4_0).astype(np.int64).flatten().tolist()), 'values_r4_0': dataset_utils.float_feature( np.array(values_r4_0).astype(np.float).flatten().tolist()), 'indices_r4_1': dataset_utils.int64_feature( np.array(indices_r4_1).astype(np.int64).flatten().tolist()), 'values_r4_1': dataset_utils.float_feature( np.array(values_r4_1).astype(np.float).flatten().tolist()), 'indices_r8_0': dataset_utils.int64_feature( np.array(indices_r8_0).astype(np.int64).flatten().tolist()), 'values_r8_0': dataset_utils.float_feature( np.array(values_r8_0).astype(np.float).flatten().tolist()), 'indices_r8_1': dataset_utils.int64_feature( np.array(indices_r8_1).astype(np.int64).flatten().tolist()), 'values_r8_1': dataset_utils.float_feature( np.array(values_r8_1).astype(np.float).flatten().tolist()), 'pose_subs_0': dataset_utils.float_feature(pose_subs_0), 'pose_subs_1': dataset_utils.float_feature(pose_subs_1), 'part_bbox_0': dataset_utils.int64_feature( np.array(part_bbox_list_0).astype( np.int64).flatten().tolist()), 'part_bbox_1': dataset_utils.int64_feature( np.array(part_bbox_list_1).astype( np.int64).flatten().tolist()), 'part_vis_0': dataset_utils.int64_feature( np.array(visibility_list_0).astype( np.int64).flatten().tolist()), 'part_vis_1': dataset_utils.int64_feature( np.array(visibility_list_1).astype( np.int64).flatten().tolist()), 'roi10_mask_0': dataset_utils.int64_feature( roi10_mask_0.astype(np.int64).flatten().tolist()), 'roi10_mask_1': dataset_utils.int64_feature( roi10_mask_1.astype(np.int64).flatten().tolist()), })) return example
def _create_tf_example(image, annotations, image_dir):
    """Converts image and annotations to a tf.Example proto.

    Args:
      image: dict with keys: [u'license', u'file_name', u'coco_url', u'height',
        u'width', u'date_captured', u'flickr_url', u'id']
      annotations: dict with objects (a list of image annotations) and a label:
        {u'objects': [{"area", "bbox": [x, y, width, height]}], u'label'}.
        Notice that bounding box coordinates in the COCO dataset are given as
        [x, y, width, height] tuples using absolute coordinates where x, y
        represent the top-left (0-indexed) corner. This function also converts
        to the format that can be used by the Tensorflow Object Detection API
        (which is [ymin, xmin, ymax, xmax] with coordinates normalized relative
        to image size).
      image_dir: directory containing the image files.
    Returns:
      tf_example: The converted tf.Example
    Raises:
      ValueError: if the image pointed to by data['filename'] is not a valid JPEG
    """
    image_height = image['height']
    image_width = image['width']
    filename = image['file_name']
    image_id = image['id']

    full_path = os.path.join(image_dir, filename)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    key = hashlib.sha256(encoded_jpg).hexdigest()

    xmin, xmax, ymin, ymax, area = [], [], [], [], []
    for obj in annotations['objects']:
        (x, y, width, height) = tuple(obj['bbox'])
        xmin.append(float(x) / image_width)
        xmax.append(float(x + width) / image_width)
        ymin.append(float(y) / image_height)
        ymax.append(float(y + height) / image_height)
        area.append(obj['area'])

    feature_dict = {
        'image/height': dataset_utils.int64_feature(image_height),
        'image/width': dataset_utils.int64_feature(image_width),
        'image/filename': dataset_utils.bytes_feature(filename.encode('utf8')),
        'image/source_id': dataset_utils.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256': dataset_utils.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_utils.bytes_feature(encoded_jpg),
        'image/format': dataset_utils.bytes_feature('jpeg'.encode('utf8')),
        'image/class/label': dataset_utils.int64_feature(annotations['label']),
        'image/object/bbox/xmin': dataset_utils.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_utils.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_utils.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_utils.float_list_feature(ymax),
        'image/object/area': dataset_utils.float_list_feature(area),
    }
    example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
    return example
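# Hypothetical usage of _create_tf_example (the output path and the structure
# of `dataset` are assumptions, not taken from the original code): iterate
# over (image dict, grouped annotations) pairs and write one record each.
def _write_coco_records(dataset, image_dir, output_path):
    writer = tf.python_io.TFRecordWriter(output_path)
    for image, annotations in dataset:
        tf_example = _create_tf_example(image, annotations, image_dir)
        writer.write(tf_example.SerializeToString())
    writer.close()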
def _format_data(sess, image_reader, folder_path, pairs, i, labels, id_map, attr_mat, id_map_attr, all_peaks_dic, subsets_dic, FiltOutMissRegion=False): # Read the filename: img_path_0 = os.path.join(folder_path, pairs[i][0]) img_path_1 = os.path.join(folder_path, pairs[i][1]) id_0 = pairs[i][0].split('_')[0] id_1 = pairs[i][1].split('_')[0] image_raw_0 = tf.gfile.FastGFile(img_path_0, 'r').read() image_raw_1 = tf.gfile.FastGFile(img_path_1, 'r').read() height, width = image_reader.read_image_dims(sess, image_raw_0) attrs_0 = [] attrs_1 = [] if attr_mat is not None: idx_0 = id_map_attr[id_0] idx_1 = id_map_attr[id_1] for name in attr_mat.dtype.names: attrs_0.append(attr_mat[(name)][0][0][0][idx_0]) attrs_1.append(attr_mat[(name)][0][0][0][idx_1]) ########################## Pose 16x8 & Pose coodinate (for 128x64(Solid) 128x64(Gaussian))########################## ## Pose 16x8 w_unit = width/16 h_unit = height/16 pose_peaks_0 = np.zeros([16,16,18]) pose_peaks_1 = np.zeros([16,16,18]) ## Pose coodinate pose_peaks_0_rcv = np.zeros([18,3]) pose_peaks_1_rcv = np.zeros([18,3]) # pose_subs_0 = [] pose_subs_1 = [] # pdb.set_trace() if (all_peaks_dic is not None) and (pairs[i][0] in all_peaks_dic) and (pairs[i][1] in all_peaks_dic): ## Pose 0 # peaks = all_peaks_dic[pairs[i][0]] peaks = _get_valid_peaks(all_peaks_dic[pairs[i][0]], subsets_dic[pairs[i][0]]) # print(peaks) indices_r4_0, values_r4_0, shape = _getSparsePose(peaks, height, width, 18, radius=4, mode='Solid') indices_r4_0, shape_0 = _oneDimSparsePose(indices_r4_0, shape) indices_r8_0, values_r8_0, shape = _getSparsePose(peaks, height, width, 18, radius=8, mode='Solid') indices_r8_0, _ = _oneDimSparsePose(indices_r8_0, shape) # pose_dense_r4_0 = _sparse2dense(indices_r4_0, values_r4_0, shape) pose_mask_r4_0 = _getPoseMask(peaks, height, width, radius=4, mode='Solid') pose_mask_r8_0 = _getPoseMask(peaks, height, width, radius=8, mode='Solid') # indices_r6_v4_0, values_r6_v4_0, shape = _getSparsePose(peaks, height, width, 18, radius=6, var=4, mode='Gaussian') for ii in range(len(peaks)): p = peaks[ii] if 0!=len(p): pose_peaks_0[int(p[0][1]/h_unit), int(p[0][0]/w_unit), ii] = 1 pose_peaks_0_rcv[ii][0] = p[0][1] pose_peaks_0_rcv[ii][1] = p[0][0] pose_peaks_0_rcv[ii][2] = 1 ## Generate body region proposals # part_bbox_list_0, visibility_list_0 = get_part_bbox(peaks, img_path_0, i) part_bbox_list_0, visibility_list_0 = get_part_bbox(peaks, img_path_0) if FiltOutMissRegion and (0 in visibility_list_0): return None roi_mask_list_0 = get_roi_mask(part_bbox_list_0, visibility_list_0) roi10_mask_0 = np.transpose(np.squeeze(np.array(roi_mask_list_0)),[1,2,0]) ## Pose 1 # peaks = all_peaks_dic[pairs[i][1]] peaks = _get_valid_peaks(all_peaks_dic[pairs[i][1]], subsets_dic[pairs[i][1]]) indices_r4_1, values_r4_1, shape = _getSparsePose(peaks, height, width, 18, radius=4, mode='Solid') indices_r4_1, shape_1 = _oneDimSparsePose(indices_r4_1, shape) indices_r8_1, values_r8_1, shape = _getSparsePose(peaks, height, width, 18, radius=8, mode='Solid') indices_r8_1, _ = _oneDimSparsePose(indices_r8_1, shape) # pose_dense_r4_1 = _sparse2dense(indices_r4_1, values_r4_1, shape) pose_mask_r4_1 = _getPoseMask(peaks, height, width, radius=4, mode='Solid') pose_mask_r8_1 = _getPoseMask(peaks, height, width, radius=8, mode='Solid') # indices_r6_v4_1, values_r6_v4_1, shape = _getSparsePose(peaks, height, width, 18, radius=6, var=4, mode='Gaussian') ## Generate body region proposals part_bbox_list_1, visibility_list_1 = get_part_bbox(peaks, img_path_1) if FiltOutMissRegion 
and (0 in visibility_list_1): return None roi_mask_list_1 = get_roi_mask(part_bbox_list_1, visibility_list_1) roi10_mask_1 = np.transpose(np.squeeze(np.array(roi_mask_list_1)),[1,2,0]) ###### Visualize ###### # dense = _sparse2dense(indices_r4, values_r4, shape) # _visualizePose(pose_mask_r4_0, scipy.misc.imread(img_path_0)) # _visualizePose(pose_mask_r4_1, scipy.misc.imread(img_path_1)) # if i in [0,5]: # _visualizePose(roi_mask_list_0[0], scipy.misc.imread(img_path_0)) # _visualizePose(roi_mask_list_0[1], scipy.misc.imread(img_path_0)) # _visualizePose(roi_mask_list_0[2], scipy.misc.imread(img_path_0)) # _visualizePose(roi_mask_list_0[3], scipy.misc.imread(img_path_0)) # _visualizePose(roi_mask_list_0[4], scipy.misc.imread(img_path_0)) # _visualizePose(roi_mask_list_0[5], scipy.misc.imread(img_path_0)) # _visualizePose(roi_mask_list_0[6], scipy.misc.imread(img_path_0)) # _visualizePose(roi_mask_list_0[7], scipy.misc.imread(img_path_0)) # _visualizePose(roi_mask_list_0[8], scipy.misc.imread(img_path_0)) # _visualizePose(roi_mask_list_0[9], scipy.misc.imread(img_path_0)) # pdb.set_trace() for ii in range(len(peaks)): p = peaks[ii] if 0!=len(p): pose_peaks_1[int(p[0][1]/h_unit), int(p[0][0]/w_unit), ii] = 1 pose_peaks_1_rcv[ii][0] = p[0][1] pose_peaks_1_rcv[ii][1] = p[0][0] pose_peaks_1_rcv[ii][2] = 1 pose_subs_0 = subsets_dic[pairs[i][0]][0].tolist() pose_subs_1 = subsets_dic[pairs[i][1]][0].tolist() else: return None example = tf.train.Example(features=tf.train.Features(feature={ 'image_name_0': dataset_utils.bytes_feature(pairs[i][0]), 'image_name_1': dataset_utils.bytes_feature(pairs[i][1]), 'image_raw_0': dataset_utils.bytes_feature(image_raw_0), 'image_raw_1': dataset_utils.bytes_feature(image_raw_1), 'label': dataset_utils.int64_feature(labels[i]), 'id_0': dataset_utils.int64_feature(id_map[id_0]), 'id_1': dataset_utils.int64_feature(id_map[id_1]), 'cam_0': dataset_utils.int64_feature(-1), 'cam_1': dataset_utils.int64_feature(-1), 'image_format': dataset_utils.bytes_feature('jpg'), 'image_height': dataset_utils.int64_feature(height), 'image_width': dataset_utils.int64_feature(width), 'real_data': dataset_utils.int64_feature(1), 'attrs_0': dataset_utils.int64_feature(attrs_0), 'attrs_1': dataset_utils.int64_feature(attrs_1), 'pose_peaks_0': dataset_utils.float_feature(pose_peaks_0.flatten().tolist()), 'pose_peaks_1': dataset_utils.float_feature(pose_peaks_1.flatten().tolist()), 'pose_peaks_0_rcv': dataset_utils.float_feature(pose_peaks_0_rcv.flatten().tolist()), 'pose_peaks_1_rcv': dataset_utils.float_feature(pose_peaks_1_rcv.flatten().tolist()), # 'pose_dense_r4_0': dataset_utils.int64_feature(pose_dense_r4_0.astype(np.int64).flatten().tolist()), # 'pose_dense_r4_1': dataset_utils.int64_feature(pose_dense_r4_1.astype(np.int64).flatten().tolist()), 'pose_mask_r4_0': dataset_utils.int64_feature(pose_mask_r4_0.astype(np.int64).flatten().tolist()), 'pose_mask_r4_1': dataset_utils.int64_feature(pose_mask_r4_1.astype(np.int64).flatten().tolist()), 'pose_mask_r8_0': dataset_utils.int64_feature(pose_mask_r8_0.astype(np.int64).flatten().tolist()), 'pose_mask_r8_1': dataset_utils.int64_feature(pose_mask_r8_1.astype(np.int64).flatten().tolist()), 'shape': dataset_utils.int64_feature(shape_0), # 'indices_r6_v4_0': dataset_utils.int64_feature(np.array(indices_r6_v4_0).astype(np.int64).flatten().tolist()), # 'values_r6_v4_0': dataset_utils.float_feature(np.array(values_r6_v4_0).astype(np.float).flatten().tolist()), # 'indices_r6_v4_1': 
dataset_utils.int64_feature(np.array(indices_r6_v4_1).astype(np.int64).flatten().tolist()), # 'values_r6_v4_1': dataset_utils.float_feature(np.array(values_r6_v4_1).astype(np.float).flatten().tolist()), 'indices_r4_0': dataset_utils.int64_feature(np.array(indices_r4_0).astype(np.int64).flatten().tolist()), 'values_r4_0': dataset_utils.float_feature(np.array(values_r4_0).astype(np.float).flatten().tolist()), 'indices_r4_1': dataset_utils.int64_feature(np.array(indices_r4_1).astype(np.int64).flatten().tolist()), 'values_r4_1': dataset_utils.float_feature(np.array(values_r4_1).astype(np.float).flatten().tolist()), 'indices_r8_0': dataset_utils.int64_feature(np.array(indices_r8_0).astype(np.int64).flatten().tolist()), 'values_r8_0': dataset_utils.float_feature(np.array(values_r8_0).astype(np.float).flatten().tolist()), 'indices_r8_1': dataset_utils.int64_feature(np.array(indices_r8_1).astype(np.int64).flatten().tolist()), 'values_r8_1': dataset_utils.float_feature(np.array(values_r8_1).astype(np.float).flatten().tolist()), 'pose_subs_0': dataset_utils.float_feature(pose_subs_0), 'pose_subs_1': dataset_utils.float_feature(pose_subs_1), 'part_bbox_0': dataset_utils.int64_feature(np.array(part_bbox_list_0).astype(np.int64).flatten().tolist()), 'part_bbox_1': dataset_utils.int64_feature(np.array(part_bbox_list_1).astype(np.int64).flatten().tolist()), 'part_vis_0': dataset_utils.int64_feature(np.array(visibility_list_0).astype(np.int64).flatten().tolist()), 'part_vis_1': dataset_utils.int64_feature(np.array(visibility_list_1).astype(np.int64).flatten().tolist()), 'roi10_mask_0': dataset_utils.int64_feature(roi10_mask_0.astype(np.int64).flatten().tolist()), 'roi10_mask_1': dataset_utils.int64_feature(roi10_mask_1.astype(np.int64).flatten().tolist()), })) return example
def _format_data(sess, image_reader, idx, tmp_dir, pathlist_A, pathlist_B,
                 pathlist_A_seg, pathlist_B_seg, pathlist_A_seg_class,
                 pathlist_B_seg_class, B_seg_valid_list):
    ## Resize and random flip
    # if np.random.rand() > 0.5:
    #     IMG_FLIP = True
    # else:
    #     IMG_FLIP = False
    IMG_FLIP = False
    path_A = _img_resize_flip(pathlist_A[idx], IMG_FLIP, tmp_dir,
                              image_width=_IMG_WEIGHT, image_height=_IMG_HEIGHT)
    path_B = _img_resize_flip(pathlist_B[idx], IMG_FLIP, tmp_dir,
                              image_width=_IMG_WEIGHT, image_height=_IMG_HEIGHT)
    path_A_seg = _img_resize_flip(pathlist_A_seg[idx], IMG_FLIP, tmp_dir,
                                  image_width=_IMG_WEIGHT, image_height=_IMG_HEIGHT)
    path_B_seg = _img_resize_flip(pathlist_B_seg[idx], IMG_FLIP, tmp_dir,
                                  image_width=_IMG_WEIGHT, image_height=_IMG_HEIGHT)
    # pdb.set_trace()
    path_A_seg_class = _img_resize_flip(pathlist_A_seg_class[idx], IMG_FLIP, tmp_dir,
                                        image_width=_IMG_WEIGHT, image_height=_IMG_HEIGHT)
    path_B_seg_class = _img_resize_flip(pathlist_B_seg_class[idx], IMG_FLIP, tmp_dir,
                                        image_width=_IMG_WEIGHT, image_height=_IMG_HEIGHT)
    # nd_A_seg_class = _img_fliplr_oneHot_zoom(pathlist_A_seg_class[idx], IMG_FLIP,
    #                                          image_width=_IMG_WEIGHT, image_height=_IMG_HEIGHT)
    # nd_B_seg_class = _img_fliplr_oneHot_zoom(pathlist_B_seg_class[idx], IMG_FLIP,
    #                                          image_width=_IMG_WEIGHT, image_height=_IMG_HEIGHT)

    # Read as binary so the raw encoded bytes can be stored in the records.
    image_raw_A = tf.gfile.FastGFile(path_A, 'rb').read()
    image_raw_B = tf.gfile.FastGFile(path_B, 'rb').read()
    image_raw_A_seg = tf.gfile.FastGFile(path_A_seg, 'rb').read()
    image_raw_B_seg = tf.gfile.FastGFile(path_B_seg, 'rb').read()
    image_raw_A_seg_class = tf.gfile.FastGFile(path_A_seg_class, 'rb').read()
    image_raw_B_seg_class = tf.gfile.FastGFile(path_B_seg_class, 'rb').read()
    height, width = image_reader.read_image_dims(sess, image_raw_A)
    # pdb.set_trace()

    example = tf.train.Example(features=tf.train.Features(feature={
        'image_name_A': dataset_utils.bytes_feature(pathlist_A[idx].split('/')[-1]),
        'image_name_B': dataset_utils.bytes_feature(pathlist_B[idx].split('/')[-1]),
        'image_raw_A': dataset_utils.bytes_feature(image_raw_A),
        'image_raw_B': dataset_utils.bytes_feature(image_raw_B),
        'image_raw_A_seg': dataset_utils.bytes_feature(image_raw_A_seg),
        'image_raw_B_seg': dataset_utils.bytes_feature(image_raw_B_seg),
        'image_raw_A_seg_class': dataset_utils.bytes_feature(image_raw_A_seg_class),
        'image_raw_B_seg_class': dataset_utils.bytes_feature(image_raw_B_seg_class),
        # 'image_raw_A_seg_class': dataset_utils.int64_feature(nd_A_seg_class.reshape(-1).tolist()),
        # 'image_raw_B_seg_class': dataset_utils.int64_feature(nd_B_seg_class.reshape(-1).tolist()),
        'image_format': dataset_utils.bytes_feature('png'),
        'image_height': dataset_utils.int64_feature(height),
        'image_width': dataset_utils.int64_feature(width),
        'A_seg_valid': dataset_utils.int64_feature(1),
        'B_seg_valid': dataset_utils.int64_feature(B_seg_valid_list[idx]),
    }))
    return example
def do_data_prep_with_tfrecord(raw_data, tfrecord_filename, _NUM_SHARDS,
                               dataset_directory_address, num_train_data,
                               num_valid_data, split_name='train'):
    num_per_shard = math.ceil(len(raw_data) / float(_NUM_SHARDS))

    def _make_example(image_file, y_true_conf, y_true_loc):
        # Helper factored out of the three identical inline blocks below.
        index_angle = int(np.random.uniform(0, 5))
        return tf.train.Example(features=tf.train.Features(feature={
            'image_address': bytes_feature(tf.compat.as_bytes(image_file)),
            'y_true_conf': tf.train.Feature(
                float_list=tf.train.FloatList(value=y_true_conf.flatten())),
            'y_true_loc': tf.train.Feature(
                float_list=tf.train.FloatList(value=y_true_loc.flatten())),
            'index_angle': tf.train.Feature(
                int64_list=tf.train.Int64List(value=[index_angle]))
        }))

    def _save_grayscale_copy(image_file):
        # Save a grayscale copy of the matched image for inspection.
        image = Image.open(
            'Caltech pedestrian dataset/Caltech pedestrian dataset/'
            'data_train/images_640x480/' + image_file[27:])
        image = image.convert('L')
        image.save('dataset/test/' + image_file[27:])

    with tf.Graph().as_default():
        with tf.Session() as sess:
            end_index = 0
            shard_id = -1
            index = 0
            # Vindex counts the examples written to the validation tfrecord.
            Vindex = 0
            # Training and validation data are gathered in one pass: the first
            # num_train_data matched images go to the sharded training records,
            # the remaining ones to a single validation record.
            for image_file in raw_data.keys():
                if (shard_id < 0 or index >= end_index + 1) and index <= num_train_data:
                    # Open the next training shard.
                    shard_id += 1
                    end_index = min((shard_id + 1) * num_per_shard, len(raw_data))
                    output_filename = _get_dataset_filename(
                        dataset_directory_address, split_name, shard_id,
                        tfrecord_filename=tfrecord_filename,
                        _NUM_SHARDS=_NUM_SHARDS)
                    tfrecord_writer_train = tf.python_io.TFRecordWriter(output_filename)

                    y_true_conf, y_true_loc, match_counter = find_gt_boxes(raw_data, image_file)
                    print('size of y_true_conf: %d' % len(y_true_conf))
                    print('size of y_true_loc: %d' % len(y_true_loc))
                    if match_counter > 0:
                        _save_grayscale_copy(image_file)
                        index += 1
                        if split_name == 'train':
                            example = _make_example(image_file, y_true_conf, y_true_loc)
                            tfrecord_writer_train.write(example.SerializeToString())
                        sys.stdout.write('\r>> index: %d, number of matches: %d'
                                         % (index, match_counter))
                        sys.stdout.flush()
                else:
                    if index <= num_train_data:
                        # Still filling the current training shard.
                        y_true_conf, y_true_loc, match_counter = find_gt_boxes(raw_data, image_file)
                        if match_counter > 0:
                            _save_grayscale_copy(image_file)
                            index += 1
                            if split_name == 'train':
                                example = _make_example(image_file, y_true_conf, y_true_loc)
                                tfrecord_writer_train.write(example.SerializeToString())
                            sys.stdout.write('\r>> index: %d, number of matches: %d'
                                             % (index, match_counter))
                            sys.stdout.flush()
                    else:
                        if Vindex == 0:
                            # Switch from training to validation output.
                            tfrecord_writer_train.close()
                            print('\nStart to prepare validation data\n')
                            split_name = 'validation'
                            output_filename = _get_dataset_filename(
                                dataset_directory_address, split_name, 0,
                                tfrecord_filename=tfrecord_filename,
                                _NUM_SHARDS=_NUM_SHARDS)
                            tfrecord_writer_validation = tf.python_io.TFRecordWriter(output_filename)

                        y_true_conf, y_true_loc, match_counter = find_gt_boxes(raw_data, image_file)
                        if match_counter > 0:
                            _save_grayscale_copy(image_file)
                            index += 1
                            Vindex += 1
                            if split_name == 'validation':
                                example = _make_example(image_file, y_true_conf, y_true_loc)
                                tfrecord_writer_validation.write(example.SerializeToString())
                            if Vindex > num_valid_data:
                                break
                            sys.stdout.write('\r>> index: %d, number of matches: %d'
                                             % (index, match_counter))
                            sys.stdout.flush()

            if Vindex == 0:
                tfrecord_writer_train.close()
            if Vindex > 0:
                tfrecord_writer_validation.close()
            print('number of validation data: %d' % Vindex)
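# Sketch, not from the source: a quick way to inspect one record produced by
# do_data_prep_with_tfrecord above using the plain protobuf API. record_path
# and the function name are hypothetical; the feature keys match the writer.
import tensorflow as tf

def inspect_first_record(record_path):
    for serialized in tf.python_io.tf_record_iterator(record_path):
        example = tf.train.Example.FromString(serialized)
        feats = example.features.feature
        print('image_address:', feats['image_address'].bytes_list.value[0])
        print('y_true_conf values:', len(feats['y_true_conf'].float_list.value))
        print('y_true_loc values:', len(feats['y_true_loc'].float_list.value))
        print('index_angle:', feats['index_angle'].int64_list.value[0])
        break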
def _convert_to_example(image_data, labels, labels_text, bboxes, shape,
                        difficult, truncated, oriented_bbox, ignored, filename):
    """Build an Example proto for an image example.

    :param image_data: string, JPEG encoding of RGB image;
    :param labels: list of integers, identifier for the ground truth;
    :param labels_text: list of strings, human-readable labels;
    :param bboxes: list of bounding boxes; each box is a list of coordinates
        specifying [ymin, xmin, ymax, xmax]. All boxes are assumed to belong
        to the same label as the image label.
    :param shape: 3 integers, image shape in pixels.
    :param difficult: indicates whether each box is a text instance or not.
    :param truncated: truncation flags, one per box.
    :param oriented_bbox: oriented bounding box coordinates, one
        [x1, x2, x3, x4, y1, y2, y3, y4] list per box.
    :param ignored: ignore flags, one per box.
    :param filename: image file name.
    :return: Example proto
    """
    xmin = []
    ymin = []
    xmax = []
    ymax = []
    for b in bboxes:
        assert len(b) == 4
        # pylint: disable=expression-not-assigned
        [l.append(point) for l, point in zip([ymin, xmin, ymax, xmax], b)]
        # pylint: enable=expression-not-assigned

    x1 = []
    x2 = []
    x3 = []
    x4 = []
    y1 = []
    y2 = []
    y3 = []
    y4 = []
    for obbox in oriented_bbox:
        assert len(obbox) == 8
        [l.append(point)
         for l, point in zip([x1, x2, x3, x4, y1, y2, y3, y4], obbox)]

    image_format = b'JPEG'
    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height': int64_feature(shape[0]),
            'image/width': int64_feature(shape[1]),
            'image/channels': int64_feature(shape[2]),
            'image/shape': int64_feature(shape),
            'image/filename': bytes_feature(filename.encode('utf-8')),
            'image/object/bbox/xmin': float_feature(xmin),
            'image/object/bbox/xmax': float_feature(xmax),
            'image/object/bbox/ymin': float_feature(ymin),
            'image/object/bbox/ymax': float_feature(ymax),
            'image/object/bbox/x1': float_feature(x1),
            'image/object/bbox/y1': float_feature(y1),
            'image/object/bbox/x2': float_feature(x2),
            'image/object/bbox/y2': float_feature(y2),
            'image/object/bbox/x3': float_feature(x3),
            'image/object/bbox/y3': float_feature(y3),
            'image/object/bbox/x4': float_feature(x4),
            'image/object/bbox/y4': float_feature(y4),
            'image/object/bbox/label': int64_feature(labels),
            'image/object/bbox/label_text': bytes_feature(labels_text),
            'image/object/bbox/difficult': int64_feature(difficult),
            'image/object/bbox/truncated': int64_feature(truncated),
            'image/object/bbox/ignored': int64_feature(ignored),
            'image/format': bytes_feature(image_format),
            'image/encoded': bytes_feature(image_data)
        }))
    return example
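# Sketch, not part of the original: the per-object features written above are
# variable length, so a reader would typically declare them as VarLenFeature.
# Only the key names come from the writer; everything else is an assumption.
import tensorflow as tf

def _parse_oriented_bbox_example(serialized):
    keys_to_features = {
        'image/encoded': tf.FixedLenFeature([], tf.string),
        'image/shape': tf.FixedLenFeature([3], tf.int64),
        'image/object/bbox/xmin': tf.VarLenFeature(tf.float32),
        'image/object/bbox/ymin': tf.VarLenFeature(tf.float32),
        'image/object/bbox/xmax': tf.VarLenFeature(tf.float32),
        'image/object/bbox/ymax': tf.VarLenFeature(tf.float32),
        'image/object/bbox/x1': tf.VarLenFeature(tf.float32),
        'image/object/bbox/y1': tf.VarLenFeature(tf.float32),
        'image/object/bbox/label': tf.VarLenFeature(tf.int64),
    }
    parsed = tf.parse_single_example(serialized, keys_to_features)
    image = tf.image.decode_jpeg(parsed['image/encoded'], channels=3)
    return image, parsed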
def image_to_tfrecord(video_folder, annotation_file, tfrecord_filename):
    """Convert images to TFRecords.

    Besides the image name, each record contains additional information
    including the encoded image, the image shape, the bounding boxes and the
    label ids. Image data and shape are computed by reading the image from
    disk; the label id is obtained by mapping the annotation's object type
    (label text) to an integer class id.
    """
    video_image_list = os.listdir(video_folder)
    video_label_list = os.listdir(annotation_file)

    def _label_id(obj_type):
        # Map the object type string to an integer class id
        # (factored out of the repeated inline branches).
        if obj_type in ("Car", "Van", "Truck"):
            return 1
        if obj_type in ("Pedestrian", "Person_sitting", "Cyclist"):
            return 2
        return 0

    # define image decoding graph
    inputs = tf.placeholder(dtype=tf.string)
    decoded_jpg = tf.image.decode_jpeg(inputs)

    # open tfRecord writer
    with tf.python_io.TFRecordWriter(tfrecord_filename) as tfrecord_writer:
        # open a session for image decoding
        with tf.Session() as sess:
            for label_file in video_label_list:
                if label_file.startswith("._"):
                    continue
                print("VIDEO--------------------------- ", label_file)
                frame = ""
                label_path = annotation_file + "/" + label_file
                video_name = label_file.strip("\n").replace(".txt", "")
                with open(label_path, "r") as ff:
                    annotations = ff.readlines()
                for line in annotations:
                    row = line.strip().split(" ")
                    if frame == row[0]:
                        # Same frame as the previous row: accumulate the box.
                        print("Appending")
                        label_class.append(_label_id(row[2]))
                        xmin.append(float(row[6]))
                        ymin.append(float(row[7]))
                        xmax.append(float(row[8]))
                        ymax.append(float(row[9]))
                    else:
                        # A new frame starts: first flush the previous one.
                        if frame != "" and os.path.exists(image):
                            print('converting %s' % frame_name)
                            # read image
                            image_data = tf.gfile.FastGFile(image, 'rb').read()
                            # decode image
                            if frame_name.endswith(('png', 'PNG')):
                                image_data_decoded = sess.run(
                                    decoded_jpg, feed_dict={inputs: image_data})
                                image_format = b'PNG'
                            else:
                                raise ValueError("image %s is not supported" % frame_name)
                            shape = list(image_data_decoded.shape)
                            # create tf example
                            example = tf.train.Example(features=tf.train.Features(
                                feature={
                                    'image/format': bytes_feature(image_format),
                                    'image/encoded': bytes_feature(image_data),
                                    'image/filename': bytes_feature(bytes(frame_name, 'utf-8')),
                                    # 'image/key/sha256': bytes_feature(shape),
                                    'image/source_id': bytes_feature(bytes(source_id, 'utf-8')),
                                    'image/height': int64_feature(shape[0]),
                                    'image/width': int64_feature(shape[1]),
                                    'bbox/xmin': float_feature(xmin),
                                    'bbox/xmax': float_feature(xmax),
                                    'bbox/ymin': float_feature(ymin),
                                    'bbox/ymax': float_feature(ymax),
                                    'bbox/label/index': int64_feature(label_class)
                                }))
                            # write example
                            tfrecord_writer.write(example.SerializeToString())
                            print("FRAME FOUND")
                        else:
                            print("START FRAME")
                        # Reset the per-frame state and record the first box of
                        # the new frame (identical in both branches above, so it
                        # is hoisted out of the if/else).
                        xmin = []
                        ymin = []
                        xmax = []
                        ymax = []
                        label_class = []
                        frame = row[0]
                        source_id = row[0]
                        frame_name = row[0].zfill(6) + ".png"
                        image = video_folder + "/" + video_name + "/" + frame_name
                        if os.path.exists(image):
                            label_class.append(_label_id(row[2]))
                            xmin.append(float(row[6]))
                            ymin.append(float(row[7]))
                            xmax.append(float(row[8]))
                            ymax.append(float(row[9]))
                # Note: the example for the final frame of each annotation file
                # is never flushed by this loop as written.
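# Sketch, not from the source: one possible invocation of image_to_tfrecord
# above for a KITTI-tracking-style layout. The directory names and the output
# filename are hypothetical.
if __name__ == '__main__':
    image_to_tfrecord(video_folder='data/training/image_02',
                      annotation_file='data/training/label_02',
                      tfrecord_filename='kitti_tracking_train.tfrecord')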