def _convert_to_example(image_data, shape, bboxes, labels, difficult,
                        truncated, preprocessed_box, name):
    """Pack one annotated image into a ``tf.train.Example``.

    Args:
        image_data: encoded image payload stored under ``image/encoded``
            (the format feature is always written as ``b'JPEG'``).
        shape: image shape; items 0, 1 and 2 are written as height, width
            and channels, and the whole value as ``image/shape``.
        bboxes: iterable of 4-element boxes, each read in the order
            (ymin, xmin, ymax, xmax).
        labels: per-box labels; note they are written as a *float* feature.
        difficult: values written as the int64 ``difficult`` feature.
        truncated: values written as the int64 ``truncated`` feature.
        preprocessed_box: object exposing ``tolist()`` (presumably a NumPy
            array -- confirm against the caller).
        name: image name as ``str``; stored UTF-8 encoded.

    Returns:
        The populated ``tf.train.Example``.
    """
    coord_order = ('ymin', 'xmin', 'ymax', 'xmax')
    coords = {key: [] for key in coord_order}
    for box in bboxes:
        assert len(box) == 4
        # Transpose the per-box tuples into one list per coordinate.
        for key, value in zip(coord_order, box):
            coords[key].append(value)

    features = {
        'image/height': int64_feature(shape[0]),
        'image/width': int64_feature(shape[1]),
        'image/channels': int64_feature(shape[2]),
        'image/shape': int64_feature(shape),
        'image/object/bbox/xmin': float_feature(coords['xmin']),
        'image/object/bbox/xmax': float_feature(coords['xmax']),
        'image/object/bbox/ymin': float_feature(coords['ymin']),
        'image/object/bbox/ymax': float_feature(coords['ymax']),
        'image/object/bbox/label': float_feature(labels),
        'image/object/bbox/difficult': int64_feature(difficult),
        'image/object/bbox/truncated': int64_feature(truncated),
        'image/format': bytes_feature(b'JPEG'),
        'image/preprocessed_box': int64_feature(preprocessed_box.tolist()),
        'image/encoded': bytes_feature(image_data),
        'image/name': bytes_feature(bytes(name, encoding='utf-8')),
    }
    return tf.train.Example(features=tf.train.Features(feature=features))
def convert_to_example(imgdata, shape, labels, labels_text, bboxes):
    """Convert one image and its annotations into a ``tf.train.Example``.

    Args:
        imgdata: encoded image payload stored under ``image/encoded``
            (the format feature is always written as ``b'JPEG'``).
        shape: image shape; items 0, 1 and 2 are written as height, width
            and channel count, and ``list(shape)`` as ``image/shape``.
        labels: per-box integer labels.
        labels_text: per-box label names, written as a bytes feature.
        bboxes: iterable of 4-element boxes, each read in the order
            (ymin, xmin, ymax, xmax).

    Returns:
        The populated ``tf.train.Example``.
    """
    ymin, xmin, ymax, xmax = [], [], [], []
    for box in bboxes:
        assert len(box) == 4
        # Spread one box over the four per-coordinate columns.
        for column, value in zip((ymin, xmin, ymax, xmax), box):
            column.append(value)

    features = {
        'image/height': int64_feature(shape[0]),
        'image/width': int64_feature(shape[1]),
        # NOTE: this key is singular ('channel'); readers must match it.
        'image/channel': int64_feature(shape[2]),
        'image/shape': int64_feature(list(shape)),
        'image/object/bbox/xmin': float_feature(xmin),
        'image/object/bbox/xmax': float_feature(xmax),
        'image/object/bbox/ymin': float_feature(ymin),
        'image/object/bbox/ymax': float_feature(ymax),
        'image/object/bbox/label': int64_feature(labels),
        'image/object/bbox/label_text': bytes_feature(labels_text),
        'image/format': bytes_feature(b'JPEG'),
        'image/encoded': bytes_feature(imgdata),
    }
    return tf.train.Example(features=tf.train.Features(feature=features))
def _convert_to_example(image_data, labels, labels_text, bboxes, shape,
                        difficult, truncated):
    """Pack one annotated image into a ``tf.train.Example``.

    Args:
        image_data: encoded image payload stored under ``image/encoded``
            (the format feature is always written as ``b'JPEG'``).
        labels: per-box integer labels.
        labels_text: per-box label names, written as a bytes feature.
        bboxes: iterable of 4-element boxes. Mind the coordinate order:
            each box is read as (xmin, ymin, xmax, ymax).
        shape: image shape; items 0, 1 and 2 are written as height, width
            and channels.
        difficult: values written as the int64 ``difficult`` feature.
        truncated: values written as the int64 ``truncated`` feature.

    Returns:
        The populated ``tf.train.Example``.
    """
    xmin, ymin, xmax, ymax = [], [], [], []
    for box in bboxes:
        assert len(box) == 4
        # Pay attention to the coordinate order here: x comes first.
        for column, value in zip((xmin, ymin, xmax, ymax), box):
            column.append(value)

    features = {
        'image/height': int64_feature(shape[0]),
        'image/width': int64_feature(shape[1]),
        'image/channels': int64_feature(shape[2]),
        'image/shape': int64_feature(shape),
        'image/object/bbox/xmin': float_feature(xmin),
        'image/object/bbox/xmax': float_feature(xmax),
        'image/object/bbox/ymin': float_feature(ymin),
        'image/object/bbox/ymax': float_feature(ymax),
        'image/object/bbox/label': int64_feature(labels),
        'image/object/bbox/label_text': bytes_feature(labels_text),
        'image/object/bbox/difficult': int64_feature(difficult),
        'image/object/bbox/truncated': int64_feature(truncated),
        'image/format': bytes_feature(b'JPEG'),
        'image/encoded': bytes_feature(image_data),
    }
    return tf.train.Example(features=tf.train.Features(feature=features))
def _convert_to_example(image_data, image_shape, density_map, density_shape):
    """Build an Example proto holding a raw image and its density map.

    Args:
        image_data: raw image data, stored under ``image/encoded`` with
            format ``b'RAW'``.
        image_shape: image shape; its first two items must equal
            ``RESIZED_IMAGE_SHAPE``.
        density_map: density map; must expose ``flatten()`` and is stored
            flattened as a float feature.
        density_shape: density map shape; its first two items must equal
            ``RESIZED_IMAGE_SHAPE`` divided by ``SHRINK_RATIO``.

    Returns:
        Example proto.
    """
    # Sanity checks: both inputs must already be at the expected resolution.
    assert RESIZED_IMAGE_SHAPE[0] == image_shape[0]
    assert RESIZED_IMAGE_SHAPE[1] == image_shape[1]
    assert RESIZED_IMAGE_SHAPE[0] / SHRINK_RATIO == density_shape[0]
    assert RESIZED_IMAGE_SHAPE[1] / SHRINK_RATIO == density_shape[1]

    full_image_shape = [image_shape[0], image_shape[1], CHANNELS]
    density_hw = [density_shape[0], density_shape[1]]

    features = {
        'image/height': int64_feature(image_shape[0]),
        'image/width': int64_feature(image_shape[1]),
        'image/channels': int64_feature(CHANNELS),
        'image/shape': int64_feature(full_image_shape),
        'image/encoded': bytes_feature(image_data),
        'image/format': bytes_feature(b'RAW'),
        'image/density_map/shape': int64_feature(density_hw),
        'image/density_map/data': float_feature(density_map.flatten()),
    }
    return tf.train.Example(features=tf.train.Features(feature=features))
def _convert_to_example(image_data, labels, labels_text, bboxes, shape,
                        difficult, truncated):
    """Build an Example proto for an image example.

    Args:
        image_data: encoded image payload stored under ``image/encoded``
            (the format feature is always written as ``b'JPEG'``).
        labels: list of integers, identifiers of the ground-truth classes.
        labels_text: list of human-readable labels, written as bytes.
        bboxes: list of bounding boxes; each box has exactly four values,
            read in the order (ymin, xmin, ymax, xmax).
        shape: 3 integers; items 0, 1 and 2 are written as height, width
            and channels.
        difficult: values written as the int64 ``difficult`` feature.
        truncated: values written as the int64 ``truncated`` feature.

    Returns:
        Example proto.
    """
    columns = ([], [], [], [])  # ymin, xmin, ymax, xmax
    for box in bboxes:
        assert len(box) == 4
        # Transpose:
        #   [(ymin_0, xmin_0, ymax_0, xmax_0), (ymin_1, xmin_1, ...), ...]
        # into
        #   [ymin_0, ymin_1, ...], [xmin_0, xmin_1, ...], ...
        for column, value in zip(columns, box):
            column.append(value)
    ymin, xmin, ymax, xmax = columns

    features = {
        'image/height': int64_feature(shape[0]),
        'image/width': int64_feature(shape[1]),
        'image/channels': int64_feature(shape[2]),
        'image/shape': int64_feature(shape),
        'image/object/bbox/xmin': float_feature(xmin),
        'image/object/bbox/xmax': float_feature(xmax),
        'image/object/bbox/ymin': float_feature(ymin),
        'image/object/bbox/ymax': float_feature(ymax),
        'image/object/bbox/label': int64_feature(labels),
        'image/object/bbox/label_text': bytes_feature(labels_text),
        'image/object/bbox/difficult': int64_feature(difficult),
        'image/object/bbox/truncated': int64_feature(truncated),
        'image/format': bytes_feature(b'JPEG'),
        'image/encoded': bytes_feature(image_data),
    }
    return tf.train.Example(features=tf.train.Features(feature=features))
def _convert_to_example(image_data, shape, bbox, label, imname):
    """Pack one image and its bounding boxes into a ``tf.train.Example``.

    Args:
        image_data: encoded image payload stored under ``image/encoded``.
        shape: image shape; items 0, 1 and 2 are written as height, width
            and channels.
        bbox: sequence of boxes whose first four columns are read as
            (ymin, xmin, ymax, xmax). May be empty, in which case empty
            coordinate lists are written.
        label: per-box integer labels.
        imname: image name, passed straight to ``bytes_feature``.

    Returns:
        The populated ``tf.train.Example``.
    """
    boxes = np.array(bbox)
    if boxes.size == 0:
        # np.array([]) is 1-D, so column slicing would raise IndexError;
        # an image without boxes simply gets empty coordinate lists.
        ymin, xmin, ymax, xmax = [], [], [], []
    else:
        ymin, xmin, ymax, xmax = (list(boxes[:, col]) for col in range(4))

    print('shape: {}, height:{}, width:{}'.format(shape, shape[0], shape[1]))

    features = {
        'image/height': int64_feature(shape[0]),
        'image/width': int64_feature(shape[1]),
        'image/channels': int64_feature(shape[2]),
        'image/shape': int64_feature(shape),
        'image/object/bbox/xmin': float_feature(xmin),
        'image/object/bbox/xmax': float_feature(xmax),
        'image/object/bbox/ymin': float_feature(ymin),
        'image/object/bbox/ymax': float_feature(ymax),
        'image/object/bbox/label': int64_feature(label),
        # NOTE(review): 'jpeg' and imname are passed as given; whether str
        # is accepted depends on bytes_feature, which is defined elsewhere.
        'image/format': bytes_feature('jpeg'),
        'image/encoded': bytes_feature(image_data),
        'image/name': bytes_feature(imname),
    }
    return tf.train.Example(features=tf.train.Features(feature=features))
def _convert_to_example(image_data, labels, labels_text, bboxes, shape,
                        difficult, truncated, oriented_bbox, ignored,
                        filename):
    """
    Build an Example proto for an image with axis-aligned and oriented boxes.

    :param image_data: encoded image payload stored under ``image/encoded``
        (the format feature is always written as ``b'JPEG'``)
    :param labels: list of integers, identifiers of the ground truth
    :param labels_text: list of human-readable labels, written as bytes
    :param bboxes: list of 4-value boxes, each read in the order
        (ymin, xmin, ymax, xmax)
    :param shape: 3 integers; items 0, 1 and 2 are written as height,
        width and channels
    :param difficult: values written as the int64 ``difficult`` feature
    :param truncated: values written as the int64 ``truncated`` feature
    :param oriented_bbox: list of 8-value boxes, each read in the order
        (x1, x2, x3, x4, y1, y2, y3, y4)
    :param ignored: values written as the int64 ``ignored`` feature
    :param filename: image file name (``str``); stored UTF-8 encoded
    :return: the populated ``tf.train.Example``
    """
    ymin, xmin, ymax, xmax = [], [], [], []
    for box in bboxes:
        assert len(box) == 4
        for column, value in zip((ymin, xmin, ymax, xmax), box):
            column.append(value)

    x1, x2, x3, x4 = [], [], [], []
    y1, y2, y3, y4 = [], [], [], []
    for quad in oriented_bbox:
        assert len(quad) == 8
        # All four x values come first, then all four y values.
        for column, value in zip((x1, x2, x3, x4, y1, y2, y3, y4), quad):
            column.append(value)

    features = {
        'image/height': int64_feature(shape[0]),
        'image/width': int64_feature(shape[1]),
        'image/channels': int64_feature(shape[2]),
        'image/shape': int64_feature(shape),
        'image/filename': bytes_feature(filename.encode('utf-8')),
        'image/object/bbox/xmin': float_feature(xmin),
        'image/object/bbox/xmax': float_feature(xmax),
        'image/object/bbox/ymin': float_feature(ymin),
        'image/object/bbox/ymax': float_feature(ymax),
        'image/object/bbox/x1': float_feature(x1),
        'image/object/bbox/y1': float_feature(y1),
        'image/object/bbox/x2': float_feature(x2),
        'image/object/bbox/y2': float_feature(y2),
        'image/object/bbox/x3': float_feature(x3),
        'image/object/bbox/y3': float_feature(y3),
        'image/object/bbox/x4': float_feature(x4),
        'image/object/bbox/y4': float_feature(y4),
        'image/object/bbox/label': int64_feature(labels),
        'image/object/bbox/label_text': bytes_feature(labels_text),
        'image/object/bbox/difficult': int64_feature(difficult),
        'image/object/bbox/truncated': int64_feature(truncated),
        'image/object/bbox/ignored': int64_feature(ignored),
        'image/format': bytes_feature(b'JPEG'),
        'image/encoded': bytes_feature(image_data),
    }
    return tf.train.Example(features=tf.train.Features(feature=features))
def _format_data(sess, image_reader, folder_path, pairs, i, labels, id_map,
                 attr_mat, id_map_attr, all_peaks_dic, subsets_dic,
                 FiltOutMissRegion=False):
    """Build a ``tf.train.Example`` for the image pair ``pairs[i]``.

    Args:
        sess: session handed to ``image_reader.read_image_dims``.
        image_reader: object whose ``read_image_dims(sess, raw)`` returns
            ``(height, width)``.
        folder_path: directory containing the image files.
        pairs: indexable of ``(name_0, name_1)`` file names; the text before
            the first ``'_'`` of each name is used as the identity key.
        i: index of the pair to convert.
        labels: indexable giving the pair label at ``labels[i]``.
        id_map: maps an identity key to its integer id.
        attr_mat: optional attribute table (``None`` to skip); every field
            in ``attr_mat.dtype.names`` is read at ``[0][0][0][idx]``.
        id_map_attr: maps an identity key to its index in ``attr_mat``.
        all_peaks_dic: maps image name to pose peaks, or ``None``.
        subsets_dic: maps image name to pose subsets.
        FiltOutMissRegion: when true, pairs where any body part is marked
            invisible (``0`` in the visibility list) are dropped.

    Returns:
        The populated ``tf.train.Example``, or ``None`` when pose data is
        missing for either image or the pair is filtered out.
    """
    name_0, name_1 = pairs[i][0], pairs[i][1]
    img_path_0 = os.path.join(folder_path, name_0)
    img_path_1 = os.path.join(folder_path, name_1)
    id_0 = name_0.split('_')[0]
    id_1 = name_1.split('_')[0]

    # Images are binary: read them in 'rb' mode and close the handles
    # deterministically instead of leaking them.
    with tf.gfile.FastGFile(img_path_0, 'rb') as image_file:
        image_raw_0 = image_file.read()
    with tf.gfile.FastGFile(img_path_1, 'rb') as image_file:
        image_raw_1 = image_file.read()
    height, width = image_reader.read_image_dims(sess, image_raw_0)

    attrs_0 = []
    attrs_1 = []
    if attr_mat is not None:
        idx_0 = id_map_attr[id_0]
        idx_1 = id_map_attr[id_1]
        for name in attr_mat.dtype.names:
            attrs_0.append(attr_mat[name][0][0][0][idx_0])
            attrs_1.append(attr_mat[name][0][0][0][idx_1])

    # Without pose annotations for both images there is nothing to emit.
    if (all_peaks_dic is None or name_0 not in all_peaks_dic
            or name_1 not in all_peaks_dic):
        return None

    def as_int64_list(values):
        return np.array(values).astype(np.int64).flatten().tolist()

    def as_float_list(values):
        # Builtin float replaces the np.float alias removed in NumPy 1.24.
        return np.array(values).astype(float).flatten().tolist()

    # Coarse 16x16 occupancy grid with one channel per joint (18 joints).
    w_unit = width / 16
    h_unit = height / 16
    pose_peaks_0 = np.zeros([16, 16, 18])
    pose_peaks_1 = np.zeros([16, 16, 18])
    # Per-joint (row, column, visibility) coordinates.
    pose_peaks_0_rcv = np.zeros([18, 3])
    pose_peaks_1_rcv = np.zeros([18, 3])

    # ---- Pose 0 ----
    peaks = _get_valid_peaks(all_peaks_dic[name_0], subsets_dic[name_0])
    indices_r4_0, values_r4_0, shape = _getSparsePose(
        peaks, height, width, 18, radius=4, mode='Solid')
    indices_r4_0, shape_0 = _oneDimSparsePose(indices_r4_0, shape)
    indices_r8_0, values_r8_0, shape = _getSparsePose(
        peaks, height, width, 18, radius=8, mode='Solid')
    indices_r8_0, _ = _oneDimSparsePose(indices_r8_0, shape)
    pose_mask_r4_0 = _getPoseMask(peaks, height, width, radius=4, mode='Solid')
    pose_mask_r8_0 = _getPoseMask(peaks, height, width, radius=8, mode='Solid')
    for joint, peak in enumerate(peaks):
        if len(peak) != 0:
            # peak[0] is indexed as (x, y): item 0 is the column, item 1
            # the row.
            row, col = peak[0][1], peak[0][0]
            pose_peaks_0[int(row / h_unit), int(col / w_unit), joint] = 1
            pose_peaks_0_rcv[joint][0] = row
            pose_peaks_0_rcv[joint][1] = col
            pose_peaks_0_rcv[joint][2] = 1
    # Body region proposals.
    part_bbox_list_0, visibility_list_0 = get_part_bbox(peaks, img_path_0)
    if FiltOutMissRegion and (0 in visibility_list_0):
        return None
    roi_mask_list_0 = get_roi_mask(part_bbox_list_0, visibility_list_0)
    roi10_mask_0 = np.transpose(
        np.squeeze(np.array(roi_mask_list_0)), [1, 2, 0])

    # ---- Pose 1 ----
    peaks = _get_valid_peaks(all_peaks_dic[name_1], subsets_dic[name_1])
    indices_r4_1, values_r4_1, shape = _getSparsePose(
        peaks, height, width, 18, radius=4, mode='Solid')
    indices_r4_1, _ = _oneDimSparsePose(indices_r4_1, shape)
    indices_r8_1, values_r8_1, shape = _getSparsePose(
        peaks, height, width, 18, radius=8, mode='Solid')
    indices_r8_1, _ = _oneDimSparsePose(indices_r8_1, shape)
    pose_mask_r4_1 = _getPoseMask(peaks, height, width, radius=4, mode='Solid')
    pose_mask_r8_1 = _getPoseMask(peaks, height, width, radius=8, mode='Solid')
    # Body region proposals.
    part_bbox_list_1, visibility_list_1 = get_part_bbox(peaks, img_path_1)
    if FiltOutMissRegion and (0 in visibility_list_1):
        return None
    roi_mask_list_1 = get_roi_mask(part_bbox_list_1, visibility_list_1)
    roi10_mask_1 = np.transpose(
        np.squeeze(np.array(roi_mask_list_1)), [1, 2, 0])
    for joint, peak in enumerate(peaks):
        if len(peak) != 0:
            row, col = peak[0][1], peak[0][0]
            pose_peaks_1[int(row / h_unit), int(col / w_unit), joint] = 1
            pose_peaks_1_rcv[joint][0] = row
            pose_peaks_1_rcv[joint][1] = col
            pose_peaks_1_rcv[joint][2] = 1

    pose_subs_0 = subsets_dic[name_0][0].tolist()
    pose_subs_1 = subsets_dic[name_1][0].tolist()

    features = {
        'image_name_0': dataset_utils.bytes_feature(name_0),
        'image_name_1': dataset_utils.bytes_feature(name_1),
        'image_raw_0': dataset_utils.bytes_feature(image_raw_0),
        'image_raw_1': dataset_utils.bytes_feature(image_raw_1),
        'label': dataset_utils.int64_feature(labels[i]),
        'id_0': dataset_utils.int64_feature(id_map[id_0]),
        'id_1': dataset_utils.int64_feature(id_map[id_1]),
        # This variant always writes -1 for the camera ids.
        'cam_0': dataset_utils.int64_feature(-1),
        'cam_1': dataset_utils.int64_feature(-1),
        'image_format': dataset_utils.bytes_feature('jpg'),
        'image_height': dataset_utils.int64_feature(height),
        'image_width': dataset_utils.int64_feature(width),
        'real_data': dataset_utils.int64_feature(1),
        'attrs_0': dataset_utils.int64_feature(attrs_0),
        'attrs_1': dataset_utils.int64_feature(attrs_1),
        'pose_peaks_0': dataset_utils.float_feature(
            pose_peaks_0.flatten().tolist()),
        'pose_peaks_1': dataset_utils.float_feature(
            pose_peaks_1.flatten().tolist()),
        'pose_peaks_0_rcv': dataset_utils.float_feature(
            pose_peaks_0_rcv.flatten().tolist()),
        'pose_peaks_1_rcv': dataset_utils.float_feature(
            pose_peaks_1_rcv.flatten().tolist()),
        'pose_mask_r4_0': dataset_utils.int64_feature(
            as_int64_list(pose_mask_r4_0)),
        'pose_mask_r4_1': dataset_utils.int64_feature(
            as_int64_list(pose_mask_r4_1)),
        'pose_mask_r8_0': dataset_utils.int64_feature(
            as_int64_list(pose_mask_r8_0)),
        'pose_mask_r8_1': dataset_utils.int64_feature(
            as_int64_list(pose_mask_r8_1)),
        # Only the shape from the first image's r4 sparse pose is stored.
        'shape': dataset_utils.int64_feature(shape_0),
        'indices_r4_0': dataset_utils.int64_feature(
            as_int64_list(indices_r4_0)),
        'values_r4_0': dataset_utils.float_feature(
            as_float_list(values_r4_0)),
        'indices_r4_1': dataset_utils.int64_feature(
            as_int64_list(indices_r4_1)),
        'values_r4_1': dataset_utils.float_feature(
            as_float_list(values_r4_1)),
        'indices_r8_0': dataset_utils.int64_feature(
            as_int64_list(indices_r8_0)),
        'values_r8_0': dataset_utils.float_feature(
            as_float_list(values_r8_0)),
        'indices_r8_1': dataset_utils.int64_feature(
            as_int64_list(indices_r8_1)),
        'values_r8_1': dataset_utils.float_feature(
            as_float_list(values_r8_1)),
        'pose_subs_0': dataset_utils.float_feature(pose_subs_0),
        'pose_subs_1': dataset_utils.float_feature(pose_subs_1),
        'part_bbox_0': dataset_utils.int64_feature(
            as_int64_list(part_bbox_list_0)),
        'part_bbox_1': dataset_utils.int64_feature(
            as_int64_list(part_bbox_list_1)),
        'part_vis_0': dataset_utils.int64_feature(
            as_int64_list(visibility_list_0)),
        'part_vis_1': dataset_utils.int64_feature(
            as_int64_list(visibility_list_1)),
        'roi10_mask_0': dataset_utils.int64_feature(
            as_int64_list(roi10_mask_0)),
        'roi10_mask_1': dataset_utils.int64_feature(
            as_int64_list(roi10_mask_1)),
    }
    return tf.train.Example(features=tf.train.Features(feature=features))
def _label_class_id(type_name):
    """Map an annotation type name to the integer class id written out."""
    if type_name in ("Car", "Van", "Truck"):
        return 1
    if type_name in ("Pedestrian", "Person_sitting", "Cyclist"):
        return 2
    return 0


def _write_frame_example(sess, inputs, decoded_image, tfrecord_writer, image,
                         frame_name, source_id, xmin, xmax, ymin, ymax,
                         label_class):
    """Decode one frame image and append its Example to the writer."""
    print('converting %s' % frame_name)
    # Read the encoded bytes and close the handle right away.
    with tf.gfile.FastGFile(image, 'rb') as image_file:
        image_data = image_file.read()
    # The image is decoded only to learn its height and width.
    if frame_name.endswith(('png', 'PNG')):
        image_data_decoded = sess.run(
            decoded_image, feed_dict={inputs: image_data})
        image_format = b'PNG'
    else:
        raise ValueError("image%s is not supported" % frame_name)
    shape = list(image_data_decoded.shape)

    example = tf.train.Example(features=tf.train.Features(feature={
        'image/format': bytes_feature(image_format),
        'image/encoded': bytes_feature(image_data),
        'image/filename': bytes_feature(bytes(frame_name, 'utf-8')),
        'image/source_id': bytes_feature(bytes(source_id, 'utf-8')),
        'image/height': int64_feature(shape[0]),
        'image/width': int64_feature(shape[1]),
        'bbox/xmin': float_feature(xmin),
        'bbox/xmax': float_feature(xmax),
        'bbox/ymin': float_feature(ymin),
        'bbox/ymax': float_feature(ymax),
        'bbox/label/index': int64_feature(label_class),
    }))
    tfrecord_writer.write(example.SerializeToString())


def image_to_tfrecord(video_folder, annotation_file, tfrecord_filename):
    """Convert per-video frame images and box annotations to one TFRecord.

    Every file in ``annotation_file`` (a directory, despite its name) is one
    video's annotation file; files starting with ``"._"`` are skipped. Each
    line is space separated: column 0 is the frame number, column 2 the
    object type and columns 6-9 the box as xmin, ymin, xmax, ymax.
    Consecutive lines with the same frame number are grouped into a single
    Example, and the frame image is expected at
    ``<video_folder>/<video name>/<frame zero-padded to 6 digits>.png``.
    Frames whose image file does not exist are skipped.

    Each Example stores the encoded image, its format, file name, source
    id, height and width (obtained by decoding the image), the box
    coordinates and the class index (1 for Car/Van/Truck, 2 for
    Pedestrian/Person_sitting/Cyclist, 0 otherwise).

    Args:
        video_folder: directory with one sub-directory of frames per video.
        annotation_file: directory of ``<video name>.txt`` annotation files.
        tfrecord_filename: path of the TFRecord file to write.

    Raises:
        ValueError: if a frame file name does not end in ``png``/``PNG``.
    """
    # Fail fast when the image root is missing; the listing is not needed.
    os.listdir(video_folder)
    video_label_list = os.listdir(annotation_file)

    # Image decoding graph.
    inputs = tf.placeholder(dtype=tf.string)
    decoded_jpg = tf.image.decode_jpeg(inputs)

    # Open the TFRecord writer and a session used for image decoding.
    with tf.python_io.TFRecordWriter(tfrecord_filename) as tfrecord_writer:
        with tf.Session() as sess:
            for label_file in video_label_list:
                if label_file.startswith("._"):
                    continue
                print("VIDEO--------------------------- ", label_file)
                label_path = annotation_file + "/" + label_file
                video_name = label_file.strip("\n").replace(".txt", "")
                with open(label_path, "r") as ff:
                    annotations = ff.readlines()

                # State of the frame currently being accumulated.
                frame = ""
                frame_name = ""
                source_id = ""
                image = ""
                xmin, ymin, xmax, ymax, label_class = [], [], [], [], []

                for line in annotations:
                    line = line.strip()
                    if not line:
                        # Blank lines carry no annotation.
                        continue
                    row = line.split(" ")

                    if row[0] != frame:
                        # A new frame starts: emit the finished one first.
                        if frame != "" and os.path.exists(image):
                            _write_frame_example(
                                sess, inputs, decoded_jpg, tfrecord_writer,
                                image, frame_name, source_id,
                                xmin, xmax, ymin, ymax, label_class)
                            print("FRAME FOUND")
                        else:
                            print("START FRAME")
                        xmin, ymin, xmax, ymax = [], [], [], []
                        label_class = []
                        frame = row[0]
                        source_id = row[0]
                        frame_name = row[0].zfill(6) + ".png"
                        image = (video_folder + "/" + video_name + "/"
                                 + frame_name)
                        if not os.path.exists(image):
                            continue
                    else:
                        print("Appending")

                    label_class.append(_label_class_id(row[2]))
                    xmin.append(float(row[6]))
                    ymin.append(float(row[7]))
                    xmax.append(float(row[8]))
                    ymax.append(float(row[9]))

                # Emit the last frame of the file; previously it was lost
                # because writing only happened when the next frame began.
                if frame != "" and os.path.exists(image):
                    _write_frame_example(
                        sess, inputs, decoded_jpg, tfrecord_writer,
                        image, frame_name, source_id,
                        xmin, xmax, ymin, ymax, label_class)
                    print("FRAME FOUND")
def _format_data(sess, image_reader, folder_path, pairs, idx, labels, id_map,
                 attr_onehot_mat, attr_w2v25_mat, attr_w2v50_mat,
                 attr_w2v100_mat, attr_w2v150_mat, id_map_attr,
                 all_peaks_dic, subsets_dic, seg_data_dir,
                 FiltOutMissRegion=False, FLIP=False):
    """Build a ``tf.train.Example`` for the image pair ``pairs[idx]``.

    Args:
        sess: session handed to ``image_reader.read_image_dims``.
        image_reader: object whose ``read_image_dims(sess, raw)`` returns
            ``(height, width)``.
        folder_path: directory containing the image files.
        pairs: indexable of ``(name_0, name_1)`` file names; characters
            ``[0:4]`` of a name are the identity key and character ``[6]``
            is parsed as the camera id.
        idx: index of the pair to convert.
        labels: indexable giving the pair label at ``labels[idx]``.
        id_map: maps an identity key to its integer id.
        attr_onehot_mat: optional attribute table (``None`` to skip); every
            field in ``dtype.names`` is read at ``[0][0][0][person]``.
        attr_w2v25_mat, attr_w2v50_mat, attr_w2v100_mat, attr_w2v150_mat:
            optional tables (``None`` to skip) read as ``mat[0][k][person]``
            for every ``k`` and concatenated into one float list.
        id_map_attr: maps an identity key to its index in the attribute
            tables.
        all_peaks_dic: maps image name to pose peaks, or ``None``.
        subsets_dic: maps image name to pose subsets.
        seg_data_dir: optional directory of segmentation images named like
            the inputs; when falsy, all-zero 128x64 maps are stored.
        FiltOutMissRegion: when true, pairs where any body part is marked
            invisible (``0`` in the visibility list) are dropped.
        FLIP: when true, the loaded segmentation maps are mirrored
            left-right. Nothing else is flipped here.

    Returns:
        The populated ``tf.train.Example``, or ``None`` when a segmentation
        file or pose data is missing, or the pair is filtered out.
    """
    name_0, name_1 = pairs[idx][0], pairs[idx][1]
    img_path_0 = os.path.join(folder_path, name_0)
    img_path_1 = os.path.join(folder_path, name_1)
    id_0 = name_0[0:4]
    id_1 = name_1[0:4]
    cam_0 = name_0[6]
    cam_1 = name_1[6]

    # Images are binary: read them in 'rb' mode and close the handles
    # deterministically instead of leaking them.
    with tf.gfile.FastGFile(img_path_0, 'rb') as image_file:
        image_raw_0 = image_file.read()
    with tf.gfile.FastGFile(img_path_1, 'rb') as image_file:
        image_raw_1 = image_file.read()
    height, width = image_reader.read_image_dims(sess, image_raw_0)

    def concat_rows(attr_mat, person_idx):
        # Flatten attr_mat[0][k][person_idx] for every k into one list.
        merged = []
        if attr_mat is not None:
            for k in range(attr_mat[0].shape[0]):
                merged.extend(attr_mat[0][k][person_idx].tolist())
        return merged

    def as_int64_list(values):
        return np.array(values).astype(np.int64).flatten().tolist()

    def as_float_list(values):
        # Builtin float replaces the np.float alias removed in NumPy 1.24.
        return np.array(values).astype(float).flatten().tolist()

    # ---- Attributes ----
    idx_0 = id_map_attr[id_0]
    idx_1 = id_map_attr[id_1]
    attrs_0 = []
    attrs_1 = []
    if attr_onehot_mat is not None:
        for name in attr_onehot_mat.dtype.names:
            attrs_0.append(attr_onehot_mat[name][0][0][0][idx_0])
            attrs_1.append(attr_onehot_mat[name][0][0][0][idx_1])
    attrs_w2v25_0 = concat_rows(attr_w2v25_mat, idx_0)
    attrs_w2v25_1 = concat_rows(attr_w2v25_mat, idx_1)
    attrs_w2v50_0 = concat_rows(attr_w2v50_mat, idx_0)
    attrs_w2v50_1 = concat_rows(attr_w2v50_mat, idx_1)
    attrs_w2v100_0 = concat_rows(attr_w2v100_mat, idx_0)
    attrs_w2v100_1 = concat_rows(attr_w2v100_mat, idx_1)
    attrs_w2v150_0 = concat_rows(attr_w2v150_mat, idx_0)
    attrs_w2v150_1 = concat_rows(attr_w2v150_mat, idx_1)

    # ---- Segmentation ----
    seg_0 = np.zeros([128, 64])
    seg_1 = np.zeros([128, 64])
    if seg_data_dir:
        path_0 = os.path.join(seg_data_dir, name_0)
        path_1 = os.path.join(seg_data_dir, name_1)
        # os.path.exists is the real API; the previous os.exists raised
        # AttributeError whenever seg_data_dir was set.
        if not (os.path.exists(path_0) and os.path.exists(path_1)):
            return None
        seg_0 = scipy.misc.imread(path_0)
        seg_1 = scipy.misc.imread(path_1)
        if FLIP:
            seg_0 = np.fliplr(seg_0)
            seg_1 = np.fliplr(seg_1)

    # Without pose annotations for both images there is nothing to emit.
    if (all_peaks_dic is None or name_0 not in all_peaks_dic
            or name_1 not in all_peaks_dic):
        return None

    # Coarse 16x8 (rows x columns) occupancy grid, one channel per joint.
    w_unit = width / 8
    h_unit = height / 16
    pose_peaks_0 = np.zeros([16, 8, 18])
    pose_peaks_1 = np.zeros([16, 8, 18])
    # Per-joint (row, column, visibility) coordinates.
    pose_peaks_0_rcv = np.zeros([18, 3])
    pose_peaks_1_rcv = np.zeros([18, 3])

    # ---- Pose 0 ----
    peaks = _get_valid_peaks(all_peaks_dic[name_0], subsets_dic[name_0])
    indices_r4_0, values_r4_0, shape = _getSparsePose(
        peaks, height, width, 18, radius=4, mode='Solid')
    indices_r4_0, shape_0 = _oneDimSparsePose(indices_r4_0, shape)
    pose_mask_r4_0 = _getPoseMask(peaks, height, width, radius=4, mode='Solid')
    pose_mask_r7_0 = _getPoseMask(peaks, height, width, radius=7, mode='Solid')
    for joint, peak in enumerate(peaks):
        if len(peak) != 0:
            # peak[0] is indexed as (x, y): item 0 is the column, item 1
            # the row.
            row, col = peak[0][1], peak[0][0]
            pose_peaks_0[int(row / h_unit), int(col / w_unit), joint] = 1
            pose_peaks_0_rcv[joint][0] = row
            pose_peaks_0_rcv[joint][1] = col
            pose_peaks_0_rcv[joint][2] = 1
    # Body region proposals.
    part_bbox_list_0, visibility_list_0 = get_part_bbox37(
        peaks, img_path_0, radius=6)
    if FiltOutMissRegion and (0 in visibility_list_0):
        return None

    # ---- Pose 1 ----
    peaks = _get_valid_peaks(all_peaks_dic[name_1], subsets_dic[name_1])
    indices_r4_1, values_r4_1, shape = _getSparsePose(
        peaks, height, width, 18, radius=4, mode='Solid')
    indices_r4_1, _ = _oneDimSparsePose(indices_r4_1, shape)
    pose_mask_r4_1 = _getPoseMask(peaks, height, width, radius=4, mode='Solid')
    pose_mask_r7_1 = _getPoseMask(peaks, height, width, radius=7, mode='Solid')
    # Body region proposals. The second image's own path is passed here;
    # img_path_0 was used before, which looks like a copy-paste slip.
    part_bbox_list_1, visibility_list_1 = get_part_bbox37(
        peaks, img_path_1, radius=6)
    if FiltOutMissRegion and (0 in visibility_list_1):
        return None
    for joint, peak in enumerate(peaks):
        if len(peak) != 0:
            row, col = peak[0][1], peak[0][0]
            pose_peaks_1[int(row / h_unit), int(col / w_unit), joint] = 1
            pose_peaks_1_rcv[joint][0] = row
            pose_peaks_1_rcv[joint][1] = col
            pose_peaks_1_rcv[joint][2] = 1

    pose_subs_0 = subsets_dic[name_0][0].tolist()
    pose_subs_1 = subsets_dic[name_1][0].tolist()

    features = {
        'image_name_0': dataset_utils.bytes_feature(name_0),
        'image_name_1': dataset_utils.bytes_feature(name_1),
        'image_raw_0': dataset_utils.bytes_feature(image_raw_0),
        'image_raw_1': dataset_utils.bytes_feature(image_raw_1),
        'label': dataset_utils.int64_feature(labels[idx]),
        'id_0': dataset_utils.int64_feature(id_map[id_0]),
        'id_1': dataset_utils.int64_feature(id_map[id_1]),
        'cam_0': dataset_utils.int64_feature(int(cam_0)),
        'cam_1': dataset_utils.int64_feature(int(cam_1)),
        'image_format': dataset_utils.bytes_feature('jpg'),
        'image_height': dataset_utils.int64_feature(height),
        'image_width': dataset_utils.int64_feature(width),
        'real_data': dataset_utils.int64_feature(1),
        'attrs_0': dataset_utils.int64_feature(attrs_0),
        'attrs_1': dataset_utils.int64_feature(attrs_1),
        'attrs_w2v25_0': dataset_utils.float_feature(attrs_w2v25_0),
        'attrs_w2v25_1': dataset_utils.float_feature(attrs_w2v25_1),
        'attrs_w2v50_0': dataset_utils.float_feature(attrs_w2v50_0),
        'attrs_w2v50_1': dataset_utils.float_feature(attrs_w2v50_1),
        'attrs_w2v100_0': dataset_utils.float_feature(attrs_w2v100_0),
        'attrs_w2v100_1': dataset_utils.float_feature(attrs_w2v100_1),
        'attrs_w2v150_0': dataset_utils.float_feature(attrs_w2v150_0),
        'attrs_w2v150_1': dataset_utils.float_feature(attrs_w2v150_1),
        'pose_peaks_0': dataset_utils.float_feature(
            pose_peaks_0.flatten().tolist()),
        'pose_peaks_1': dataset_utils.float_feature(
            pose_peaks_1.flatten().tolist()),
        'pose_peaks_0_rcv': dataset_utils.float_feature(
            pose_peaks_0_rcv.flatten().tolist()),
        'pose_peaks_1_rcv': dataset_utils.float_feature(
            pose_peaks_1_rcv.flatten().tolist()),
        'pose_mask_r4_0': dataset_utils.int64_feature(
            as_int64_list(pose_mask_r4_0)),
        'pose_mask_r4_1': dataset_utils.int64_feature(
            as_int64_list(pose_mask_r4_1)),
        # NOTE(review): these keys say r6 but hold the radius-7 masks; the
        # keys are kept because readers presumably depend on them.
        'pose_mask_r6_0': dataset_utils.int64_feature(
            as_int64_list(pose_mask_r7_0)),
        'pose_mask_r6_1': dataset_utils.int64_feature(
            as_int64_list(pose_mask_r7_1)),
        'seg_0': dataset_utils.int64_feature(as_int64_list(seg_0)),
        'seg_1': dataset_utils.int64_feature(as_int64_list(seg_1)),
        # Only the shape from the first image's r4 sparse pose is stored.
        'shape': dataset_utils.int64_feature(shape_0),
        'indices_r4_0': dataset_utils.int64_feature(
            as_int64_list(indices_r4_0)),
        'values_r4_0': dataset_utils.float_feature(
            as_float_list(values_r4_0)),
        'indices_r4_1': dataset_utils.int64_feature(
            as_int64_list(indices_r4_1)),
        'values_r4_1': dataset_utils.float_feature(
            as_float_list(values_r4_1)),
        'pose_subs_0': dataset_utils.float_feature(pose_subs_0),
        'pose_subs_1': dataset_utils.float_feature(pose_subs_1),
        'part_bbox_0': dataset_utils.int64_feature(
            as_int64_list(part_bbox_list_0)),
        'part_bbox_1': dataset_utils.int64_feature(
            as_int64_list(part_bbox_list_1)),
        'part_vis_0': dataset_utils.int64_feature(
            as_int64_list(visibility_list_0)),
        'part_vis_1': dataset_utils.int64_feature(
            as_int64_list(visibility_list_1)),
    }
    return tf.train.Example(features=tf.train.Features(feature=features))
def _format_data(sess, image_reader, folder_path, pairs, i, labels, id_map, attr_mat, id_map_attr,
                 all_peaks_dic, subsets_dic, FiltOutMissRegion=False):
    """Build the tf.train.Example for the image pair ``pairs[i]``.

    Both images are read from ``folder_path``, optional per-identity
    attributes are looked up, pose features (sparse pose maps, pose masks,
    peak grids, part bounding boxes and ROI masks) are derived from the
    keypoint peaks of each image, and everything is packed into one Example.

    Args:
        sess: session handed through to ``image_reader.read_image_dims``.
        image_reader: object exposing ``read_image_dims(sess, image_raw)``
            that returns ``(height, width)``.
        folder_path: directory that contains the image files.
        pairs: indexable collection; ``pairs[i][0]`` and ``pairs[i][1]`` are
            the two image file names. The text before the first ``'_'`` of
            each name is used as the identity key.
        i: index of the pair (and of its label) to convert.
        labels: indexable collection; ``labels[i]`` is stored as ``'label'``.
        id_map: mapping from identity key to the value stored as
            ``'id_0'`` / ``'id_1'``.
        attr_mat: ``None`` or an object exposing ``dtype.names`` and indexed
            as ``attr_mat[name][0][0][0][idx]`` (presumably a structured
            array loaded from a .mat file -- TODO confirm).
        id_map_attr: mapping from identity key to an index into the
            attribute columns; only used when ``attr_mat`` is not ``None``.
        all_peaks_dic: ``None`` or a mapping from image file name to its
            detected peaks.
        subsets_dic: mapping from image file name to its pose subsets;
            ``subsets_dic[name][0].tolist()`` is stored as ``'pose_subs_*'``.
        FiltOutMissRegion: when true, the pair is dropped if any entry of a
            part visibility list equals 0.

    Returns:
        A ``tf.train.Example``, or ``None`` when ``all_peaks_dic`` is
        ``None``, when either image name is missing from ``all_peaks_dic``,
        or when ``FiltOutMissRegion`` is set and a part is not visible.
    """
    name_0 = pairs[i][0]
    name_1 = pairs[i][1]
    img_path_0 = os.path.join(folder_path, name_0)
    img_path_1 = os.path.join(folder_path, name_1)
    id_0 = name_0.split('_')[0]
    id_1 = name_1.split('_')[0]

    # Image files are binary: read them in 'rb' mode (text mode breaks on
    # Python 3) and close the handles deterministically.
    with tf.gfile.FastGFile(img_path_0, 'rb') as image_file:
        image_raw_0 = image_file.read()
    with tf.gfile.FastGFile(img_path_1, 'rb') as image_file:
        image_raw_1 = image_file.read()
    height, width = image_reader.read_image_dims(sess, image_raw_0)

    attrs_0 = []
    attrs_1 = []
    if attr_mat is not None:
        idx_0 = id_map_attr[id_0]
        idx_1 = id_map_attr[id_1]
        for attr_name in attr_mat.dtype.names:
            attr_column = attr_mat[attr_name][0][0][0]
            attrs_0.append(attr_column[idx_0])
            attrs_1.append(attr_column[idx_1])

    # Size of one cell of the 16x16 peak grid, in pixels.
    # NOTE(review): `/` floors for ints on Python 2 but is true division on
    # Python 3 -- confirm which behaviour is intended.
    w_unit = width/16
    h_unit = height/16

    # Pairs without pose annotations for both images are dropped.
    if (all_peaks_dic is None) or (name_0 not in all_peaks_dic) or (name_1 not in all_peaks_dic):
        return None

    ## Pose 0
    peaks = _get_valid_peaks(all_peaks_dic[name_0], subsets_dic[name_0])
    indices_r4_0, values_r4_0, shape = _getSparsePose(peaks, height, width, 18, radius=4, mode='Solid')
    indices_r4_0, shape_0 = _oneDimSparsePose(indices_r4_0, shape)
    indices_r8_0, values_r8_0, shape = _getSparsePose(peaks, height, width, 18, radius=8, mode='Solid')
    indices_r8_0, _ = _oneDimSparsePose(indices_r8_0, shape)
    pose_mask_r4_0 = _getPoseMask(peaks, height, width, radius=4, mode='Solid')
    pose_mask_r8_0 = _getPoseMask(peaks, height, width, radius=8, mode='Solid')
    pose_peaks_0, pose_peaks_0_rcv = _rasterize_peaks(peaks, h_unit, w_unit)
    ## Generate body region proposals
    part_bbox_list_0, visibility_list_0 = get_part_bbox(peaks, img_path_0)
    if FiltOutMissRegion and (0 in visibility_list_0):
        return None
    roi_mask_list_0 = get_roi_mask(part_bbox_list_0, visibility_list_0)
    roi10_mask_0 = np.transpose(np.squeeze(np.array(roi_mask_list_0)), [1, 2, 0])

    ## Pose 1
    peaks = _get_valid_peaks(all_peaks_dic[name_1], subsets_dic[name_1])
    indices_r4_1, values_r4_1, shape = _getSparsePose(peaks, height, width, 18, radius=4, mode='Solid')
    indices_r4_1, _ = _oneDimSparsePose(indices_r4_1, shape)
    indices_r8_1, values_r8_1, shape = _getSparsePose(peaks, height, width, 18, radius=8, mode='Solid')
    indices_r8_1, _ = _oneDimSparsePose(indices_r8_1, shape)
    pose_mask_r4_1 = _getPoseMask(peaks, height, width, radius=4, mode='Solid')
    pose_mask_r8_1 = _getPoseMask(peaks, height, width, radius=8, mode='Solid')
    ## Generate body region proposals
    part_bbox_list_1, visibility_list_1 = get_part_bbox(peaks, img_path_1)
    if FiltOutMissRegion and (0 in visibility_list_1):
        return None
    roi_mask_list_1 = get_roi_mask(part_bbox_list_1, visibility_list_1)
    roi10_mask_1 = np.transpose(np.squeeze(np.array(roi_mask_list_1)), [1, 2, 0])
    pose_peaks_1, pose_peaks_1_rcv = _rasterize_peaks(peaks, h_unit, w_unit)

    pose_subs_0 = subsets_dic[name_0][0].tolist()
    pose_subs_1 = subsets_dic[name_1][0].tolist()

    def _int64_list(values):
        # Flatten any nested sequence/array into a plain list of int64.
        return np.array(values).astype(np.int64).flatten().tolist()

    def _float_list(values):
        # np.float64 replaces the alias np.float, which NumPy >= 1.24 removed.
        return np.array(values).astype(np.float64).flatten().tolist()

    # NOTE(review): the str values given to bytes_feature (file names, 'jpg')
    # may need encoding on Python 3, depending on dataset_utils -- confirm.
    example = tf.train.Example(features=tf.train.Features(feature={
        'image_name_0': dataset_utils.bytes_feature(name_0),
        'image_name_1': dataset_utils.bytes_feature(name_1),
        'image_raw_0': dataset_utils.bytes_feature(image_raw_0),
        'image_raw_1': dataset_utils.bytes_feature(image_raw_1),
        'label': dataset_utils.int64_feature(labels[i]),
        'id_0': dataset_utils.int64_feature(id_map[id_0]),
        'id_1': dataset_utils.int64_feature(id_map[id_1]),
        'cam_0': dataset_utils.int64_feature(-1),
        'cam_1': dataset_utils.int64_feature(-1),
        'image_format': dataset_utils.bytes_feature('jpg'),
        'image_height': dataset_utils.int64_feature(height),
        'image_width': dataset_utils.int64_feature(width),
        'real_data': dataset_utils.int64_feature(1),
        'attrs_0': dataset_utils.int64_feature(attrs_0),
        'attrs_1': dataset_utils.int64_feature(attrs_1),
        'pose_peaks_0': dataset_utils.float_feature(pose_peaks_0.flatten().tolist()),
        'pose_peaks_1': dataset_utils.float_feature(pose_peaks_1.flatten().tolist()),
        'pose_peaks_0_rcv': dataset_utils.float_feature(pose_peaks_0_rcv.flatten().tolist()),
        'pose_peaks_1_rcv': dataset_utils.float_feature(pose_peaks_1_rcv.flatten().tolist()),
        'pose_mask_r4_0': dataset_utils.int64_feature(pose_mask_r4_0.astype(np.int64).flatten().tolist()),
        'pose_mask_r4_1': dataset_utils.int64_feature(pose_mask_r4_1.astype(np.int64).flatten().tolist()),
        'pose_mask_r8_0': dataset_utils.int64_feature(pose_mask_r8_0.astype(np.int64).flatten().tolist()),
        'pose_mask_r8_1': dataset_utils.int64_feature(pose_mask_r8_1.astype(np.int64).flatten().tolist()),
        # Only the flattened shape computed for image 0 is stored.
        'shape': dataset_utils.int64_feature(shape_0),
        'indices_r4_0': dataset_utils.int64_feature(_int64_list(indices_r4_0)),
        'values_r4_0': dataset_utils.float_feature(_float_list(values_r4_0)),
        'indices_r4_1': dataset_utils.int64_feature(_int64_list(indices_r4_1)),
        'values_r4_1': dataset_utils.float_feature(_float_list(values_r4_1)),
        'indices_r8_0': dataset_utils.int64_feature(_int64_list(indices_r8_0)),
        'values_r8_0': dataset_utils.float_feature(_float_list(values_r8_0)),
        'indices_r8_1': dataset_utils.int64_feature(_int64_list(indices_r8_1)),
        'values_r8_1': dataset_utils.float_feature(_float_list(values_r8_1)),
        'pose_subs_0': dataset_utils.float_feature(pose_subs_0),
        'pose_subs_1': dataset_utils.float_feature(pose_subs_1),
        'part_bbox_0': dataset_utils.int64_feature(_int64_list(part_bbox_list_0)),
        'part_bbox_1': dataset_utils.int64_feature(_int64_list(part_bbox_list_1)),
        'part_vis_0': dataset_utils.int64_feature(_int64_list(visibility_list_0)),
        'part_vis_1': dataset_utils.int64_feature(_int64_list(visibility_list_1)),
        'roi10_mask_0': dataset_utils.int64_feature(roi10_mask_0.astype(np.int64).flatten().tolist()),
        'roi10_mask_1': dataset_utils.int64_feature(roi10_mask_1.astype(np.int64).flatten().tolist()),
    }))

    return example


def _rasterize_peaks(peaks, h_unit, w_unit):
    """Mark each non-empty peak in a 16x16x18 grid and an 18x3 coordinate table.

    For every index ``k`` whose ``peaks[k]`` is non-empty, the first entry
    ``p = peaks[k][0]`` sets ``grid[int(p[1]/h_unit), int(p[0]/w_unit), k]``
    to 1 and fills ``coords[k]`` with ``[p[1], p[0], 1]``. Rows belonging to
    empty peaks stay all-zero.

    Returns:
        Tuple ``(grid, coords)`` of freshly allocated float arrays.
    """
    grid = np.zeros([16, 16, 18])
    coords = np.zeros([18, 3])
    for joint, p in enumerate(peaks):
        # len() rather than truthiness: p may be an array, whose truth value
        # is ambiguous.
        if len(p) != 0:
            grid[int(p[0][1]/h_unit), int(p[0][0]/w_unit), joint] = 1
            coords[joint][0] = p[0][1]
            coords[joint][1] = p[0][0]
            coords[joint][2] = 1
    return grid, coords