Example #1
def _convert_to_example(image_data, shape, bboxes, labels, difficult,
                        truncated, preprocessed_box, name):
    '''Build an Example proto for an image example.

    Each box in `bboxes` is expected in (ymin, xmin, ymax, xmax) order,
    matching the zip() below.
    '''
    xmin = []
    ymin = []
    xmax = []
    ymax = []
    for b in bboxes:
        assert len(b) == 4
        # pylint: disable=expression-not-assigned
        [l.append(point) for l, point in zip([ymin, xmin, ymax, xmax], b)]
        # pylint: enable=expression-not-assigned

    image_format = b'JPEG'
    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height': int64_feature(shape[0]),
            'image/width': int64_feature(shape[1]),
            'image/channels': int64_feature(shape[2]),
            'image/shape': int64_feature(shape),
            'image/object/bbox/xmin': float_feature(xmin),
            'image/object/bbox/xmax': float_feature(xmax),
            'image/object/bbox/ymin': float_feature(ymin),
            'image/object/bbox/ymax': float_feature(ymax),
            # Note: this variant stores labels as floats; the later examples
            # use int64_feature(labels) instead.
            'image/object/bbox/label': float_feature(labels),
            'image/object/bbox/difficult': int64_feature(difficult),
            'image/object/bbox/truncated': int64_feature(truncated),
            'image/format': bytes_feature(image_format),
            'image/preprocessed_box': int64_feature(preprocessed_box.tolist()),
            'image/encoded': bytes_feature(image_data),
            'image/name': bytes_feature(bytes(name, encoding='utf-8'))
        }))
    return example
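All of these examples lean on small feature-wrapper helpers (int64_feature, float_feature, bytes_feature and the *_list_feature variants, sometimes namespaced under dataset_utils) that are not shown. A minimal sketch of what they are assumed to look like, accepting scalars as well as lists:

import numpy as np
import tensorflow as tf

def int64_feature(value):
    # Wrap an int, or a sequence of ints, in a tf.train.Feature.
    if not isinstance(value, (list, tuple, np.ndarray)):
        value = [value]
    return tf.train.Feature(int64_list=tf.train.Int64List(value=value))

def float_feature(value):
    # Wrap a float, or a sequence of floats, in a tf.train.Feature.
    if not isinstance(value, (list, tuple, np.ndarray)):
        value = [value]
    return tf.train.Feature(float_list=tf.train.FloatList(value=value))

def bytes_feature(value):
    # Wrap a bytes string, or a list of them, in a tf.train.Feature.
    if not isinstance(value, (list, tuple)):
        value = [value]
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))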
Example #2
def _convert_to_example(image_data, image_shape, density_map, density_shape):
    """Build an Example proto for an image example.

    Args:
      image_data: image raw data(string)
      image_shape: shape of image
      density_map: density map which is generated from annotations
      density_shape: shape of density map
    Returns:
      Example proto
    """
    assert (image_shape[0] == RESIZED_IMAGE_SHAPE[0])
    assert (image_shape[1] == RESIZED_IMAGE_SHAPE[1])
    assert (density_shape[0] == RESIZED_IMAGE_SHAPE[0] / SHRINK_RATIO)
    assert (density_shape[1] == RESIZED_IMAGE_SHAPE[1] / SHRINK_RATIO)
    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height': int64_feature(image_shape[0]),
            'image/width': int64_feature(image_shape[1]),
            'image/channels': int64_feature(CHANNELS),
            'image/shape': int64_feature(
                [image_shape[0], image_shape[1], CHANNELS]),
            'image/encoded': bytes_feature(image_data),
            'image/format': bytes_feature(b'RAW'),
            'image/density_map/shape': int64_feature(
                [density_shape[0], density_shape[1]]),
            'image/density_map/data': float_feature(density_map.flatten())
        }))
    return example
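For reference, a sketch of how the density-map Example above might be read back; it assumes the same RESIZED_IMAGE_SHAPE, SHRINK_RATIO and CHANNELS constants and the TF1-style API used throughout these examples:

def parse_density_example(serialized):
    h, w = RESIZED_IMAGE_SHAPE[0], RESIZED_IMAGE_SHAPE[1]
    dh, dw = h // SHRINK_RATIO, w // SHRINK_RATIO
    # The writer's asserts fix all shapes, so FixedLenFeature suffices.
    parsed = tf.parse_single_example(serialized, {
        'image/encoded': tf.FixedLenFeature([], tf.string),
        'image/density_map/data': tf.FixedLenFeature([dh * dw], tf.float32),
    })
    # 'image/format' is b'RAW', so the pixels are raw bytes, not PNG/JPEG.
    image = tf.reshape(tf.decode_raw(parsed['image/encoded'], tf.uint8),
                       [h, w, CHANNELS])
    density = tf.reshape(parsed['image/density_map/data'], [dh, dw])
    return image, density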
Example #3
def _convert_to_example(image_data, labels, labels_text, bboxes, shape,
                        difficult, truncated):
    xmin = []
    ymin = []
    xmax = []
    ymax = []
    for b in bboxes:
        assert len(b) == 4
        # pylint: disable=expression-not-assigned
        # Note the coordinate order here: each box is (xmin, ymin, xmax, ymax).
        [l.append(point) for l, point in zip([xmin, ymin, xmax, ymax], b)]
        # pylint: enable=expression-not-assigned

    image_format = b'JPEG'
    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height': int64_feature(shape[0]),
            'image/width': int64_feature(shape[1]),
            'image/channels': int64_feature(shape[2]),
            'image/shape': int64_feature(shape),
            'image/object/bbox/xmin': float_feature(xmin),
            'image/object/bbox/xmax': float_feature(xmax),
            'image/object/bbox/ymin': float_feature(ymin),
            'image/object/bbox/ymax': float_feature(ymax),
            'image/object/bbox/label': int64_feature(labels),
            'image/object/bbox/label_text': bytes_feature(labels_text),
            'image/object/bbox/difficult': int64_feature(difficult),
            'image/object/bbox/truncated': int64_feature(truncated),
            'image/format': bytes_feature(image_format),
            'image/encoded': bytes_feature(image_data)
        }))
    return example
Example #4
def _format_data(sess, image_reader, idx, tmp_dir, pathlist_A, pathlist_B):
    ## Resize and random flip
    # if np.random.rand()>0.5:
    #     IMG_FLIP = True
    # else:
    #     IMG_FLIP = False
    IMG_FLIP = False
    path_A = _img_resize_flip(pathlist_A[idx], IMG_FLIP, tmp_dir, image_width=_IMG_WEIGHT, image_height=_IMG_HEIGHT)
    path_B = _img_resize_flip(pathlist_B[idx], IMG_FLIP, tmp_dir, image_width=_IMG_WEIGHT, image_height=_IMG_HEIGHT)

    # Read the encoded bytes in binary mode ('r' breaks under Python 3).
    image_raw_A = tf.gfile.FastGFile(path_A, 'rb').read()
    image_raw_B = tf.gfile.FastGFile(path_B, 'rb').read()

    height, width = image_reader.read_image_dims(sess, image_raw_A)
    # pdb.set_trace()
    example = tf.train.Example(features=tf.train.Features(feature={
        # tf.train.BytesList needs bytes, not str, under Python 3.
        'image_name_A': dataset_utils.bytes_feature(pathlist_A[idx].split('/')[-1].encode()),
        'image_name_B': dataset_utils.bytes_feature(pathlist_B[idx].split('/')[-1].encode()),
        'image_raw_A': dataset_utils.bytes_feature(image_raw_A),
        'image_raw_B': dataset_utils.bytes_feature(image_raw_B),
        'image_format': dataset_utils.bytes_feature(b'png'),
        'image_height': dataset_utils.int64_feature(height),
        'image_width': dataset_utils.int64_feature(width),
    }))
    return example
Example #5
def convert_to_example(imgdata, shape, labels, labels_text, bboxes):
    '''Convert the data into an Example proto.'''
    xmin = []
    ymin = []
    xmax = []
    ymax = []
    for b in bboxes:
        assert len(b) == 4
        [l.append(point) for l, point in zip([ymin, xmin, ymax, xmax], b)]

    image_format = b'JPEG'
    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height': int64_feature(shape[0]),
            'image/width': int64_feature(shape[1]),
            'image/channel': int64_feature(shape[2]),
            'image/shape': int64_feature(list(shape)),
            'image/object/bbox/xmin': float_feature(xmin),
            'image/object/bbox/xmax': float_feature(xmax),
            'image/object/bbox/ymin': float_feature(ymin),
            'image/object/bbox/ymax': float_feature(ymax),
            'image/object/bbox/label': int64_feature(labels),
            'image/object/bbox/label_text': bytes_feature(labels_text),
            'image/format': bytes_feature(image_format),
            'image/encoded': bytes_feature(imgdata)
        }))
    return example
Example #6
def process_data1(image, tfrecord_write):
    image_format = b'PNG'
    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/encoded': bytes_feature(image),
            'image/format': bytes_feature(image_format),
        }
    ))
    tfrecord_write.write(example.SerializeToString())
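A hedged usage sketch for process_data1; the file names below are hypothetical:

with tf.python_io.TFRecordWriter('images.tfrecords') as tfrecord_write:
    with tf.gfile.FastGFile('example.png', 'rb') as f:
        process_data1(f.read(), tfrecord_write)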
Example #7
def dict_to_tf_example(encoded_jpg, label, theta):
    class_label = 0 if label == 'positive' else 1
    theta_label = theta
    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/encoded': dataset_utils.bytes_feature(encoded_jpg),
            'image/format': dataset_utils.bytes_feature(b'jpeg'),
            'image/class/label': dataset_utils.int64_feature(class_label),
            'image/theta/label': dataset_utils.int64_feature(theta_label),
        }))
    return example
Example #8
def create_tf_example(example):
    
    # Udacity real data image format from Carla
    # Files can be downloaded from https://mega.nz/#F!ldJhzRhL!NWASXMs4cWegrYYNbJ7bEg
    height = 1096 # Image height
    width = 1368 # Image width

    filename = example['path'] # Filename of the image. Empty if image is not from file
    filename = filename.encode()

    with tf.gfile.GFile(example['path'], 'rb') as fid:
        encoded_image = fid.read()

    image_format = 'jpg'.encode() 

    xmins = [] # List of normalized left x coordinates in bounding box (1 per box)
    xmaxs = [] # List of normalized right x coordinates in bounding box
                # (1 per box)
    ymins = [] # List of normalized top y coordinates in bounding box (1 per box)
    ymaxs = [] # List of normalized bottom y coordinates in bounding box
                # (1 per box)
    classes_text = [] # List of string class name of bounding box (1 per box)
    classes = [] # List of integer class id of bounding box (1 per box)

    for box in example['boxes']:
        #if box['occluded'] is False:
        #print("adding box")
        # Convert to float before dividing so Python 2 integer division
        # cannot truncate the normalized coordinates.
        xmins.append(float(box['x_min']) / width)
        xmaxs.append(float(box['x_max']) / width)
        ymins.append(float(box['y_min']) / height)
        ymaxs.append(float(box['y_max']) / height)
        classes_text.append(box['label'].encode())
        classes.append(int(LABEL_DICT_4[box['label']]))


    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_utils.int64_feature(height),
        'image/width': dataset_utils.int64_feature(width),
        'image/filename': dataset_utils.bytes_feature(filename),
        'image/source_id': dataset_utils.bytes_feature(filename),
        'image/encoded': dataset_utils.bytes_feature(encoded_image),
        'image/format': dataset_utils.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_utils.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_utils.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_utils.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_utils.float_list_feature(ymaxs),
        'image/object/class/text': dataset_utils.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_utils.int64_list_feature(classes),
    }))

    return tf_example
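Because the number of boxes varies per image, reading such detection Examples back typically uses variable-length features. A minimal sketch under the same TF1-style API assumed throughout:

def parse_detection_example(serialized):
    parsed = tf.parse_single_example(serialized, {
        'image/encoded': tf.FixedLenFeature([], tf.string),
        'image/object/bbox/xmin': tf.VarLenFeature(tf.float32),
        'image/object/bbox/xmax': tf.VarLenFeature(tf.float32),
        'image/object/bbox/ymin': tf.VarLenFeature(tf.float32),
        'image/object/bbox/ymax': tf.VarLenFeature(tf.float32),
        'image/object/class/label': tf.VarLenFeature(tf.int64),
    })
    image = tf.image.decode_jpeg(parsed['image/encoded'], channels=3)
    # VarLenFeature yields SparseTensors; densify into an [N, 4] box tensor.
    boxes = tf.stack(
        [tf.sparse_tensor_to_dense(parsed['image/object/bbox/' + k])
         for k in ('ymin', 'xmin', 'ymax', 'xmax')], axis=1)
    labels = tf.sparse_tensor_to_dense(parsed['image/object/class/label'])
    return image, boxes, labels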
Example #9
def process_data(image, image_mask, tfrecord_write):
    image_format = b'PNG'
    image_mask_format = b'PNG'  # only referenced by the commented-out mask features
    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/encoded': bytes_feature(image),
            'image/format': bytes_feature(image_format),
            #'mask/encode': bytes_feature(image_mask),
            #'mask/format': bytes_feature(image_mask_format),
            #'mask/encode': _EncodedFloatFeature(image_mask),
        }
    ))
    tfrecord_write.write(example.SerializeToString())
Example #10
def _add_to_tfrecord(data_filename, labels_filename, num_images,
                     tfrecord_writer):
    """Loads data from the binary MNIST files and writes files to a TFRecord.

  Args:
    data_filename: The filename of the MNIST images.
    labels_filename: The filename of the MNIST labels.
    num_images: The number of images in the dataset.
    tfrecord_writer: The TFRecord writer to use for writing.
  """
    images = _extract_images(data_filename, num_images)
    labels = _extract_labels(labels_filename, num_images)

    shape = (_IMAGE_SIZE, _IMAGE_SIZE, _NUM_CHANNELS)
    with tf.Graph().as_default():
        image = tf.placeholder(dtype=tf.uint8, shape=shape)
        encoded_png = tf.image.encode_png(image)

        with tf.Session('') as sess:
            for j in range(num_images):
                sys.stdout.write('\r>> Converting image %d/%d' %
                                 (j + 1, num_images))
                sys.stdout.flush()
                image_raw = images[j].tobytes()  # tostring() is deprecated in NumPy
                feature = {
                    'label': dataset_utils.int64_feature(int(labels[j])),
                    'image_raw': dataset_utils.bytes_feature(image_raw)
                }
                features = tf.train.Features(feature=feature)
                example = tf.train.Example(features=features)
                # png_string = sess.run(encoded_png, feed_dict={image: images[j]})
                # example = dataset_utils.image_to_tfexample(
                #     png_string, 'png'.encode(), _IMAGE_SIZE, _IMAGE_SIZE, labels[j])
                tfrecord_writer.write(example.SerializeToString())
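A hedged driver sketch; the MNIST file names and image count below are placeholders:

with tf.python_io.TFRecordWriter('mnist_train.tfrecords') as tfrecord_writer:
    _add_to_tfrecord('train-images-idx3-ubyte', 'train-labels-idx1-ubyte',
                     num_images=60000, tfrecord_writer=tfrecord_writer)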
Example #11
def dict_to_tf_example(path, size, label, theta):
    with tf.gfile.GFile(path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    class_label = 1 if label == 'positive' else 0
    theta_label = convert_theta(float(theta))
    example = tf.train.Example(features=tf.train.Features(feature={
        'image/encoded': dataset_utils.bytes_feature(encoded_jpg),
        'image/format': dataset_utils.bytes_feature(b'jpeg'),
        'image/class/label': dataset_utils.int64_feature(class_label),
        'image/theta/label': dataset_utils.int64_feature(theta_label),
    }))
    return example
Example #12
def _convert_to_example(image_data, labels, labels_text, bboxes, shape,
                        difficult, truncated):
    """Build an Example proto for an image example.

    Args:
      image_data: string, JPEG encoding of RGB image;
      labels: list of integers, identifier for the ground truth;
      labels_text: list of strings, human-readable labels;
      bboxes: list of bounding boxes; each box is a list of four numbers
          in [ymin, xmin, ymax, xmax] order (note the order, which matches
          the zip() below). All boxes are assumed to belong to the same label
          as the image label.
      shape: 3 integers, image shape in pixels.
      difficult: list of integers, per-object 'difficult' flags.
      truncated: list of integers, per-object 'truncated' flags.
    Returns:
      Example proto
    """
    xmin = []
    ymin = []
    xmax = []
    ymax = []
    for b in bboxes:
        assert len(b) == 4
        # pylint: disable=expression-not-assigned
        #          [(ymin_0, xmin_0, ymax_0, xmax_0), (ymin_1, xmin_1, ymax_1, xmax_1), ....]
        #                                            |
        # [ymin_0, ymin_1, ...], [xmin_0, xmin_1, ...], [ymax_0, ymax_1, ...], [xmax_0, xmax_1, ...]
        [l.append(point) for l, point in zip([ymin, xmin, ymax, xmax], b)]
        # pylint: enable=expression-not-assigned

    image_format = b'JPEG'
    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height': int64_feature(shape[0]),
            'image/width': int64_feature(shape[1]),
            'image/channels': int64_feature(shape[2]),
            'image/shape': int64_feature(shape),
            'image/object/bbox/xmin': float_feature(xmin),
            'image/object/bbox/xmax': float_feature(xmax),
            'image/object/bbox/ymin': float_feature(ymin),
            'image/object/bbox/ymax': float_feature(ymax),
            'image/object/bbox/label': int64_feature(labels),
            'image/object/bbox/label_text': bytes_feature(labels_text),
            'image/object/bbox/difficult': int64_feature(difficult),
            'image/object/bbox/truncated': int64_feature(truncated),
            'image/format': bytes_feature(image_format),
            'image/encoded': bytes_feature(image_data)
        }))
    return example
Example #13
def _convert_to_example(image_data, labels, labels_text, bboxes, shape):
    """Build an Example proto for an image example.

    Args:
      image_data: string, JPEG encoding of RGB image;
      labels: list of integers, identifier for the ground truth;
      labels_text: list of strings, human-readable labels;
      bboxes: list of bounding boxes; each box is a list of four numbers
          in [xmin, xmax, ymin, ymax] order (note the order, which matches
          the zip() below). All boxes are assumed to belong to the same label
          as the image label.
      shape: 3 integers, image shape in pixels.
    Returns:
      Example proto
    """
    xmin = []
    ymin = []
    xmax = []
    ymax = []
    for b in bboxes:
        assert len(b) == 4
        [l.append(point) for l, point in zip([xmin, xmax, ymin, ymax], b)]

    print('xmin:', xmin)
    print('xmax:', xmax)
    print('ymin:', ymin)
    print('ymax:', ymax)

    image_format = b'PNG'
    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height': int64_feature(shape[0]),
            'image/width': int64_feature(shape[1]),
            'image/channels': int64_feature(shape[2]),
            'image/shape': int64_feature(shape),
            'image/object/bbox/xmin': float_list_feature(xmin),
            'image/object/bbox/xmax': float_list_feature(xmax),
            'image/object/bbox/ymin': float_list_feature(ymin),
            'image/object/bbox/ymax': float_list_feature(ymax),
            'image/object/class/label': int64_feature(labels),
            'image/object/class/text': bytes_feature(labels_text),
            'image/format': bytes_feature(image_format),
            'image/encoded': bytes_feature(image_data)
        }))
    return example
Example #14
def _add_to_tfrecord(filename, tfrecord_writer, offset=0):
    """Loads data from the cifar10 pickle files and writes files to a TFRecord.

  Args:
    filename: The filename of the cifar10 pickle file.
    tfrecord_writer: The TFRecord writer to use for writing.
    offset: An offset into the absolute number of images previously written.

  Returns:
    The new offset.
  """
    # Pickle files are binary: use 'rb' (plain 'r' breaks on Python 3).
    with tf.gfile.Open(filename, 'rb') as f:
        data = cPickle.load(f)

    images = data['data']
    num_images = images.shape[0]

    images = images.reshape((num_images, 3, 32, 32))
    labels = data['labels']

    with tf.Graph().as_default():
        image_placeholder = tf.placeholder(dtype=tf.uint8)
        encoded_image = tf.image.encode_png(image_placeholder)

        with tf.Session('') as sess:

            for j in range(num_images):
                sys.stdout.write(
                    '\r>> Reading file [%s] image %d/%d' %
                    (filename, offset + j + 1, offset + num_images))
                sys.stdout.flush()

                # image = np.squeeze(images[j]).transpose((1, 2, 0))
                image = images[j].tobytes()  # tostring() is deprecated in NumPy
                label = labels[j]

                # png_string = sess.run(encoded_image,
                #                       feed_dict={image_placeholder: image})

                # example = dataset_utils.image_to_tfexample(
                #     png_string, 'png', _IMAGE_SIZE, _IMAGE_SIZE, label)
                feature = {
                    'label': dataset_utils.int64_feature(int(label)),
                    'image': dataset_utils.bytes_feature(image)
                }
                features = tf.train.Features(feature=feature)
                example = tf.train.Example(features=features)

                tfrecord_writer.write(example.SerializeToString())

    return offset + num_images
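Note that the raw bytes above keep CIFAR-10's (channels, height, width) layout (the commented-out transpose would have produced HWC instead). A sketch of decoding such a record back, assuming the same TF1-style API:

def parse_cifar_example(serialized):
    parsed = tf.parse_single_example(serialized, {
        'label': tf.FixedLenFeature([], tf.int64),
        'image': tf.FixedLenFeature([], tf.string),
    })
    image = tf.decode_raw(parsed['image'], tf.uint8)
    # Bytes were written in (channels, height, width) order; restore HWC.
    image = tf.transpose(tf.reshape(image, [3, 32, 32]), [1, 2, 0])
    return image, parsed['label']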
Example #15
def dict_to_tf_example(file_name):
    with open(os.path.join(args.data_path, folders[0], file_name + '.png'),
              'rb') as fid:
        encoded_color = fid.read()
    with open(os.path.join(args.data_path, folders[1], file_name + '.png'),
              'rb') as fid:
        encoded_depth = fid.read()
    with open(os.path.join(args.data_path, folders[2], file_name + '.png'),
              'rb') as fid:
        encoded_label_map = fid.read()
    with open(os.path.join(args.data_path, folders[3], file_name + '.png'),
              'rb') as fid:
        encoded_label_aug_map = fid.read()
    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/color': dataset_utils.bytes_feature(encoded_color),
            'image/format': dataset_utils.bytes_feature(b'png'),
            'image/encoded_depth': dataset_utils.bytes_feature(encoded_depth),
            'image/label': dataset_utils.bytes_feature(encoded_label_map),
            'image/label_aug': dataset_utils.bytes_feature(
                encoded_label_aug_map),
        }))
    return example
Example #16
def dict_to_tf_example(data, label):
    with open(data, 'rb') as inf:
        encoded_data = inf.read()
    img_label = cv2.imread(label)
    img_mask = image2label(img_label)
    encoded_label = img_mask.astype(np.uint8).tobytes()

    height, width = img_label.shape[0], img_label.shape[1]
    if height < vgg_16.default_image_size or width < vgg_16.default_image_size:
        # Skip images that are too small to guarantee the final random crop size.
        return None
    fname = data[data.rfind('/') + 1:]
    print(fname)
    # Fill the feature dict for the Example proto.
    feature_dict = {
        'image/height': dataset_utils.int64_feature(height),
        'image/width': dataset_utils.int64_feature(width),
        'image/filename': dataset_utils.bytes_feature(fname.encode('utf8')),
        'image/encoded': dataset_utils.bytes_feature(encoded_data),
        'image/label': dataset_utils.bytes_feature(encoded_label),
        'image/format': dataset_utils.bytes_feature('jpeg'.encode('utf8')),
    }
    example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
    return example
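image2label is not shown; presumably it maps each color-coded annotation pixel to a class index. A minimal sketch under that assumption (the colormap below is hypothetical):

import numpy as np

# Hypothetical colormap: pixel color (as loaded by cv2, i.e. BGR) -> class id.
_COLOR_TO_CLASS = {(0, 0, 0): 0, (0, 0, 128): 1}

def image2label(img_label):
    # Map an HxWx3 color-coded annotation image to an HxW class-index mask.
    mask = np.zeros(img_label.shape[:2], dtype=np.uint8)
    for color, cls in _COLOR_TO_CLASS.items():
        mask[np.all(img_label == np.array(color), axis=-1)] = cls
    return mask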
Example #17
def _convert_to_example(image_data, shape, bbox, label, imname):
    nbbox = np.array(bbox)
    ymin = list(nbbox[:, 0])
    xmin = list(nbbox[:, 1])
    ymax = list(nbbox[:, 2])
    xmax = list(nbbox[:, 3])

    print('shape: {}, height:{}, width:{}'.format(shape, shape[0], shape[1]))
    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height': int64_feature(shape[0]),
            'image/width': int64_feature(shape[1]),
            'image/channels': int64_feature(shape[2]),
            'image/shape': int64_feature(shape),
            'image/object/bbox/xmin': float_feature(xmin),
            'image/object/bbox/xmax': float_feature(xmax),
            'image/object/bbox/ymin': float_feature(ymin),
            'image/object/bbox/ymax': float_feature(ymax),
            'image/object/bbox/label': int64_feature(label),
            'image/format': bytes_feature(b'jpeg'),
            'image/encoded': bytes_feature(image_data),
            # imname may be str or bytes; as_bytes handles both under Python 3.
            'image/name': bytes_feature(tf.compat.as_bytes(imname)),
        }))
    return example
Example #18
def data_prep(raw_data,
              tfrecord_filename,
              _NUM_SHARDS,
              dataset_directory_address,
              split_name='Train'):
    # raw_data[i][0] image address for ith index
    # raw_data[i][1] boxes in image with ith index
    # Statistics accumulators; only the commented-out block below uses them.
    xMax = 0
    yMax = 0
    xMin = 0
    yMin = 0
    areaMax = 0
    areaMin = 0
    maxArea_coords = [0, 0, 0, 0]
    minArea_coords = [0, 0, 0, 0]

    resize_dir = 'resized_images_INRIA_%dx%d/' % (TARGET_W, TARGET_H)
    num__per_shard = math.ceil(len(raw_data) / float(_NUM_SHARDS))
    with tf.Graph().as_default():
        #image_reader = ImageReader()

        with tf.Session() as sess:
            for shard_id in range(_NUM_SHARDS):
                output_filename = _get_dataset_filename(
                    dataset_directory_address,
                    split_name,
                    shard_id,
                    tfrecord_filename=tfrecord_filename,
                    _NUM_SHARDS=_NUM_SHARDS)

                with tf.python_io.TFRecordWriter(
                        output_filename) as tfrecord_writer:
                    start_index = shard_id * num__per_shard
                    end_index = min((shard_id + 1) * num__per_shard,
                                    len(raw_data))
                    for i in range(start_index, end_index):
                        image = Image.open(dataset_directory_address +
                                           raw_data[i][0])
                        image_address = os.path.join(
                            resize_dir, raw_data[i][0]
                            [10:])  #dataset_directory_address + raw_data[i][0]
                        sys.stdout.write(
                            '\r>>Converting image %d/%d shard %d' %
                            (i + 1, len(raw_data), shard_id))
                        sys.stdout.flush()

                        orig_w, orig_h = image.size
                        image = image.convert('L')  # 8-bit grayscale
                        image = image.resize(
                            (IMG_W, IMG_H),
                            Image.LANCZOS)  # high-quality downsampling filter
                        if not os.path.exists(resize_dir):
                            os.makedirs(resize_dir)

                        image.save(
                            os.path.join(resize_dir, raw_data[i][0][10:]))

                        x_scale = IMG_W / orig_w
                        y_scale = IMG_H / orig_h
                        bboxes_coords = []
                        # Iterate as `for box in ...`: the original
                        # `for i, box in enumerate(...)` shadowed the image
                        # index `i` used elsewhere in this loop body.
                        for box in raw_data[i][1]:
                            ul_x, ul_y, br_x, br_y = box
                            new_box_coordinates = (ul_x * x_scale,
                                                   ul_y * y_scale,
                                                   br_x * x_scale,
                                                   br_y * y_scale)
                            new_box_coordinates = [
                                round(x) for x in new_box_coordinates
                            ]

                            bboxes_coords.append(new_box_coordinates)
                            "raw_data[i][1][0][0] = new_box_coordinates[0]"
                            "raw_data[i][1][0][1] = new_box_coordinates[1]"
                            "raw_data[i][1][0][2] = new_box_coordinates[2]"
                            "raw_data[i][1][0][3] = new_box_coordinates[3]"

                        #--------------------- create tfexample -----------
                        b1_ulx = b1_uly = b1_brx = b1_bry = 0
                        b2_ulx = b2_uly = b2_brx = b2_bry = 0
                        b3_ulx = b3_uly = b3_brx = b3_bry = 0
                        b4_ulx = b4_uly = b4_brx = b4_bry = 0
                        # Use `box_idx`, not `i`, to avoid shadowing the image index.
                        for box_idx, box in enumerate(bboxes_coords):
                            if box_idx == 0:
                                b1_ulx, b1_uly, b1_brx, b1_bry = box
                            elif box_idx == 1:
                                b2_ulx, b2_uly, b2_brx, b2_bry = box
                            elif box_idx == 2:
                                b3_ulx, b3_uly, b3_brx, b3_bry = box
                            elif box_idx == 3:
                                b4_ulx, b4_uly, b4_brx, b4_bry = box
                            '''#  ----------- statistics ------------------
                    if(box[0]==0 and box[1]==0 and box[2]==0 and box[3]==0):
                        continue
                    xMax = max(xMax, box[2])
                    yMax = max(yMax, box[3])
                    
                    if xMin==0:
                        xMin = box[0]
                        yMin = box[1]
                    else:
                        xMin = min(xMin, box[0])
                        yMin = min(yMin, box[1])
                    
                    Area = (box[2] - box[0]) * (box[3] - box[1])
                                                        
                    if areaMax==0:
                        areaMax = Area
                        maxArea_coords = box
                    elif Area>areaMax:
                        areaMax = Area
                        maxArea_coords=box
                    
                    if areaMin==0:
                        areaMin = Area
                        minArea_coords=box
                    elif Area<areaMin:
                        areaMin = Area
                        minArea_coords=box'''

                        example = tf.train.Example(features=tf.train.Features(
                            feature={
                                #'imagedata': bytes_feature(image_data.tostring()),
                                'image_address':
                                bytes_feature(tf.compat.as_bytes(
                                    image_address)),
                                'tag':
                                int64_feature(1),
                                'box1_x0':
                                int64_feature(b1_ulx),
                                'box1_y0':
                                int64_feature(b1_uly),
                                'box1_x1':
                                int64_feature(b1_brx),
                                'box1_y1':
                                int64_feature(b1_bry),
                                'box2_x0':
                                int64_feature(b2_ulx),
                                'box2_y0':
                                int64_feature(b2_uly),
                                'box2_x1':
                                int64_feature(b2_brx),
                                'box2_y1':
                                int64_feature(b2_bry),
                                'box3_x0':
                                int64_feature(b3_ulx),
                                'box3_y0':
                                int64_feature(b3_uly),
                                'box3_x1':
                                int64_feature(b3_brx),
                                'box3_y1':
                                int64_feature(b3_bry),
                                'box4_x0':
                                int64_feature(b4_ulx),
                                'box4_y0':
                                int64_feature(b4_uly),
                                'box4_x1':
                                int64_feature(b4_brx),
                                'box4_y1':
                                int64_feature(b4_bry),
                            }))
                        '''# Rotating resized image and its boxes
                    #draw = ImageDraw.Draw(image)              
                    cx = int(((new_box_coordinates[2]-new_box_coordinates[0])/2)+new_box_coordinates[0])
                    cy = int(((new_box_coordinates[3]-new_box_coordinates[1])/2)+new_box_coordinates[1])
                    #  x0y1-------x0y3
                    #    |         |
                    #    |  cx,cy  |
                    #    |         |
                    #  x2,y1-----x2,y3
                    x0, y1, x2, y3 = new_box_coordinates[0], new_box_coordinates[1], new_box_coordinates[2], new_box_coordinates[3]            

                    angle=45
                    new_img = rotate(image, angle, reshape=False)
                    pil_img = Image.fromarray(new_img)
                    
                    polygon = gd.Rectangle((x0,y1),(x2,y3))
                    polygon.rotate(angle*math.pi/180,center=(cx,cy))
                    p0, p1, p2, p3 = polygon.points[0], polygon.points[1], polygon.points[2], polygon.points[3]
                    #  x0y0-------x1y1=p1
                    #    |         |
                    #    |  cx,cy  |
                    #    |         |
                    #  x3,y3-----x2,y2=p2
                    def bound_limitation(x,max_x):
                        if(x<0):
                        x=0
                        elif(x>max_x):
                        x = max_x
                        return x
                    
                    x0 = bound_limitation(p0[0],target_img_width)
                    x1 = bound_limitation(p1[0],target_img_width)
                    x2 = bound_limitation(p2[0],target_img_width)
                    x3 = bound_limitation(p3[0],target_img_width)
                    y0 = bound_limitation(p0[1],target_img_height)
                    y1 = bound_limitation(p1[1],target_img_height)
                    y2 = bound_limitation(p2[1],target_img_height)
                    y3 = bound_limitation(p3[1],target_img_height)

                    draw = ImageDraw.Draw(pil_img)
                    draw.line(((x0,y0),(x1,y1)))
                    draw.line(((x1,y1),(x2,y2)))
                    draw.line(((x2,y2),(x3,y3)))
                    draw.line(((x3,y3),(x0,y0)))
                    draw.rectangle((cx-3,cy-3,(cx+3,cy+3)), fill='white')
                    pil_img.show()'''

                        "image = np.asarray(image)"
                        "images = np.array([image])"

                        "images = np.expand_dims(images, axis=-1)"  ## need extra dimension of size 1 for grayscale
                        # ROTATING IMAGE IN DIFFERENT ANGLES AND ADD TO THE LAST COLUMN
                        "tiled = np.tile(np.expand_dims(images, 4), [len(ROTATIONS)])"
                        #angles=[0]
                        #for transformation_index, angle in enumerate(angles):
                        #  tiled[:,:,:,:, transformation_index] = rotate(tiled[:, :, :, :, transformation_index], angle, axes=[1, 2], reshape=False)'''
                        "example = image_to_tfexample(tiled,new_box_coordinates,1)"  # class_id = 1 is pedestrian
                        #example = image_to_tfexample(image_address,new_box_coordinates,1)
                        tfrecord_writer.write(example.SerializeToString())
                    # No explicit close() needed; the `with` statement closes the writer.
    sys.stdout.write('\n')
    sys.stdout.flush()
Example #19
def _format_data(sess, image_reader, folder_path, pairs, idx, labels, id_map, attr_onehot_mat, attr_w2v25_mat, 
                    attr_w2v50_mat, attr_w2v100_mat, attr_w2v150_mat, id_map_attr, all_peaks_dic, subsets_dic, 
                    seg_data_dir, FiltOutMissRegion=False, FLIP=False):
    # Read the filename:
    img_path_0 = os.path.join(folder_path, pairs[idx][0])
    img_path_1 = os.path.join(folder_path, pairs[idx][1])

    id_0 = pairs[idx][0][0:4]
    id_1 = pairs[idx][1][0:4]
    cam_0 = pairs[idx][0][6]
    cam_1 = pairs[idx][1][6]

    # Binary mode: 'r' would corrupt the image bytes under Python 3.
    image_raw_0 = tf.gfile.FastGFile(img_path_0, 'rb').read()
    image_raw_1 = tf.gfile.FastGFile(img_path_1, 'rb').read()
    height, width = image_reader.read_image_dims(sess, image_raw_0)

    ########################## Attribute ##########################
    attrs_0 = []
    attrs_1 = []
    attrs_w2v25_0 = []
    attrs_w2v25_1 = []
    attrs_w2v50_0 = []
    attrs_w2v50_1 = []
    attrs_w2v100_0 = []
    attrs_w2v100_1 = []
    attrs_w2v150_0 = []
    attrs_w2v150_1 = []
    idx_0 = id_map_attr[id_0]
    idx_1 = id_map_attr[id_1]
    # pdb.set_trace()
    if attr_onehot_mat is not None:
        for name in attr_onehot_mat.dtype.names:
            attrs_0.append(attr_onehot_mat[(name)][0][0][0][idx_0])
            attrs_1.append(attr_onehot_mat[(name)][0][0][0][idx_1])
    if attr_w2v25_mat is not None:
        for i in xrange(attr_w2v25_mat[0].shape[0]):
            attrs_w2v25_0 = attrs_w2v25_0 + attr_w2v25_mat[0][i][idx_0].tolist()
            attrs_w2v25_1 = attrs_w2v25_1 + attr_w2v25_mat[0][i][idx_1].tolist()
    if attr_w2v50_mat is not None:
        for i in xrange(attr_w2v50_mat[0].shape[0]):
            attrs_w2v50_0 = attrs_w2v50_0 + attr_w2v50_mat[0][i][idx_0].tolist()
            attrs_w2v50_1 = attrs_w2v50_1 + attr_w2v50_mat[0][i][idx_1].tolist()
    if attr_w2v100_mat is not None:
        for i in xrange(attr_w2v100_mat[0].shape[0]):
            attrs_w2v100_0 = attrs_w2v100_0 + attr_w2v100_mat[0][i][idx_0].tolist()
            attrs_w2v100_1 = attrs_w2v100_1 + attr_w2v100_mat[0][i][idx_1].tolist()
    if attr_w2v150_mat is not None:
        for i in xrange(attr_w2v150_mat[0].shape[0]):
            attrs_w2v150_0 = attrs_w2v150_0 + attr_w2v150_mat[0][i][idx_0].tolist()
            attrs_w2v150_1 = attrs_w2v150_1 + attr_w2v150_mat[0][i][idx_1].tolist()

    ########################## Segment ##########################
    seg_0 = np.zeros([128,64])
    seg_1 = np.zeros([128,64])
    if seg_data_dir:
        path_0 = os.path.join(seg_data_dir, pairs[idx][0])
        path_1 = os.path.join(seg_data_dir, pairs[idx][1])
        if os.path.exists(path_0) and os.path.exists(path_1):
            seg_0 = scipy.misc.imread(path_0)
            seg_1 = scipy.misc.imread(path_1)
            if FLIP:
                # pdb.set_trace()
                seg_0 = np.fliplr(seg_0)
                seg_1 = np.fliplr(seg_1)
        else:
            return None

    ########################## Pose 16x8 & Pose coordinates (for 128x64 Solid / 128x64 Gaussian) ##########################
    ## Pose 16x8
    w_unit = width/8
    h_unit = height/16
    pose_peaks_0 = np.zeros([16,8,18])
    pose_peaks_1 = np.zeros([16,8,18])
    ## Pose coordinates
    pose_peaks_0_rcv = np.zeros([18,3]) ## Row, Column, Visibility
    pose_peaks_1_rcv = np.zeros([18,3])
    #
    pose_subs_0 = []
    pose_subs_1 = []
    # pdb.set_trace()
    if (all_peaks_dic is not None) and (pairs[idx][0] in all_peaks_dic) and (pairs[idx][1] in all_peaks_dic):
        ###### Pose 0 ######
        peaks = _get_valid_peaks(all_peaks_dic[pairs[idx][0]], subsets_dic[pairs[idx][0]])
        indices_r4_0, values_r4_0, shape = _getSparsePose(peaks, height, width, 18, radius=4, mode='Solid')
        indices_r4_0, shape_0 = _oneDimSparsePose(indices_r4_0, shape)
        pose_mask_r4_0 = _getPoseMask(peaks, height, width, radius=4, mode='Solid')
        pose_mask_r7_0 = _getPoseMask(peaks, height, width, radius=7, mode='Solid')
        for ii in range(len(peaks)):
            p = peaks[ii]
            if 0!=len(p):
                pose_peaks_0[int(p[0][1]/h_unit), int(p[0][0]/w_unit), ii] = 1
                pose_peaks_0_rcv[ii][0] = p[0][1]
                pose_peaks_0_rcv[ii][1] = p[0][0]
                pose_peaks_0_rcv[ii][2] = 1
        ## Generate body region proposals
        # part_bbox_list_0, visibility_list_0 = get_part_bbox7(peaks, img_path_0, radius=6, idx=idx)
        part_bbox_list_0, visibility_list_0 = get_part_bbox37(peaks, img_path_0, radius=6)
        if FiltOutMissRegion and (0 in visibility_list_0):
            return None

        ###### Pose 1 ######
        peaks = _get_valid_peaks(all_peaks_dic[pairs[idx][1]], subsets_dic[pairs[idx][1]])
        indices_r4_1, values_r4_1, shape = _getSparsePose(peaks, height, width, 18, radius=4, mode='Solid')
        indices_r4_1, shape_1 = _oneDimSparsePose(indices_r4_1, shape)
        pose_mask_r4_1 = _getPoseMask(peaks, height, width, radius=4, mode='Solid')
        pose_mask_r7_1 = _getPoseMask(peaks, height, width, radius=7, mode='Solid')
        ## Generate body region proposals
        # part_bbox_list_1, visibility_list_1 = get_part_bbox7(peaks, img_path_1, radius=7)
        # Use img_path_1 here: the original passed img_path_0, which looks
        # like a copy-paste slip (this block handles pose 1).
        part_bbox_list_1, visibility_list_1 = get_part_bbox37(peaks, img_path_1, radius=6)
        if FiltOutMissRegion and (0 in visibility_list_1):
            return None

        ###### Visualize ######
        # dense = _sparse2dense(indices_r4_0, values_r4_0, shape)
        # _visualizePose(pose_mask_r4_0, scipy.misc.imread(img_path_0))
        # _visualizePose(pose_mask_r7_0, scipy.misc.imread(img_path_0))
        # pdb.set_trace()

        for ii in range(len(peaks)):
            p = peaks[ii]
            if 0!=len(p):
                pose_peaks_1[int(p[0][1]/h_unit), int(p[0][0]/w_unit), ii] = 1
                pose_peaks_1_rcv[ii][0] = p[0][1]
                pose_peaks_1_rcv[ii][1] = p[0][0]
                pose_peaks_1_rcv[ii][2] = 1
        pose_subs_0 = subsets_dic[pairs[idx][0]][0].tolist()
        pose_subs_1 = subsets_dic[pairs[idx][1]][0].tolist()
    else:
        return None


    example = tf.train.Example(features=tf.train.Features(feature={
            'image_name_0': dataset_utils.bytes_feature(tf.compat.as_bytes(pairs[idx][0])),
            'image_name_1': dataset_utils.bytes_feature(tf.compat.as_bytes(pairs[idx][1])),
            'image_raw_0': dataset_utils.bytes_feature(image_raw_0),
            'image_raw_1': dataset_utils.bytes_feature(image_raw_1),
            'label': dataset_utils.int64_feature(labels[idx]),
            'id_0': dataset_utils.int64_feature(id_map[id_0]),
            'id_1': dataset_utils.int64_feature(id_map[id_1]),
            'cam_0': dataset_utils.int64_feature(int(cam_0)),
            'cam_1': dataset_utils.int64_feature(int(cam_1)),
            'image_format': dataset_utils.bytes_feature(b'jpg'),
            'image_height': dataset_utils.int64_feature(height),
            'image_width': dataset_utils.int64_feature(width),
            'real_data': dataset_utils.int64_feature(1),
            'attrs_0': dataset_utils.int64_feature(attrs_0),
            'attrs_1': dataset_utils.int64_feature(attrs_1),
            'attrs_w2v25_0': dataset_utils.float_feature(attrs_w2v25_0),
            'attrs_w2v25_1': dataset_utils.float_feature(attrs_w2v25_1),
            'attrs_w2v50_0': dataset_utils.float_feature(attrs_w2v50_0),
            'attrs_w2v50_1': dataset_utils.float_feature(attrs_w2v50_1),
            'attrs_w2v100_0': dataset_utils.float_feature(attrs_w2v100_0),
            'attrs_w2v100_1': dataset_utils.float_feature(attrs_w2v100_1),
            'attrs_w2v150_0': dataset_utils.float_feature(attrs_w2v150_0),
            'attrs_w2v150_1': dataset_utils.float_feature(attrs_w2v150_1),
            'pose_peaks_0': dataset_utils.float_feature(pose_peaks_0.flatten().tolist()),
            'pose_peaks_1': dataset_utils.float_feature(pose_peaks_1.flatten().tolist()),
            'pose_peaks_0_rcv': dataset_utils.float_feature(pose_peaks_0_rcv.flatten().tolist()),
            'pose_peaks_1_rcv': dataset_utils.float_feature(pose_peaks_1_rcv.flatten().tolist()),
            'pose_mask_r4_0': dataset_utils.int64_feature(pose_mask_r4_0.astype(np.int64).flatten().tolist()),
            'pose_mask_r4_1': dataset_utils.int64_feature(pose_mask_r4_1.astype(np.int64).flatten().tolist()),
            # Note: these keys say r6 but hold the radius-7 masks computed above.
            'pose_mask_r6_0': dataset_utils.int64_feature(pose_mask_r7_0.astype(np.int64).flatten().tolist()),
            'pose_mask_r6_1': dataset_utils.int64_feature(pose_mask_r7_1.astype(np.int64).flatten().tolist()),
            'seg_0': dataset_utils.int64_feature(seg_0.astype(np.int64).flatten().tolist()),
            'seg_1': dataset_utils.int64_feature(seg_1.astype(np.int64).flatten().tolist()),

            'shape': dataset_utils.int64_feature(shape_0),
            
            'indices_r4_0': dataset_utils.int64_feature(np.array(indices_r4_0).astype(np.int64).flatten().tolist()),
            # np.float64 instead of the np.float alias removed in NumPy 1.24.
            'values_r4_0': dataset_utils.float_feature(np.array(values_r4_0).astype(np.float64).flatten().tolist()),
            'indices_r4_1': dataset_utils.int64_feature(np.array(indices_r4_1).astype(np.int64).flatten().tolist()),
            'values_r4_1': dataset_utils.float_feature(np.array(values_r4_1).astype(np.float64).flatten().tolist()),

            'pose_subs_0': dataset_utils.float_feature(pose_subs_0),
            'pose_subs_1': dataset_utils.float_feature(pose_subs_1),

            'part_bbox_0': dataset_utils.int64_feature(np.array(part_bbox_list_0).astype(np.int64).flatten().tolist()),
            'part_bbox_1': dataset_utils.int64_feature(np.array(part_bbox_list_1).astype(np.int64).flatten().tolist()),
            'part_vis_0': dataset_utils.int64_feature(np.array(visibility_list_0).astype(np.int64).flatten().tolist()),
            'part_vis_1': dataset_utils.int64_feature(np.array(visibility_list_1).astype(np.int64).flatten().tolist()),
    }))

    return example
Example #20
def _format_data(sess,
                 image_reader,
                 folder_path,
                 pairs,
                 i,
                 labels,
                 id_map,
                 attr_mat,
                 id_map_attr,
                 all_peaks_dic,
                 subsets_dic,
                 FiltOutMissRegion=False):
    # Read the filename:
    img_path_0 = os.path.join(folder_path, pairs[i][0])
    img_path_1 = os.path.join(folder_path, pairs[i][1])

    id_0 = pairs[i][0].split('_')[0]
    id_1 = pairs[i][1].split('_')[0]

    # Binary mode: 'r' would corrupt the image bytes under Python 3.
    image_raw_0 = tf.gfile.FastGFile(img_path_0, 'rb').read()
    image_raw_1 = tf.gfile.FastGFile(img_path_1, 'rb').read()
    height, width = image_reader.read_image_dims(sess, image_raw_0)

    attrs_0 = []
    attrs_1 = []
    if attr_mat is not None:
        idx_0 = id_map_attr[id_0]
        idx_1 = id_map_attr[id_1]
        for name in attr_mat.dtype.names:
            attrs_0.append(attr_mat[(name)][0][0][0][idx_0])
            attrs_1.append(attr_mat[(name)][0][0][0][idx_1])

    ########################## Pose 16x16 & Pose coordinates (for 128x64 Solid / 128x64 Gaussian) ##########################
    ## Pose 16x16 (this variant uses a 16x16 grid, unlike the 16x8 one above)
    w_unit = width / 16
    h_unit = height / 16
    pose_peaks_0 = np.zeros([16, 16, 18])
    pose_peaks_1 = np.zeros([16, 16, 18])
    ## Pose coordinates
    pose_peaks_0_rcv = np.zeros([18, 3])
    pose_peaks_1_rcv = np.zeros([18, 3])
    #
    pose_subs_0 = []
    pose_subs_1 = []
    # pdb.set_trace()
    if (all_peaks_dic is not None) and (pairs[i][0] in all_peaks_dic) and (
            pairs[i][1] in all_peaks_dic):
        ## Pose 0
        # peaks = all_peaks_dic[pairs[i][0]]
        peaks = _get_valid_peaks(all_peaks_dic[pairs[i][0]],
                                 subsets_dic[pairs[i][0]])
        # print(peaks)
        indices_r4_0, values_r4_0, shape = _getSparsePose(peaks,
                                                          height,
                                                          width,
                                                          18,
                                                          radius=4,
                                                          mode='Solid')
        indices_r4_0, shape_0 = _oneDimSparsePose(indices_r4_0, shape)
        indices_r8_0, values_r8_0, shape = _getSparsePose(peaks,
                                                          height,
                                                          width,
                                                          18,
                                                          radius=8,
                                                          mode='Solid')
        indices_r8_0, _ = _oneDimSparsePose(indices_r8_0, shape)
        # pose_dense_r4_0 = _sparse2dense(indices_r4_0, values_r4_0, shape)
        pose_mask_r4_0 = _getPoseMask(peaks,
                                      height,
                                      width,
                                      radius=4,
                                      mode='Solid')
        pose_mask_r8_0 = _getPoseMask(peaks,
                                      height,
                                      width,
                                      radius=8,
                                      mode='Solid')
        # indices_r6_v4_0, values_r6_v4_0, shape = _getSparsePose(peaks, height, width, 18, radius=6, var=4, mode='Gaussian')
        for ii in range(len(peaks)):
            p = peaks[ii]
            if 0 != len(p):
                pose_peaks_0[int(p[0][1] / h_unit),
                             int(p[0][0] / w_unit), ii] = 1
                pose_peaks_0_rcv[ii][0] = p[0][1]
                pose_peaks_0_rcv[ii][1] = p[0][0]
                pose_peaks_0_rcv[ii][2] = 1
        ## Generate body region proposals
        # part_bbox_list_0, visibility_list_0 = get_part_bbox(peaks, img_path_0, i)
        part_bbox_list_0, visibility_list_0 = get_part_bbox(peaks, img_path_0)
        if FiltOutMissRegion and (0 in visibility_list_0):
            return None
        roi_mask_list_0 = get_roi_mask(part_bbox_list_0, visibility_list_0)
        roi10_mask_0 = np.transpose(np.squeeze(np.array(roi_mask_list_0)),
                                    [1, 2, 0])

        ## Pose 1
        # peaks = all_peaks_dic[pairs[i][1]]
        peaks = _get_valid_peaks(all_peaks_dic[pairs[i][1]],
                                 subsets_dic[pairs[i][1]])
        indices_r4_1, values_r4_1, shape = _getSparsePose(peaks,
                                                          height,
                                                          width,
                                                          18,
                                                          radius=4,
                                                          mode='Solid')
        indices_r4_1, shape_1 = _oneDimSparsePose(indices_r4_1, shape)
        indices_r8_1, values_r8_1, shape = _getSparsePose(peaks,
                                                          height,
                                                          width,
                                                          18,
                                                          radius=8,
                                                          mode='Solid')
        indices_r8_1, _ = _oneDimSparsePose(indices_r8_1, shape)
        # pose_dense_r4_1 = _sparse2dense(indices_r4_1, values_r4_1, shape)
        pose_mask_r4_1 = _getPoseMask(peaks,
                                      height,
                                      width,
                                      radius=4,
                                      mode='Solid')
        pose_mask_r8_1 = _getPoseMask(peaks,
                                      height,
                                      width,
                                      radius=8,
                                      mode='Solid')
        # indices_r6_v4_1, values_r6_v4_1, shape = _getSparsePose(peaks, height, width, 18, radius=6, var=4, mode='Gaussian')
        ## Generate body region proposals
        part_bbox_list_1, visibility_list_1 = get_part_bbox(peaks, img_path_1)
        if FiltOutMissRegion and (0 in visibility_list_1):
            return None
        roi_mask_list_1 = get_roi_mask(part_bbox_list_1, visibility_list_1)
        roi10_mask_1 = np.transpose(np.squeeze(np.array(roi_mask_list_1)),
                                    [1, 2, 0])

        ###### Visualize ######
        # dense = _sparse2dense(indices_r4, values_r4, shape)
        # _visualizePose(pose_mask_r4_0, scipy.misc.imread(img_path_0))
        # _visualizePose(pose_mask_r4_1, scipy.misc.imread(img_path_1))
        # if i in [0,5]:
        #     _visualizePose(roi_mask_list_0[0], scipy.misc.imread(img_path_0))
        #     _visualizePose(roi_mask_list_0[1], scipy.misc.imread(img_path_0))
        #     _visualizePose(roi_mask_list_0[2], scipy.misc.imread(img_path_0))
        #     _visualizePose(roi_mask_list_0[3], scipy.misc.imread(img_path_0))
        #     _visualizePose(roi_mask_list_0[4], scipy.misc.imread(img_path_0))
        #     _visualizePose(roi_mask_list_0[5], scipy.misc.imread(img_path_0))
        #     _visualizePose(roi_mask_list_0[6], scipy.misc.imread(img_path_0))
        #     _visualizePose(roi_mask_list_0[7], scipy.misc.imread(img_path_0))
        #     _visualizePose(roi_mask_list_0[8], scipy.misc.imread(img_path_0))
        #     _visualizePose(roi_mask_list_0[9], scipy.misc.imread(img_path_0))
        # pdb.set_trace()

        for ii in range(len(peaks)):
            p = peaks[ii]
            if 0 != len(p):
                pose_peaks_1[int(p[0][1] / h_unit),
                             int(p[0][0] / w_unit), ii] = 1
                pose_peaks_1_rcv[ii][0] = p[0][1]
                pose_peaks_1_rcv[ii][1] = p[0][0]
                pose_peaks_1_rcv[ii][2] = 1
        pose_subs_0 = subsets_dic[pairs[i][0]][0].tolist()
        pose_subs_1 = subsets_dic[pairs[i][1]][0].tolist()
    else:
        return None

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image_name_0':
            dataset_utils.bytes_feature(tf.compat.as_bytes(pairs[i][0])),
            'image_name_1':
            dataset_utils.bytes_feature(tf.compat.as_bytes(pairs[i][1])),
            'image_raw_0':
            dataset_utils.bytes_feature(image_raw_0),
            'image_raw_1':
            dataset_utils.bytes_feature(image_raw_1),
            'label':
            dataset_utils.int64_feature(labels[i]),
            'id_0':
            dataset_utils.int64_feature(id_map[id_0]),
            'id_1':
            dataset_utils.int64_feature(id_map[id_1]),
            'cam_0':
            dataset_utils.int64_feature(-1),
            'cam_1':
            dataset_utils.int64_feature(-1),
            'image_format':
            dataset_utils.bytes_feature(b'jpg'),
            'image_height':
            dataset_utils.int64_feature(height),
            'image_width':
            dataset_utils.int64_feature(width),
            'real_data':
            dataset_utils.int64_feature(1),
            'attrs_0':
            dataset_utils.int64_feature(attrs_0),
            'attrs_1':
            dataset_utils.int64_feature(attrs_1),
            'pose_peaks_0':
            dataset_utils.float_feature(pose_peaks_0.flatten().tolist()),
            'pose_peaks_1':
            dataset_utils.float_feature(pose_peaks_1.flatten().tolist()),
            'pose_peaks_0_rcv':
            dataset_utils.float_feature(pose_peaks_0_rcv.flatten().tolist()),
            'pose_peaks_1_rcv':
            dataset_utils.float_feature(pose_peaks_1_rcv.flatten().tolist()),
            # 'pose_dense_r4_0': dataset_utils.int64_feature(pose_dense_r4_0.astype(np.int64).flatten().tolist()),
            # 'pose_dense_r4_1': dataset_utils.int64_feature(pose_dense_r4_1.astype(np.int64).flatten().tolist()),
            'pose_mask_r4_0':
            dataset_utils.int64_feature(
                pose_mask_r4_0.astype(np.int64).flatten().tolist()),
            'pose_mask_r4_1':
            dataset_utils.int64_feature(
                pose_mask_r4_1.astype(np.int64).flatten().tolist()),
            'pose_mask_r8_0':
            dataset_utils.int64_feature(
                pose_mask_r8_0.astype(np.int64).flatten().tolist()),
            'pose_mask_r8_1':
            dataset_utils.int64_feature(
                pose_mask_r8_1.astype(np.int64).flatten().tolist()),
            'shape':
            dataset_utils.int64_feature(shape_0),

            # 'indices_r6_v4_0': dataset_utils.int64_feature(np.array(indices_r6_v4_0).astype(np.int64).flatten().tolist()),
            # 'values_r6_v4_0': dataset_utils.float_feature(np.array(values_r6_v4_0).astype(np.float).flatten().tolist()),
            # 'indices_r6_v4_1': dataset_utils.int64_feature(np.array(indices_r6_v4_1).astype(np.int64).flatten().tolist()),
            # 'values_r6_v4_1': dataset_utils.float_feature(np.array(values_r6_v4_1).astype(np.float).flatten().tolist()),
            'indices_r4_0':
            dataset_utils.int64_feature(
                np.array(indices_r4_0).astype(np.int64).flatten().tolist()),
            # np.float64 instead of the np.float alias removed in NumPy 1.24.
            'values_r4_0':
            dataset_utils.float_feature(
                np.array(values_r4_0).astype(np.float64).flatten().tolist()),
            'indices_r4_1':
            dataset_utils.int64_feature(
                np.array(indices_r4_1).astype(np.int64).flatten().tolist()),
            'values_r4_1':
            dataset_utils.float_feature(
                np.array(values_r4_1).astype(np.float64).flatten().tolist()),
            'indices_r8_0':
            dataset_utils.int64_feature(
                np.array(indices_r8_0).astype(np.int64).flatten().tolist()),
            'values_r8_0':
            dataset_utils.float_feature(
                np.array(values_r8_0).astype(np.float64).flatten().tolist()),
            'indices_r8_1':
            dataset_utils.int64_feature(
                np.array(indices_r8_1).astype(np.int64).flatten().tolist()),
            'values_r8_1':
            dataset_utils.float_feature(
                np.array(values_r8_1).astype(np.float64).flatten().tolist()),
            'pose_subs_0':
            dataset_utils.float_feature(pose_subs_0),
            'pose_subs_1':
            dataset_utils.float_feature(pose_subs_1),
            'part_bbox_0':
            dataset_utils.int64_feature(
                np.array(part_bbox_list_0).astype(
                    np.int64).flatten().tolist()),
            'part_bbox_1':
            dataset_utils.int64_feature(
                np.array(part_bbox_list_1).astype(
                    np.int64).flatten().tolist()),
            'part_vis_0':
            dataset_utils.int64_feature(
                np.array(visibility_list_0).astype(
                    np.int64).flatten().tolist()),
            'part_vis_1':
            dataset_utils.int64_feature(
                np.array(visibility_list_1).astype(
                    np.int64).flatten().tolist()),
            'roi10_mask_0':
            dataset_utils.int64_feature(
                roi10_mask_0.astype(np.int64).flatten().tolist()),
            'roi10_mask_1':
            dataset_utils.int64_feature(
                roi10_mask_1.astype(np.int64).flatten().tolist()),
        }))

    return example
Example #21
def _create_tf_example(image, annotations, image_dir):
    """Converts image and annotations to a tf.Example proto.

  Args:
    image: dict with keys: [u'license', u'file_name', u'coco_url', u'height',
      u'width', u'date_captured', u'flickr_url', u'id']
    annotations: dict with objects (a list of image annotations) and a label:
      {u'objects': [{'area': ..., 'bbox': [x, y, width, height]}],
       u'label': ...}. Notice that bounding box coordinates in the COCO
      dataset are given as [x, y, width, height] tuples using absolute
      coordinates, where x, y represent the top-left (0-indexed) corner. This
      function converts them to the format used by the Tensorflow Object
      Detection API ([ymin, xmin, ymax, xmax] with coordinates normalized
      relative to the image size).
    image_dir: directory containing the image files.
  Returns:
    tf_example: The converted tf.Example

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
    image_height = image['height']
    image_width = image['width']
    filename = image['file_name']
    image_id = image['id']

    full_path = os.path.join(image_dir, filename)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = PIL.Image.open(encoded_jpg_io)
    if image.format != 'JPEG':
        # The docstring promises a ValueError for non-JPEG input; enforce it.
        raise ValueError('Image format is not JPEG: %s' % full_path)
    key = hashlib.sha256(encoded_jpg).hexdigest()

    xmin, xmax, ymin, ymax, area = [], [], [], [], []
    for obj in annotations['objects']:
        (x, y, width, height) = tuple(obj['bbox'])
        xmin.append(float(x) / image_width)
        xmax.append(float(x + width) / image_width)
        ymin.append(float(y) / image_height)
        ymax.append(float(y + height) / image_height)
        area.append(obj['area'])

    feature_dict = {
        'image/height':
        dataset_utils.int64_feature(image_height),
        'image/width':
        dataset_utils.int64_feature(image_width),
        'image/filename':
        dataset_utils.bytes_feature(filename.encode('utf8')),
        'image/source_id':
        dataset_utils.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256':
        dataset_utils.bytes_feature(key.encode('utf8')),
        'image/encoded':
        dataset_utils.bytes_feature(encoded_jpg),
        'image/format':
        dataset_utils.bytes_feature('jpeg'.encode('utf8')),
        'image/class/label':
        dataset_utils.int64_feature(annotations['label']),
        'image/object/bbox/xmin':
        dataset_utils.float_list_feature(xmin),
        'image/object/bbox/xmax':
        dataset_utils.float_list_feature(xmax),
        'image/object/bbox/ymin':
        dataset_utils.float_list_feature(ymin),
        'image/object/bbox/ymax':
        dataset_utils.float_list_feature(ymax),
        'image/object/area':
        dataset_utils.float_list_feature(area),
    }
    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return example
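As the docstring notes, COCO boxes are absolute [x, y, width, height] with a top-left origin, while the Object Detection API expects normalized [ymin, xmin, ymax, xmax]. A quick worked example of the conversion done in the loop above, with hypothetical values:

image_width, image_height = 640, 480              # hypothetical image size
x, y, width, height = 100.0, 50.0, 200.0, 100.0   # one COCO-style box
print(x / image_width, (x + width) / image_width)      # xmin=0.15625, xmax=0.46875
print(y / image_height, (y + height) / image_height)   # ymin~0.10417, ymax=0.3125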
Example #22
def _format_data(sess, image_reader, folder_path, pairs, i, labels, id_map, attr_mat, id_map_attr, 
                                    all_peaks_dic, subsets_dic, FiltOutMissRegion=False):
    # Read the filename:
    img_path_0 = os.path.join(folder_path, pairs[i][0])
    img_path_1 = os.path.join(folder_path, pairs[i][1])

    id_0 = pairs[i][0].split('_')[0]
    id_1 = pairs[i][1].split('_')[0]

    image_raw_0 = tf.gfile.FastGFile(img_path_0, 'rb').read()
    image_raw_1 = tf.gfile.FastGFile(img_path_1, 'rb').read()
    height, width = image_reader.read_image_dims(sess, image_raw_0)

    attrs_0 = []
    attrs_1 = []
    if attr_mat is not None:
        idx_0 = id_map_attr[id_0]
        idx_1 = id_map_attr[id_1]
        for name in attr_mat.dtype.names:
            attrs_0.append(attr_mat[(name)][0][0][0][idx_0])
            attrs_1.append(attr_mat[(name)][0][0][0][idx_1])

    ########################## Pose 16x16 & Pose coordinate (for 128x64(Solid) 128x64(Gaussian)) ##########################
    ## Pose 16x16 (pose_peaks_* below are [16, 16, 18])
    w_unit = width / 16
    h_unit = height / 16
    pose_peaks_0 = np.zeros([16,16,18])
    pose_peaks_1 = np.zeros([16,16,18])
    ## Pose coordinate
    pose_peaks_0_rcv = np.zeros([18,3])
    pose_peaks_1_rcv = np.zeros([18,3])
    #
    pose_subs_0 = []
    pose_subs_1 = []
    # pdb.set_trace()
    if (all_peaks_dic is not None) and (pairs[i][0] in all_peaks_dic) and (pairs[i][1] in all_peaks_dic):
        ## Pose 0
        # peaks = all_peaks_dic[pairs[i][0]]
        peaks = _get_valid_peaks(all_peaks_dic[pairs[i][0]], subsets_dic[pairs[i][0]])
        # print(peaks)
        indices_r4_0, values_r4_0, shape = _getSparsePose(peaks, height, width, 18, radius=4, mode='Solid')
        indices_r4_0, shape_0 = _oneDimSparsePose(indices_r4_0, shape)
        indices_r8_0, values_r8_0, shape = _getSparsePose(peaks, height, width, 18, radius=8, mode='Solid')
        indices_r8_0, _ = _oneDimSparsePose(indices_r8_0, shape)
        # pose_dense_r4_0 = _sparse2dense(indices_r4_0, values_r4_0, shape)
        pose_mask_r4_0 = _getPoseMask(peaks, height, width, radius=4, mode='Solid')
        pose_mask_r8_0 = _getPoseMask(peaks, height, width, radius=8, mode='Solid')
        # indices_r6_v4_0, values_r6_v4_0, shape = _getSparsePose(peaks, height, width, 18, radius=6, var=4, mode='Gaussian')
        for ii in range(len(peaks)):
            p = peaks[ii]
            if len(p) != 0:
                pose_peaks_0[int(p[0][1]/h_unit), int(p[0][0]/w_unit), ii] = 1
                pose_peaks_0_rcv[ii][0] = p[0][1]
                pose_peaks_0_rcv[ii][1] = p[0][0]
                pose_peaks_0_rcv[ii][2] = 1
        ## Generate body region proposals
        # part_bbox_list_0, visibility_list_0 = get_part_bbox(peaks, img_path_0, i)
        part_bbox_list_0, visibility_list_0 = get_part_bbox(peaks, img_path_0)
        if FiltOutMissRegion and (0 in visibility_list_0):
            return None
        roi_mask_list_0 = get_roi_mask(part_bbox_list_0, visibility_list_0)
        roi10_mask_0 = np.transpose(np.squeeze(np.array(roi_mask_list_0)),[1,2,0])

        ## Pose 1
        # peaks = all_peaks_dic[pairs[i][1]]
        peaks = _get_valid_peaks(all_peaks_dic[pairs[i][1]], subsets_dic[pairs[i][1]])
        indices_r4_1, values_r4_1, shape = _getSparsePose(peaks, height, width, 18, radius=4, mode='Solid')
        indices_r4_1, shape_1 = _oneDimSparsePose(indices_r4_1, shape)
        indices_r8_1, values_r8_1, shape = _getSparsePose(peaks, height, width, 18, radius=8, mode='Solid')
        indices_r8_1, _ = _oneDimSparsePose(indices_r8_1, shape)
        # pose_dense_r4_1 = _sparse2dense(indices_r4_1, values_r4_1, shape)
        pose_mask_r4_1 = _getPoseMask(peaks, height, width, radius=4, mode='Solid')
        pose_mask_r8_1 = _getPoseMask(peaks, height, width, radius=8, mode='Solid')
        # indices_r6_v4_1, values_r6_v4_1, shape = _getSparsePose(peaks, height, width, 18, radius=6, var=4, mode='Gaussian')
        ## Generate body region proposals
        part_bbox_list_1, visibility_list_1 = get_part_bbox(peaks, img_path_1)
        if FiltOutMissRegion and (0 in visibility_list_1):
            return None
        roi_mask_list_1 = get_roi_mask(part_bbox_list_1, visibility_list_1)
        roi10_mask_1 = np.transpose(np.squeeze(np.array(roi_mask_list_1)),[1,2,0])

        ###### Visualize ######
        # dense = _sparse2dense(indices_r4, values_r4, shape)
        # _visualizePose(pose_mask_r4_0, scipy.misc.imread(img_path_0))
        # _visualizePose(pose_mask_r4_1, scipy.misc.imread(img_path_1))
        # if i in [0,5]:
        #     _visualizePose(roi_mask_list_0[0], scipy.misc.imread(img_path_0))
        #     _visualizePose(roi_mask_list_0[1], scipy.misc.imread(img_path_0))
        #     _visualizePose(roi_mask_list_0[2], scipy.misc.imread(img_path_0))
        #     _visualizePose(roi_mask_list_0[3], scipy.misc.imread(img_path_0))
        #     _visualizePose(roi_mask_list_0[4], scipy.misc.imread(img_path_0))
        #     _visualizePose(roi_mask_list_0[5], scipy.misc.imread(img_path_0))
        #     _visualizePose(roi_mask_list_0[6], scipy.misc.imread(img_path_0))
        #     _visualizePose(roi_mask_list_0[7], scipy.misc.imread(img_path_0))
        #     _visualizePose(roi_mask_list_0[8], scipy.misc.imread(img_path_0))
        #     _visualizePose(roi_mask_list_0[9], scipy.misc.imread(img_path_0))
        # pdb.set_trace()

        for ii in range(len(peaks)):
            p = peaks[ii]
            if len(p) != 0:
                pose_peaks_1[int(p[0][1]/h_unit), int(p[0][0]/w_unit), ii] = 1
                pose_peaks_1_rcv[ii][0] = p[0][1]
                pose_peaks_1_rcv[ii][1] = p[0][0]
                pose_peaks_1_rcv[ii][2] = 1
        pose_subs_0 = subsets_dic[pairs[i][0]][0].tolist()
        pose_subs_1 = subsets_dic[pairs[i][1]][0].tolist()
    else:
        return None

    example = tf.train.Example(features=tf.train.Features(feature={
            'image_name_0': dataset_utils.bytes_feature(pairs[i][0].encode('utf-8')),
            'image_name_1': dataset_utils.bytes_feature(pairs[i][1].encode('utf-8')),
            'image_raw_0': dataset_utils.bytes_feature(image_raw_0),
            'image_raw_1': dataset_utils.bytes_feature(image_raw_1),
            'label': dataset_utils.int64_feature(labels[i]),
            'id_0': dataset_utils.int64_feature(id_map[id_0]),
            'id_1': dataset_utils.int64_feature(id_map[id_1]),
            'cam_0': dataset_utils.int64_feature(-1),
            'cam_1': dataset_utils.int64_feature(-1),
            'image_format': dataset_utils.bytes_feature(b'jpg'),
            'image_height': dataset_utils.int64_feature(height),
            'image_width': dataset_utils.int64_feature(width),
            'real_data': dataset_utils.int64_feature(1),
            'attrs_0': dataset_utils.int64_feature(attrs_0),
            'attrs_1': dataset_utils.int64_feature(attrs_1),
            'pose_peaks_0': dataset_utils.float_feature(pose_peaks_0.flatten().tolist()),
            'pose_peaks_1': dataset_utils.float_feature(pose_peaks_1.flatten().tolist()),
            'pose_peaks_0_rcv': dataset_utils.float_feature(pose_peaks_0_rcv.flatten().tolist()),
            'pose_peaks_1_rcv': dataset_utils.float_feature(pose_peaks_1_rcv.flatten().tolist()),
            # 'pose_dense_r4_0': dataset_utils.int64_feature(pose_dense_r4_0.astype(np.int64).flatten().tolist()),
            # 'pose_dense_r4_1': dataset_utils.int64_feature(pose_dense_r4_1.astype(np.int64).flatten().tolist()),
            'pose_mask_r4_0': dataset_utils.int64_feature(pose_mask_r4_0.astype(np.int64).flatten().tolist()),
            'pose_mask_r4_1': dataset_utils.int64_feature(pose_mask_r4_1.astype(np.int64).flatten().tolist()),
            'pose_mask_r8_0': dataset_utils.int64_feature(pose_mask_r8_0.astype(np.int64).flatten().tolist()),
            'pose_mask_r8_1': dataset_utils.int64_feature(pose_mask_r8_1.astype(np.int64).flatten().tolist()),

            'shape': dataset_utils.int64_feature(shape_0),

            # 'indices_r6_v4_0': dataset_utils.int64_feature(np.array(indices_r6_v4_0).astype(np.int64).flatten().tolist()),
            # 'values_r6_v4_0': dataset_utils.float_feature(np.array(values_r6_v4_0).astype(np.float).flatten().tolist()),
            # 'indices_r6_v4_1': dataset_utils.int64_feature(np.array(indices_r6_v4_1).astype(np.int64).flatten().tolist()),
            # 'values_r6_v4_1': dataset_utils.float_feature(np.array(values_r6_v4_1).astype(np.float).flatten().tolist()),
            
            'indices_r4_0': dataset_utils.int64_feature(np.array(indices_r4_0).astype(np.int64).flatten().tolist()),
            'values_r4_0': dataset_utils.float_feature(np.array(values_r4_0).astype(np.float32).flatten().tolist()),
            'indices_r4_1': dataset_utils.int64_feature(np.array(indices_r4_1).astype(np.int64).flatten().tolist()),
            'values_r4_1': dataset_utils.float_feature(np.array(values_r4_1).astype(np.float32).flatten().tolist()),
            'indices_r8_0': dataset_utils.int64_feature(np.array(indices_r8_0).astype(np.int64).flatten().tolist()),
            'values_r8_0': dataset_utils.float_feature(np.array(values_r8_0).astype(np.float32).flatten().tolist()),
            'indices_r8_1': dataset_utils.int64_feature(np.array(indices_r8_1).astype(np.int64).flatten().tolist()),
            'values_r8_1': dataset_utils.float_feature(np.array(values_r8_1).astype(np.float32).flatten().tolist()),

            'pose_subs_0': dataset_utils.float_feature(pose_subs_0),
            'pose_subs_1': dataset_utils.float_feature(pose_subs_1),

            'part_bbox_0': dataset_utils.int64_feature(np.array(part_bbox_list_0).astype(np.int64).flatten().tolist()),
            'part_bbox_1': dataset_utils.int64_feature(np.array(part_bbox_list_1).astype(np.int64).flatten().tolist()),
            'part_vis_0': dataset_utils.int64_feature(np.array(visibility_list_0).astype(np.int64).flatten().tolist()),
            'part_vis_1': dataset_utils.int64_feature(np.array(visibility_list_1).astype(np.int64).flatten().tolist()),
            'roi10_mask_0': dataset_utils.int64_feature(roi10_mask_0.astype(np.int64).flatten().tolist()),
            'roi10_mask_1': dataset_utils.int64_feature(roi10_mask_1.astype(np.int64).flatten().tolist()),
    }))

    return example
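For reference, reading such a record back mirrors the feature dictionary above. A minimal TF1-style sketch covering only a few of the keys; the fixed 16*16*18 length for pose_peaks_0 is inferred from how the map is built above:

import tensorflow as tf

def parse_pair_example(serialized):
    features = tf.parse_single_example(
        serialized,
        features={
            'image_raw_0': tf.FixedLenFeature([], tf.string),
            'image_height': tf.FixedLenFeature([], tf.int64),
            'image_width': tf.FixedLenFeature([], tf.int64),
            'pose_peaks_0': tf.FixedLenFeature([16 * 16 * 18], tf.float32),
            'indices_r4_0': tf.VarLenFeature(tf.int64),   # variable-length sparse
            'values_r4_0': tf.VarLenFeature(tf.float32),
        })
    image_0 = tf.image.decode_jpeg(features['image_raw_0'], channels=3)
    return image_0, features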
Example #23
def _format_data(sess, image_reader, idx, tmp_dir, pathlist_A, pathlist_B,
                 pathlist_A_seg, pathlist_B_seg, pathlist_A_seg_class,
                 pathlist_B_seg_class, B_seg_valid_list):
    ## Resize and (optionally) random flip; flipping is currently disabled
    # if np.random.rand() > 0.5:
    #     IMG_FLIP = True
    # else:
    #     IMG_FLIP = False
    IMG_FLIP = False
    path_A = _img_resize_flip(pathlist_A[idx],
                              IMG_FLIP,
                              tmp_dir,
                              image_width=_IMG_WEIGHT,
                              image_height=_IMG_HEIGHT)
    path_B = _img_resize_flip(pathlist_B[idx],
                              IMG_FLIP,
                              tmp_dir,
                              image_width=_IMG_WEIGHT,
                              image_height=_IMG_HEIGHT)
    path_A_seg = _img_resize_flip(pathlist_A_seg[idx],
                                  IMG_FLIP,
                                  tmp_dir,
                                  image_width=_IMG_WEIGHT,
                                  image_height=_IMG_HEIGHT)
    path_B_seg = _img_resize_flip(pathlist_B_seg[idx],
                                  IMG_FLIP,
                                  tmp_dir,
                                  image_width=_IMG_WEIGHT,
                                  image_height=_IMG_HEIGHT)
    # pdb.set_trace()
    path_A_seg_class = _img_resize_flip(pathlist_A_seg_class[idx],
                                        IMG_FLIP,
                                        tmp_dir,
                                        image_width=_IMG_WEIGHT,
                                        image_height=_IMG_HEIGHT)
    path_B_seg_class = _img_resize_flip(pathlist_B_seg_class[idx],
                                        IMG_FLIP,
                                        tmp_dir,
                                        image_width=_IMG_WEIGHT,
                                        image_height=_IMG_HEIGHT)
    # nd_A_seg_class = _img_fliplr_oneHot_zoom(pathlist_A_seg_class[idx], IMG_FLIP, image_width=_IMG_WEIGHT, image_height=_IMG_HEIGHT)
    # nd_B_seg_class = _img_fliplr_oneHot_zoom(pathlist_B_seg_class[idx], IMG_FLIP, image_width=_IMG_WEIGHT, image_height=_IMG_HEIGHT)

    image_raw_A = tf.gfile.FastGFile(path_A, 'rb').read()
    image_raw_B = tf.gfile.FastGFile(path_B, 'rb').read()
    image_raw_A_seg = tf.gfile.FastGFile(path_A_seg, 'rb').read()
    image_raw_B_seg = tf.gfile.FastGFile(path_B_seg, 'rb').read()
    image_raw_A_seg_class = tf.gfile.FastGFile(path_A_seg_class, 'rb').read()
    image_raw_B_seg_class = tf.gfile.FastGFile(path_B_seg_class, 'rb').read()

    height, width = image_reader.read_image_dims(sess, image_raw_A)
    # pdb.set_trace()
    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image_name_A':
            dataset_utils.bytes_feature(
                pathlist_A[idx].split('/')[-1].encode('utf-8')),
            'image_name_B':
            dataset_utils.bytes_feature(
                pathlist_B[idx].split('/')[-1].encode('utf-8')),
            'image_raw_A':
            dataset_utils.bytes_feature(image_raw_A),
            'image_raw_B':
            dataset_utils.bytes_feature(image_raw_B),
            'image_raw_A_seg':
            dataset_utils.bytes_feature(image_raw_A_seg),
            'image_raw_B_seg':
            dataset_utils.bytes_feature(image_raw_B_seg),
            'image_raw_A_seg_class':
            dataset_utils.bytes_feature(image_raw_A_seg_class),
            'image_raw_B_seg_class':
            dataset_utils.bytes_feature(image_raw_B_seg_class),
            # 'image_raw_A_seg_class': dataset_utils.int64_feature(nd_A_seg_class.reshape(-1).tolist()),
            # 'image_raw_B_seg_class': dataset_utils.int64_feature(nd_B_seg_class.reshape(-1).tolist()),
            'image_format':
            dataset_utils.bytes_feature(b'png'),
            'image_height':
            dataset_utils.int64_feature(height),
            'image_width':
            dataset_utils.int64_feature(width),
            'A_seg_valid':
            dataset_utils.int64_feature(1),
            'B_seg_valid':
            dataset_utils.int64_feature(B_seg_valid_list[idx]),
        }))
    return example
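The dataset_utils helpers used throughout these examples are not shown; they are assumed to follow the usual TF-Slim definitions, roughly:

import tensorflow as tf

def int64_feature(values):
    # Accepts a single int or a list of ints.
    if not isinstance(values, (tuple, list)):
        values = [values]
    return tf.train.Feature(int64_list=tf.train.Int64List(value=values))

def float_feature(values):
    if not isinstance(values, (tuple, list)):
        values = [values]
    return tf.train.Feature(float_list=tf.train.FloatList(value=values))

def bytes_feature(values):
    # Expects bytes in Python 3; encode str values before calling.
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[values]))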
def do_data_prep_with_tfrecord(raw_data,
                               tfrecord_filename,
                               _NUM_SHARDS,
                               dataset_directory_address,
                               num_train_data,
                               num_valid_data,
                               split_name='train'):
    num_per_shard = math.ceil(len(raw_data) / float(_NUM_SHARDS))
    with tf.Graph().as_default():
        with tf.Session() as sess:

            end_index = 0
            shard_id = -1
            index = 0
            Vindex = 0  # counts how many examples are written to the validation tfrecord

            # The following loops were changed to handle both repetition of the
            # train data and extraction of the validation data, so both splits
            # can be gathered in one run; the data is repeated twice to verify
            # it in training.
            for image_file in raw_data.keys():
                if (shard_id < 0
                        or index >= end_index + 1) and index <= num_train_data:
                    shard_id += 1
                    end_index = min((shard_id + 1) * num_per_shard,
                                    len(raw_data))
                    output_filename = _get_dataset_filename(
                        dataset_directory_address,
                        split_name,
                        shard_id,
                        tfrecord_filename=tfrecord_filename,
                        _NUM_SHARDS=_NUM_SHARDS)

                    tfrecord_writer_train = tf.python_io.TFRecordWriter(
                        output_filename)

                    y_true_conf, y_true_loc, match_counter = find_gt_boxes(
                        raw_data, image_file)
                    print('size of y_true_conf: %d' % (len(y_true_conf)))
                    print('size of y_true_loc: %d' % (len(y_true_loc)))

                    if (match_counter > 0):
                        image = Image.open(
                            'Caltech pedestrian dataset/Caltech pedestrian dataset/data_train/images_640x480/'
                            + image_file[27:])
                        image = image.convert('L')
                        image.save('dataset/test/' + image_file[27:])

                        index += 1
                        if split_name == 'train':
                            index_angle = int(np.random.uniform(0, 5))
                            example = tf.train.Example(
                                features=tf.train.Features(
                                    feature={
                                        'image_address':
                                        bytes_feature(
                                            tf.compat.as_bytes(image_file)),
                                        'y_true_conf':
                                        tf.train.Feature(
                                            float_list=tf.train.FloatList(
                                                value=y_true_conf.flatten())),
                                        'y_true_loc':
                                        tf.train.Feature(
                                            float_list=tf.train.FloatList(
                                                value=y_true_loc.flatten())),
                                        'index_angle':
                                        tf.train.Feature(
                                            int64_list=tf.train.Int64List(
                                                value=[index_angle]))
                                    }))

                            tfrecord_writer_train.write(
                                example.SerializeToString())

                    sys.stdout.write('\r>> index: %d,   number of matches: %d' %
                                     (index, match_counter))
                    sys.stdout.flush()

                else:

                    # dataset_directory_address + raw_data[i][0]
                    '''bboxes_coords = []
                                    for i, box in enumerate(raw_data[i][1]):
                                        bboxes_coords.append(box)'''
                    if (index <= num_train_data):
                        y_true_conf, y_true_loc, match_counter = find_gt_boxes(
                            raw_data, image_file)

                        if (match_counter > 0):
                            image = Image.open(
                                'Caltech pedestrian dataset/Caltech pedestrian dataset/data_train/images_640x480/'
                                + image_file[27:])
                            image = image.convert('L')
                            image.save('dataset/test/' + image_file[27:])

                            index += 1

                            if split_name == 'train':
                                index_angle = int(np.random.uniform(0, 5))
                                example = tf.train.Example(
                                    features=tf.train.Features(
                                        feature={
                                            'image_address':
                                            bytes_feature(
                                                tf.compat.as_bytes(
                                                    image_file)),
                                            'y_true_conf':
                                            tf.train.Feature(
                                                float_list=tf.train.FloatList(
                                                    value=y_true_conf.flatten(
                                                    ))),
                                            'y_true_loc':
                                            tf.train.Feature(
                                                float_list=tf.train.FloatList(
                                                    value=y_true_loc.flatten(
                                                    ))),
                                            'index_angle':
                                            tf.train.Feature(
                                                int64_list=tf.train.Int64List(
                                                    value=[index_angle]))
                                        }))

                                tfrecord_writer_train.write(
                                    example.SerializeToString())

                        sys.stdout.write(
                            '\r>> index: %d,  number of matches: %d' %
                            (index, match_counter))
                        sys.stdout.flush()

                    else:
                        if (Vindex == 0):
                            tfrecord_writer_train.close()
                            print('\nStart to prepare validation data\n')

                            split_name = 'validation'
                            output_filename = _get_dataset_filename(
                                dataset_directory_address,
                                split_name,
                                0,
                                tfrecord_filename=tfrecord_filename,
                                _NUM_SHARDS=_NUM_SHARDS)
                            tfrecord_writer_validation = tf.python_io.TFRecordWriter(
                                output_filename)

                        y_true_conf, y_true_loc, match_counter = find_gt_boxes(
                            raw_data, image_file)

                        if (match_counter > 0):
                            image = Image.open(
                                'Caltech pedestrian dataset/Caltech pedestrian dataset/data_train/images_640x480/'
                                + image_file[27:])
                            image = image.convert('L')
                            image.save('dataset/test/' + image_file[27:])
                            index += 1
                            Vindex += 1
                            if split_name == 'validation':
                                index_angle = int(np.random.uniform(0, 5))
                                example = tf.train.Example(
                                    features=tf.train.Features(
                                        feature={
                                            'image_address':
                                            bytes_feature(
                                                tf.compat.as_bytes(
                                                    image_file)),
                                            'y_true_conf':
                                            tf.train.Feature(
                                                float_list=tf.train.FloatList(
                                                    value=y_true_conf.flatten(
                                                    ))),
                                            'y_true_loc':
                                            tf.train.Feature(
                                                float_list=tf.train.FloatList(
                                                    value=y_true_loc.flatten(
                                                    ))),
                                            'index_angle':
                                            tf.train.Feature(
                                                int64_list=tf.train.Int64List(
                                                    value=[index_angle]))
                                        }))

                                tfrecord_writer_validation.write(
                                    example.SerializeToString())

                                if (Vindex > num_valid_data):
                                    break

                        sys.stdout.write(
                            '\r>> index: %d,  number of matches: %d' %
                            (index, match_counter))
                        sys.stdout.flush()

            if (Vindex == 0):
                tfrecord_writer_train.close()
            if (Vindex > 0):
                tfrecord_writer_validation.close()
            print('number of validation data: %d' % Vindex)
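_get_dataset_filename is not defined in this snippet; a minimal sketch following the common TF-Slim sharded-filename convention (the exact pattern is an assumption):

import os

def _get_dataset_filename(dataset_dir, split_name, shard_id,
                          tfrecord_filename, _NUM_SHARDS):
    output_filename = '%s_%s_%05d-of-%05d.tfrecord' % (
        tfrecord_filename, split_name, shard_id, _NUM_SHARDS)
    return os.path.join(dataset_dir, output_filename)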
def _convert_to_example(image_data, labels, labels_text, bboxes, shape,
                        difficult, truncated, oriented_bbox, ignored,
                        filename):
    """
	Build an Example proto for an image example.
	:param image_data: string, JPEG encoding of RGB image;
	:param labels: list of integers, identifier for the ground truth;
	:param labels_text: list of strings, human-readable labels;
	:param bboxes: list of bounding boxes; each box is a list of integers;
		  specifying [ymin, xmin, ymax, xmax]. All boxes are assumed to belong
		  to the same label as the image label.
	:param shape: 3 integers, image shape in pixels.
	:param difficult: indicates whether it is text or not
	:param truncated:
	:param oriented_bbox: oriented bounding box coordinates (8 values per box)
	:param ignored:
	:param filename: image file name
	:return:
	"""
    xmin = []
    ymin = []
    xmax = []
    ymax = []
    for b in bboxes:
        assert len(b) == 4
        # pylint: disable=expression-not-assigned
        [l.append(point) for l, point in zip([ymin, xmin, ymax, xmax], b)]
        # pylint: enable=expression-not-assigned

    x1 = []
    x2 = []
    x3 = []
    x4 = []

    y1 = []
    y2 = []
    y3 = []
    y4 = []

    for obox in oriented_bbox:
        assert len(obox) == 8
        # pylint: disable=expression-not-assigned
        [
            l.append(point)
            for l, point in zip([x1, x2, x3, x4, y1, y2, y3, y4], obox)
        ]
        # pylint: enable=expression-not-assigned

    image_format = b'JPEG'
    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height': int64_feature(shape[0]),
            'image/width': int64_feature(shape[1]),
            'image/channels': int64_feature(shape[2]),
            'image/shape': int64_feature(shape),
            'image/filename': bytes_feature(filename.encode('utf-8')),
            'image/object/bbox/xmin': float_feature(xmin),
            'image/object/bbox/xmax': float_feature(xmax),
            'image/object/bbox/ymin': float_feature(ymin),
            'image/object/bbox/ymax': float_feature(ymax),
            'image/object/bbox/x1': float_feature(x1),
            'image/object/bbox/y1': float_feature(y1),
            'image/object/bbox/x2': float_feature(x2),
            'image/object/bbox/y2': float_feature(y2),
            'image/object/bbox/x3': float_feature(x3),
            'image/object/bbox/y3': float_feature(y3),
            'image/object/bbox/x4': float_feature(x4),
            'image/object/bbox/y4': float_feature(y4),
            'image/object/bbox/label': int64_feature(labels),
            'image/object/bbox/label_text': bytes_feature(labels_text),
            'image/object/bbox/difficult': int64_feature(difficult),
            'image/object/bbox/truncated': int64_feature(truncated),
            'image/object/bbox/ignored': int64_feature(ignored),
            'image/format': bytes_feature(image_format),
            'image/encoded': bytes_feature(image_data)
        }))
    return example
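Note the corner ordering assumed by the zip above: each oriented_bbox entry lists all four x coordinates first, then all four y coordinates. For one hypothetical quadrilateral:

# Hypothetical oriented box with corners (10, 20), (50, 22), (48, 60), (8, 58):
obox = [10, 50, 48, 8, 20, 22, 60, 58]  # [x1, x2, x3, x4, y1, y2, y3, y4]
# After the accumulation loop: x1 == [10], y1 == [20], x3 == [48], y3 == [60].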
def image_to_tfrecord(video_folder, annotation_file, tfrecord_filename):
    """Convert Image to TFRecords.
		crop_folder_name = crop_names.txt
		crops_to_label = name : label
		Other than image name, label_text, tfrecord constains
		additional information include image, image shape, label_id. image and
		image shape are computed by reading image from disk, label_id is
		obtai pos_label=ned by a map "label_to_id" from label_text to label_id.
	"""
    # read the label file
    #filename_to_labels_dict = read_label_file(crops_to_label)
    video_image_list = os.listdir(video_folder)
    video_label_list = os.listdir(annotation_file)
    # define image decoding graph
    inputs = tf.placeholder(dtype=tf.string)
    decoded_jpg = tf.image.decode_jpeg(inputs)
    # open tfRecord reader
    with tf.python_io.TFRecordWriter(tfrecord_filename) as tfrecord_writer:
        # open a session for image decoding
        with tf.Session() as sess:
            for label_file in video_label_list:
                if label_file.startswith("._"):
                    continue
                print("VIDEO--------------------------- ", label_file)
                frame = ""
                label_path = annotation_file + "/" + label_file
                video_name = label_file.strip("\n").replace(".txt", "")
                with open(label_path, "r") as ff:
                    annotations = ff.readlines()
                    for line in annotations:
                        line = line.strip()
                        row = line.split(" ")
                        if frame == row[0]:
                            print("Appending")
                            if row[2] == "Car" or row[2] == "Van" or row[
                                    2] == "Truck":
                                label_class.append(int(1))
                            elif row[2] == "Pedestrian" or row[
                                    2] == "Person_sitting" or row[
                                        2] == "Cyclist":
                                label_class.append(int(2))
                            else:
                                label_class.append(int(0))
                            xmin.append(float(row[6]))
                            ymin.append(float(row[7]))
                            xmax.append(float(row[8]))
                            ymax.append(float(row[9]))

                        else:
                            if frame != "" and os.path.exists(image):

                                print('converting %s' % frame_name)
                                # read image
                                image_data = tf.gfile.FastGFile(image,
                                                                'rb').read()
                                # decode image
                                if frame_name.endswith(('png', 'PNG')):
                                    image_data_decoded = sess.run(
                                        decoded_jpg,
                                        feed_dict={inputs: image_data})
                                    image_format = b'PNG'
                                else:
                                    raise ValueError(
                                        "image %s is not supported" %
                                        frame_name)
                                shape = list(image_data_decoded.shape)
                                # create tf example
                                example = tf.train.Example(
                                    features=tf.train.Features(
                                        feature={
                                            'image/format':
                                            bytes_feature(image_format),
                                            'image/encoded':
                                            bytes_feature(image_data),
                                            'image/filename':
                                            bytes_feature(
                                                bytes(frame_name, 'utf-8')),
                                            #'image/key/sha256' : bytes_feature(shape),
                                            'image/source_id':
                                            bytes_feature(
                                                bytes(source_id, 'utf-8')),
                                            'image/height':
                                            int64_feature(shape[0]),
                                            'image/width':
                                            int64_feature(shape[1]),
                                            'bbox/xmin':
                                            float_feature(xmin),
                                            'bbox/xmax':
                                            float_feature(xmax),
                                            'bbox/ymin':
                                            float_feature(ymin),
                                            'bbox/ymax':
                                            float_feature(ymax),
                                            'bbox/label/index':
                                            int64_feature(label_class)
                                        }))
                                # write example
                                tfrecord_writer.write(
                                    example.SerializeToString())
                                print("FRAME FOUND")

                                xmin = []
                                ymin = []
                                xmax = []
                                ymax = []
                                label_class = []
                                frame = row[0]
                                source_id = row[0]
                                frame_name = row[0].zfill(6) + ".png"
                                image = video_folder + "/" + video_name + "/" + frame_name
                                if os.path.exists(image):
                                    if row[2] == "Car" or row[
                                            2] == "Van" or row[2] == "Truck":
                                        label_class.append(int(1))
                                    elif row[2] == "Pedestrian" or row[
                                            2] == "Person_sitting" or row[
                                                2] == "Cyclist":
                                        label_class.append(int(2))
                                    else:
                                        label_class.append(int(0))

                                    xmin.append(float(row[6]))
                                    ymin.append(float(row[7]))
                                    xmax.append(float(row[8]))
                                    ymax.append(float(row[9]))

                            else:
                                print("START FRAME")
                                xmin = []
                                ymin = []
                                xmax = []
                                ymax = []
                                label_class = []
                                frame = row[0]
                                source_id = row[0]
                                frame_name = row[0].zfill(6) + ".png"
                                image = video_folder + "/" + video_name + "/" + frame_name
                                if os.path.exists(image):
                                    if row[2] == "Car" or row[
                                            2] == "Van" or row[2] == "Truck":
                                        label_class.append(int(1))
                                    elif row[2] == "Pedestrian" or row[
                                            2] == "Person_sitting" or row[
                                                2] == "Cyclist":
                                        label_class.append(int(2))
                                    else:
                                        label_class.append(int(0))

                                    xmin.append(float(row[6]))
                                    ymin.append(float(row[7]))
                                    xmax.append(float(row[8]))
                                    ymax.append(float(row[9]))