Ejemplo n.º 1
0
def create_tf_example(group, path):
    """Build a tf.train.Example for one image and its labelled boxes.

    Args:
        group: record with a ``filename`` attribute and an ``object`` table
            whose rows (via ``iterrows()``) provide ``xmin``, ``xmax``,
            ``ymin``, ``ymax`` and ``class``.
        path: directory containing the image named by ``group.filename``.

    Returns:
        tf.train.Example holding the raw image bytes, the image size, and
        box coordinates divided by the image width/height.
    """
    image_path = os.path.join(path, '{}'.format(group.filename))
    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_jpg = fid.read()
    # The image is decoded only to read its pixel dimensions.
    width, height = Image.open(io.BytesIO(encoded_jpg)).size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'

    rows = [row for _, row in group.object.iterrows()]
    xmins = [row['xmin'] / width for row in rows]
    xmaxs = [row['xmax'] / width for row in rows]
    ymins = [row['ymin'] / height for row in rows]
    ymaxs = [row['ymax'] / height for row in rows]
    classes_text = [row['class'].encode('utf8') for row in rows]
    classes = [class_text_to_int(row['class']) for row in rows]

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
Ejemplo n.º 2
0
def group_to_tf_record(point, image_directory):
    """Convert one annotated image record into a tf.train.Example.

    Args:
        point: mapping with an ``'id'`` string and an ``'annotations'``
            iterable; each annotation provides ``'x0'``, ``'x1'``, ``'y0'``,
            ``'y1'``, ``'class_num'`` and ``'label'``.
        image_directory: directory holding the image files.

    Returns:
        The tf.train.Example, or ``None`` when the image cannot be opened
        or read.
    """
    image_id = point['id']

    # Ids starting with 'frame' are stored as PNG; everything else as JPEG.
    if image_id.startswith('frame'):
        filename = os.path.join(image_directory, image_id + '.png')
        image_format = b'png'
    else:
        filename = os.path.join(image_directory, image_id + '.jpg')
        image_format = b'jpeg'

    try:
        # Open the image only to read its size, and release the handle.
        with Image.open(filename) as image:
            width, height = image.size
        with tf.gfile.GFile(filename, 'rb') as fid:
            encoded_image = bytes(fid.read())
    except Exception:
        # Best effort: an unreadable image is skipped by returning None.
        # Unlike a bare ``except``, KeyboardInterrupt/SystemExit propagate.
        return None

    key = hashlib.sha256(encoded_image).hexdigest()

    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    class_nums = []
    class_ids = []
    # Coordinates are stored as given; no normalization happens here.
    for anno in point['annotations']:
        xmins.append(float(anno['x0']))
        xmaxs.append(float(anno['x1']))
        ymins.append(float(anno['y0']))
        ymaxs.append(float(anno['y1']))
        class_nums.append(anno['class_num'])
        class_ids.append(bytes(anno['label'].encode('utf8')))

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/filename': dataset_util.bytes_feature(bytes(filename.encode('utf8'))),
        'image/source_id': dataset_util.bytes_feature(bytes(image_id.encode('utf8'))),
        'image/encoded': dataset_util.bytes_feature(encoded_image),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(class_ids),
        'image/object/class/label': dataset_util.int64_list_feature(class_nums)
    }))
    return tf_example
def create_tf_example(example, height=600, width=800):
    """Build a tf.train.Example from one annotated image record.

    Args:
        example: mapping with ``'filename'`` (path of the image file) and
            ``'annotations'`` (iterable of boxes providing ``'xmin'``,
            ``'ymin'``, ``'x_width'``, ``'y_height'`` and ``'class'``).
        height: image height in pixels used to normalize y coordinates.
            Defaults to 600, the value previously hard-coded for the
            Udacity real data set.
        width: image width in pixels used to normalize x coordinates.
            Defaults to 800, the value previously hard-coded for the
            Udacity real data set.

    Returns:
        tf.train.Example with the raw image bytes and normalized boxes.

    Note:
        The image is not decoded here, so ``height``/``width`` are trusted
        and not checked against the actual file.
    """
    filename = example['filename'].encode()

    with tf.gfile.GFile(example['filename'], 'rb') as fid:
        encoded_image = fid.read()

    image_format = 'jpg'.encode()

    xmins = []  # normalized left x coordinate, one per box
    xmaxs = []  # normalized right x coordinate, one per box
    ymins = []  # normalized top y coordinate, one per box
    ymaxs = []  # normalized bottom y coordinate, one per box
    classes_text = []  # class name of each box
    classes = []  # integer class id of each box

    for box in example['annotations']:
        # Boxes are given as top-left corner plus extent.
        xmins.append(float(box['xmin'] / width))
        xmaxs.append(float((box['xmin'] + box['x_width']) / width))
        ymins.append(float(box['ymin'] / height))
        ymaxs.append(float((box['ymin'] + box['y_height']) / height))
        classes_text.append(box['class'].encode())
        classes.append(int(LABEL_DICT[box['class']]))

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_image),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))

    return tf_example
def dict_to_tf_example(data):
    """Convert a single-image classification record to a tf.Example proto.

    Args:
      data: dict with 'filename' (path of the image file), 'class_id'
        (integer label) and 'class_text' (label name, str or bytes).

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the image pointed to by data['filename'] is not a
        PNG, JPEG or BMP image.
    """
    def _as_bytes(value):
        # Bytes features only accept bytes; encode text values as UTF-8.
        return value if isinstance(value, bytes) else value.encode('utf8')

    img_path = data['filename']
    # Images are binary data: read them in 'rb' mode, not the text default.
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_img = fid.read()
    image = PIL.Image.open(io.BytesIO(encoded_img))
    if image.format == 'PNG':
        img_format = b'png'
    elif image.format == 'JPEG':
        img_format = b'jpeg'
    elif image.format == 'BMP':
        img_format = b'bmp'
    else:
        raise ValueError('Image format not PNG/JPEG/BMP')
    key = hashlib.sha256(encoded_img).hexdigest()

    (width, height) = image.size

    class_id = data['class_id']
    class_text = data['class_text']
    filename = _as_bytes(data['filename'])

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height': dataset_util.int64_feature(height),
            'image/width': dataset_util.int64_feature(width),
            # The channel count is recorded as a constant, not measured.
            'image/channels': dataset_util.int64_feature(3),
            'image/filename': dataset_util.bytes_feature(filename),
            'image/source_id': dataset_util.bytes_feature(filename),
            'image/key/sha256': dataset_util.bytes_feature(_as_bytes(key)),
            'image/encoded': dataset_util.bytes_feature(encoded_img),
            'image/format': dataset_util.bytes_feature(img_format),
            'image/class/text':
            dataset_util.bytes_feature(_as_bytes(class_text)),
            'image/class/label': dataset_util.int64_feature(class_id),
        }))
    return example
Ejemplo n.º 5
0
def main(unused_argv):
    """Write one tf.train.Example per tagged image to a TFRecord file.

    Reads ``FLAGS.tags_file_path``, where each non-blank line is
    ``<filename><TAB><groundtruth text>``, loads the image from
    ``FLAGS.images_path`` and writes the records to ``FLAGS.output_path``.
    Images that cannot be read are reported and skipped.

    Args:
        unused_argv: ignored.
    """
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    count = 0

    try:
        with open(FLAGS.tags_file_path) as fo:
            for line in fo:
                line = line.strip()
                if not line:
                    # A blank line would otherwise break the unpacking below.
                    continue
                filename, groundtruth_text = line.split('\t')
                # Second transcript: the ground truth with every '.' removed.
                nop_gt = groundtruth_text.replace('.', '')
                image_path = os.path.join(FLAGS.images_path, filename)
                print("filename: {}, gt: {}, nopoint: {}".format(
                    filename, groundtruth_text, nop_gt))
                try:
                    # imread returns None for unreadable files; the resulting
                    # error is caught below and the line is skipped.
                    height, width, _ = cv2.imread(image_path).shape
                    with open(image_path, 'rb') as image_file:
                        image_bin = image_file.read()
                except Exception as e:
                    print(e)
                    continue

                example = tf.train.Example(features=tf.train.Features(
                    feature={
                        fields.TfExampleFields.image_encoded:
                        dataset_util.bytes_feature(image_bin),
                        fields.TfExampleFields.height:
                        dataset_util.int64_feature(height),
                        fields.TfExampleFields.width:
                        dataset_util.int64_feature(width),
                        fields.TfExampleFields.filename:
                        dataset_util.bytes_feature(filename.encode()),
                        fields.TfExampleFields.transcript:
                        dataset_util.bytes_feature(groundtruth_text.encode()),
                        fields.TfExampleFields.transcript2:
                        dataset_util.bytes_feature(nop_gt.encode())
                    }))

                writer.write(example.SerializeToString())
                count += 1
                if count % 1000 == 0:
                    print(count)
    finally:
        # Close the writer even if the loop fails part-way through.
        writer.close()
    print('{} example finished!'.format(count))
Ejemplo n.º 6
0
def create_tf_example(filename):
    """Build a tf.train.Example whose single box is encoded in the file name.

    The base name (directory and extension removed) is split on '-'; its
    third field must look like ``xmin&ymin_xmax&ymax`` with integer pixel
    coordinates.

    Args:
        filename: path of the image file, using '/' as separator.

    Returns:
        tf.train.Example with the raw image bytes and one box normalized
        by the image size, labelled 'vehicle plate' with class id 2.

    Raises:
        IndexError: if the base name has fewer than three '-' fields.
        ValueError: if the coordinate field does not parse as two integer
            pairs.
    """
    stem = filename.rsplit('/', 1)[-1].rsplit('.', 1)[0]
    corners = [[int(value) for value in corner.split('&')]
               for corner in stem.split('-')[2].split('_')]
    (xmin, ymin), (xmax, ymax) = corners

    with tf.gfile.GFile(filename, 'rb') as fid:
        encoded_jpg = fid.read()
    # The image is decoded only to read its dimensions.
    image = PIL.Image.open(io.BytesIO(encoded_jpg))
    height = image.height
    width = image.width

    ymins = [float(ymin) / height]
    xmins = [float(xmin) / width]
    ymaxs = [float(ymax) / height]
    xmaxs = [float(xmax) / width]

    labels_text = ['vehicle plate'.encode('utf8')]
    labels = [2]

    # NOTE: 'image/source_id' and 'image/key/sha256' are intentionally not
    # written; they were disabled in the original feature set.
    feature_dict = {
        'image/height': dataset_util.int64_feature(int(height)),
        'image/width': dataset_util.int64_feature(int(width)),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
        dataset_util.bytes_list_feature(labels_text),
        'image/object/class/label': dataset_util.int64_list_feature(labels),
    }
    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return example
def make_example(filename, label):
    """Pack an image file's raw bytes and its label into a tf.train.Example.

    Args:
        filename: path of the image file to read.
        label: value stored under the 'label' key as an int64 feature.

    Returns:
        tf.train.Example with the keys 'label' and 'image'.
    """
    with tf.gfile.GFile(filename, 'rb') as fid:
        image_bytes = fid.read()
    features = tf.train.Features(feature={
        'label': dataset_util.int64_feature(label),
        'image': dataset_util.bytes_feature(image_bytes),
    })
    return tf.train.Example(features=features)
def create_tfdatapoint(file_loc, file, labels):
    """Build a tf.train.Example from an image and its text label file.

    The image is read from ``<file_loc>/images/<file>`` and the boxes from
    ``<file_loc>/labels/<name>.txt``, where ``<name>`` is the part of
    ``file`` before its first '.'. Each label row has five numbers:
    class index, box centre x, box centre y, box width, box height.

    Args:
        file_loc: dataset root containing 'images' and 'labels' folders.
        file: image file name inside the 'images' folder.
        labels: sequence mapping a class index to its class name.

    Returns:
        tf.train.Example with the raw image bytes and one min/max box per
        label row. Box values are used as found in the label file
        (presumably already normalized; verify against the data).
    """
    image_path = os.path.join(file_loc, 'images', file)
    # Open the image only to read its size, and release the handle.
    with Image.open(image_path) as img:
        (width, height) = img.size
    with tf.io.gfile.GFile(image_path, "rb") as fid:
        encoded = bytes(fid.read())
    image_format = b'png'
    filename = file.split('.')[0]
    data = np.genfromtxt(os.path.join(file_loc, 'labels', filename + '.txt'))
    # A single-row file loads as a 1-D array; force the (rows, 5) layout.
    data = data.reshape(-1, 5)

    classes = [int(x) for x in data[:, 0]]
    classes_text = [labels[x].encode('utf8') for x in classes]
    # Convert centre/extent boxes to min/max corners.
    xmins = data[:, 1] - (data[:, 3] / 2.0)
    xmaxs = data[:, 1] + (data[:, 3] / 2.0)
    ymins = data[:, 2] - (data[:, 4] / 2.0)
    ymaxs = data[:, 2] + (data[:, 4] / 2.0)

    tf_label_and_data = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(str.encode(filename)),
            'image/source_id':
            dataset_util.bytes_feature(str.encode(filename)),
            'image/encoded':
            dataset_util.bytes_feature(encoded),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return tf_label_and_data
def create_tf_example(group, path):
    """Build a tf.train.Example for one image and its annotated boxes.

    Args:
        group: record with a ``filename`` attribute and an ``object`` table
            whose rows (via ``iterrows()``) provide ``xmin``, ``xmax``,
            ``ymin``, ``ymax`` and ``class``.
        path: directory containing the image named by ``group.filename``.

    Returns:
        tf.train.Example with the raw image bytes, the image size and the
        boxes. Coordinates are stored exactly as found in the rows (they
        are not divided by the image size), and the encoded class names
        are written under 'image/object/class/label' as a bytes list.
    """
    image_path = os.path.join(path, '{}'.format(group.filename))
    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_jpg = fid.read()
    # The image is decoded only to read its pixel dimensions.
    width, height = Image.open(io.BytesIO(encoded_jpg)).size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'

    rows = [row for _, row in group.object.iterrows()]
    xmins = [row['xmin'] for row in rows]
    xmaxs = [row['xmax'] for row in rows]
    ymins = [row['ymin'] for row in rows]
    ymaxs = [row['ymax'] for row in rows]
    classes = [row['class'].encode('utf8') for row in rows]

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/label':
            dataset_util.bytes_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
Ejemplo n.º 10
0
def dict_to_coco_example(img_data):
    """Convert the dictionary describing one image to a tf.Example proto.

    Args:
        img_data: information about one image, with the keys 'bboxes',
            'labels', 'text', 'height', 'width', 'pixel_data' (encoded
            image bytes) and 'file' (str).
    Returns:
        example: The converted tf.Example
    """
    boxes = list(img_data['bboxes'])
    # Each box is read as xmin=box[2], xmax=box[0], ymin=box[3], ymax=box[1].
    # NOTE(review): this implies an [xmax, ymax, xmin, ymin] layout; confirm
    # against the code that produces 'bboxes'.
    xmin = [box[2] for box in boxes]
    xmax = [box[0] for box in boxes]
    ymin = [box[3] for box in boxes]
    ymax = [box[1] for box in boxes]

    feature = {
        'image/height': dataset_util.int64_feature(img_data['height']),
        'image/width': dataset_util.int64_feature(img_data['width']),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/label':
            dataset_util.int64_list_feature(img_data['labels']),
        'image/object/class/text':
            dataset_util.bytes_list_feature(img_data['text']),
        'image/encoded':
            dataset_util.bytes_feature(img_data['pixel_data']),
        'image/format':
            dataset_util.bytes_feature('jpeg'.encode('utf-8')),
        'image/object/class/file':
            dataset_util.bytes_feature(img_data['file'].encode('utf-8')),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
Ejemplo n.º 11
0
def __create_tf_example(frame_data, sorted_label_list):
    """Build a tf.train.Example for one frame and summarize its labels.

    Args:
        frame_data: object with ``image`` (encoded image bytes), ``format``
            (e.g. 'jpg'), ``filename`` and ``bboxes_text`` attributes.
        sorted_label_list: label names; a label's position in this list is
            used as its integer class id.

    Returns:
        A tuple ``(tf_example, label_counter_for_frame, is_negative)``: the
        example, a Counter of the frame's labels, and whether the frame has
        no boxes at all.
    """
    im = PIL.Image.open(io.BytesIO(frame_data.image))
    # Pillow calls the JPEG codec 'JPEG'; other formats are upper-cased.
    if frame_data.format == 'jpg':
        pil_format = 'JPEG'
    else:
        pil_format = frame_data.format.upper()
    # The image is re-encoded through Pillow before being stored.
    buffer = io.BytesIO()
    im.save(buffer, format=pil_format)
    width, height = im.width, im.height
    encoded_image_data = buffer.getvalue()

    rects, labels = bbox_writer.convert_text_to_rects_and_labels(
        frame_data.bboxes_text)

    def _unit(value, extent):
        # Normalize by the image extent and clamp into [0, 1].
        return max(min(value / extent, 1), 0)

    xmins = [_unit(rect[0], width) for rect in rects]   # left x
    xmaxs = [_unit(rect[2], width) for rect in rects]   # right x
    ymins = [_unit(rect[1], height) for rect in rects]  # top y
    ymaxs = [_unit(rect[3], height) for rect in rects]  # bottom y

    classes_txt = [label.encode('utf-8') for label in labels]
    label_to_id_dict = {label: i for i, label in enumerate(sorted_label_list)}
    class_ids = [label_to_id_dict[label] for label in labels]

    encoded_name = frame_data.filename.encode('utf-8')
    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(encoded_name),
        'image/source_id': dataset_util.bytes_feature(encoded_name),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format':
            dataset_util.bytes_feature(frame_data.format.encode('utf-8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_txt),
        'image/object/class/label':
            dataset_util.int64_list_feature(class_ids),
    }
    tf_example = tf.train.Example(
        features=tf.train.Features(feature=feature))

    label_counter_for_frame = collections.Counter(labels)
    is_negative = len(rects) == 0
    return tf_example, label_counter_for_frame, is_negative
Ejemplo n.º 12
0
def create_tf_example(height, width, filename, encoded_image_data,
                      image_format, xmins, xmaxs, ymins, ymaxs, classes_text,
                      classes):
    """Assemble a detection tf.train.Example from already-prepared values.

    Args:
        height: image height.
        width: image width.
        filename: image file name as bytes; also stored as the source id.
        encoded_image_data: encoded image bytes.
        image_format: image format as bytes, e.g. b'jpeg' or b'png'.
        xmins: normalized left x coordinate of each box.
        xmaxs: normalized right x coordinate of each box.
        ymins: normalized top y coordinate of each box.
        ymaxs: normalized bottom y coordinate of each box.
        classes_text: class name (bytes) of each box.
        classes: integer class id of each box.

    Returns:
        The assembled tf.train.Example.
    """
    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def dict_to_tf_example(data, label):
    """Build a segmentation tf.train.Example from an image and its label image.

    Args:
        data: path of the input image file; its bytes are stored unchanged.
        label: path of the label image, read with ``cv2.imread`` and
            converted to a class mask by ``image2label``.

    Returns:
        The tf.train.Example, or ``None`` when the label image is smaller
        than ``vgg_16.default_image_size`` in either dimension.
    """
    with open(data, 'rb') as inf:
        encoded_data = inf.read()
    img_label = cv2.imread(label)
    img_mask = image2label(img_label)
    encoded_label = img_mask.astype(np.uint8).tobytes()

    height, width = img_label.shape[0], img_label.shape[1]
    if height < vgg_16.default_image_size or width < vgg_16.default_image_size:
        # Too small for the final random crop; skip this sample.
        return None

    # Base name of the input path. This used to sit after the return above,
    # which made it unreachable and left ``fname`` undefined below.
    fname = data[data.rfind('/') + 1:]
    feature_dict = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(fname.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_data),
        'image/label': dataset_util.bytes_feature(encoded_label),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
    }
    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return example
Ejemplo n.º 14
0
def create_tf_example(input_features, image_dir='/'):
    """Build a classification tf.train.Example from an image path and label.

    The image is loaded with OpenCV and re-encoded as JPEG before storing.

    Args:
        input_features: mapping with 'image' (path relative to
            ``image_dir``) and 'label' (stored as an int64 feature).
        image_dir: directory the image path is joined onto.

    Returns:
        tf.train.Example with 'image/encoded', 'image/label' and
        'image/format'.
    """
    image_path = input_features['image']
    label = input_features['label']
    image_path = os.path.join(image_dir, image_path)
    image = cv2.imread(image_path)
    # ndarray.tostring() is deprecated and removed in NumPy 2; tobytes()
    # returns the same bytes.
    encoded_jpg = cv2.imencode('.jpg', image)[1].tobytes()
    feature_dict = {
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/label': dataset_util.int64_feature(label),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
    }
    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return example
Ejemplo n.º 15
0
def create_tf_example(csv, img_dir):
    """Build a single-box tf.train.Example from one CSV row.

    Args:
        csv: sequence laid out as
            ``[image file name, x1, y1, x2, y2, class index]``.
        img_dir: directory containing the image file.

    Returns:
        tf.train.Example with the raw image bytes and the one box, its
        coordinates divided by the image width/height.
    """
    img_fname = csv[0]
    x1, y1, x2, y2 = [int(value) for value in csv[1:-1]]
    class_index = int(csv[-1])
    class_name = config.CLASS_NAMES[class_index].encode('utf8')

    with tf.gfile.GFile(os.path.join(img_dir, img_fname), 'rb') as fid:
        encoded_jpg = fid.read()
    # The image is decoded only to read its pixel dimensions.
    width, height = Image.open(io.BytesIO(encoded_jpg)).size

    filename = img_fname.encode('utf8')
    image_format = b'jpg'

    # Every per-object feature is a one-element list.
    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin':
            dataset_util.float_list_feature([x1 / width]),
        'image/object/bbox/xmax':
            dataset_util.float_list_feature([x2 / width]),
        'image/object/bbox/ymin':
            dataset_util.float_list_feature([y1 / height]),
        'image/object/bbox/ymax':
            dataset_util.float_list_feature([y2 / height]),
        'image/object/class/text':
            dataset_util.bytes_list_feature([class_name]),
        'image/object/class/label':
            dataset_util.int64_list_feature([class_index]),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
Ejemplo n.º 16
0
def dict_to_tf_example(data, label):
    """Build a segmentation tf.train.Example from an image and its label image.

    Args:
        data: path of the input image file; its bytes are stored unchanged.
        label: path of the label image, read with ``cv2.imread`` and
            converted to a class mask by ``image2label``.

    Returns:
        The tf.train.Example, or ``None`` when the label image is smaller
        than ``vgg_16.default_image_size`` in either dimension.
    """
    print("data----", data)
    with open(data, 'rb') as inf:
        encoded_data = inf.read()
    img_label = cv2.imread(label)
    img_mask = image2label(img_label)
    encoded_label = img_mask.astype(np.uint8).tobytes()

    height, width = img_label.shape[0], img_label.shape[1]
    if height < vgg_16.default_image_size or width < vgg_16.default_image_size:
        # Too small for the final random crop; skip this sample.
        return None

    # Last path component. The raw string avoids the invalid '\/' escape
    # while matching the same runs of '/'.
    fname = re.split(r'/+', data)[-1]
    feature_dict = {
        'image/height':
        dataset_util.int64_feature(height),
        'image/width':
        dataset_util.int64_feature(width),
        'image/filename':
        dataset_util.bytes_feature(fname.encode('utf8')),
        'image/encoded':
        dataset_util.bytes_feature(encoded_data),
        'image/label':
        dataset_util.bytes_feature(encoded_label),
        # The format is always recorded as 'jpeg'.
        'image/format':
        dataset_util.bytes_feature('jpeg'.encode('utf8')),
    }
    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return example
def dict_to_coco_example(img_data):
    """Convert one image's caption record to a tf.Example proto.

    Args:
        img_data: mapping with 'id' (stored as an int64 feature), 'caption'
            (stored as a bytes list) and 'pixel_data' (stored as a bytes
            feature under 'image/encoded').
    Returns:
        example: The converted tf.Example
    """
    feature = {
        'image/id': dataset_util.int64_feature(img_data['id']),
        'image/caption': dataset_util.bytes_list_feature(img_data['caption']),
        'image/encoded': dataset_util.bytes_feature(img_data['pixel_data']),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
Ejemplo n.º 18
0
def _encode_image_to_tfrecord(image_path, category_id):
  """Wrap an image file and its category id in a tf.train.Example.

  Args:
    image_path: path of the image file, using '/' as separator.
    category_id: value stored under 'category_id' as an int64 feature.

  Returns:
    tf.train.Example with the keys 'image_name', 'encoded_image',
    'category_id' and 'format'.
  """
  with tf.io.gfile.GFile(image_path, 'rb') as fid:
    encoded_jpg = fid.read()

  # Keep only the part after the last '/'.
  image_name = image_path.rsplit('/', 1)[-1]

  features = tf.train.Features(feature={
    'image_name': dataset_util.bytes_feature(image_name.encode('utf8')),
    'encoded_image': dataset_util.bytes_feature(encoded_jpg),
    'category_id': dataset_util.int64_feature(category_id),
    'format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
  })
  return tf.train.Example(features=features)
Ejemplo n.º 19
0
def json_to_record(j):
    """Convert one parsed JSON annotation record into a tf.train.Example.

    Args:
        j: mapping with 'image_size' (a one-element list holding a dict
            with 'height' and 'width'), 'categories', 'annotations' (each
            with a 'class_id') and 'file' (path of the image).

    Returns:
        tf.train.Example with the raw image bytes and one box per
        annotation, normalized by the image size.

    Raises:
        AssertionError: if 'image_size' does not have exactly one entry or
            'categories' and 'annotations' differ in length.
    """
    assert (len(j["image_size"]) == 1)
    assert (len(j["categories"]) == len(j["annotations"]))

    size = j["image_size"][0]
    height = size["height"]
    width = size["width"]

    encoded_name = os.path.basename(j["file"]).encode("utf8")

    with tf.gfile.GFile(j["file"], "rb") as fid:
        encoded_image_data = fid.read()
    image_format = b'jpeg'

    xmins, xmaxs, ymins, ymaxs = [], [], [], []
    classes_text, classes = [], []

    for annot in j["annotations"]:
        class_id = annot["class_id"]
        classes_text.append(class_id_to_name(class_id).encode("utf8"))
        # class_ids are indexed by 1 for tensorflow
        classes.append(class_id + 1)
        corners = get_box_corners(annot)
        xmins.append(corners["xmin"] / width)
        xmaxs.append(corners["xmax"] / width)
        ymins.append(corners["ymin"] / height)
        ymaxs.append(corners["ymax"] / height)

    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(encoded_name),
        'image/source_id': dataset_util.bytes_feature(encoded_name),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
Ejemplo n.º 20
0
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False):
    """Converts image and annotations to a tf.Example proto.

    Args:
      image: dict with keys:
        [u'license', u'file_name', u'coco_url', u'height', u'width',
        u'date_captured', u'flickr_url', u'id']
      annotations_list:
        list of dicts with keys:
        [u'segmentation', u'area', u'iscrowd', u'image_id',
        u'bbox', u'category_id', u'id']
        Notice that bounding box coordinates in the official COCO dataset are
        given as [x, y, width, height] tuples using absolute coordinates where
        x, y represent the top-left (0-indexed) corner.  This function converts
        to the format expected by the Tensorflow Object Detection API (which
        is [ymin, xmin, ymax, xmax] with coordinates normalized relative
        to image size).
      image_dir: directory containing the image files.
      category_index: a dict containing COCO category information keyed
        by the 'id' field of each category.  See the
        label_map_util.create_category_index function.
      include_masks: Whether to include instance segmentations masks
        (PNG encoded) in the result. default: False.

    Returns:
      key: Hex-encoded SHA-256 digest of the encoded image bytes.
      example: The converted tf.Example.
      num_annotations_skipped: Number of (invalid) annotations that were
        ignored.
    """
    image_height = image['height']
    image_width = image['width']
    filename = image['file_name']
    image_id = image['id']

    full_path = os.path.join(image_dir, filename)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    key = hashlib.sha256(encoded_jpg).hexdigest()

    xmin = []
    xmax = []
    ymin = []
    ymax = []
    is_crowd = []
    category_names = []
    category_ids = []
    area = []
    encoded_mask_png = []
    num_annotations_skipped = 0
    for object_annotations in annotations_list:
        (x, y, width, height) = tuple(object_annotations['bbox'])
        # Degenerate boxes and boxes that extend past the right/bottom image
        # edge are dropped and counted rather than written.
        if width <= 0 or height <= 0:
            num_annotations_skipped += 1
            continue
        if x + width > image_width or y + height > image_height:
            num_annotations_skipped += 1
            continue
        xmin.append(float(x) / image_width)
        xmax.append(float(x + width) / image_width)
        ymin.append(float(y) / image_height)
        ymax.append(float(y + height) / image_height)
        is_crowd.append(object_annotations['iscrowd'])
        category_id = int(object_annotations['category_id'])
        category_ids.append(category_id)
        category_names.append(
            category_index[category_id]['name'].encode('utf8'))
        area.append(object_annotations['area'])

        if include_masks:
            run_len_encoding = mask.frPyObjects(
                object_annotations['segmentation'], image_height, image_width)
            binary_mask = mask.decode(run_len_encoding)
            # Non-crowd masks are collapsed along axis 2 into one 2-D mask.
            if not object_annotations['iscrowd']:
                binary_mask = np.amax(binary_mask, axis=2)
            pil_image = PIL.Image.fromarray(binary_mask)
            output_io = io.BytesIO()
            pil_image.save(output_io, format='PNG')
            encoded_mask_png.append(output_io.getvalue())
    feature_dict = {
        'image/height': dataset_util.int64_feature(image_height),
        'image/width': dataset_util.int64_feature(image_width),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
        dataset_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
        'image/object/class/text':
        dataset_util.bytes_list_feature(category_names),
        # The numeric ids were collected but never written before; emit them
        # so readers that rely on integer labels get one per box.
        'image/object/class/label':
        dataset_util.int64_list_feature(category_ids),
        'image/object/is_crowd': dataset_util.int64_list_feature(is_crowd),
        'image/object/area': dataset_util.float_list_feature(area),
    }
    if include_masks:
        feature_dict['image/object/mask'] = (
            dataset_util.bytes_list_feature(encoded_mask_png))
    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return key, example, num_annotations_skipped
Ejemplo n.º 21
0
    def _create_tf_entry(self, categories, img, label, filename, annotations):
        imageFormat = b'jpg'

        width, height = img.size

        imgByteArr = io.BytesIO()
        img.save(imgByteArr, format='JPEG')
        encodedImageData = imgByteArr.getvalue()

        xmins = []
        xmaxs = []
        ymins = []
        ymaxs = []

        for annotation in annotations:
            rect = None
            if type(
                    annotation.data
            ) is Rectangle:  #currently we only support Rect annotations, TODO: change me
                rect = annotation.data
            elif type(annotation.data) is Polygon:
                rect = annotation.data.rect

            if rect is not None:
                trimmed_rect = rect.trim(
                    Rectangle(0, 0, width, height)
                )  #scale to image dimension in case annotation exceeds image width/height

                if trimmed_rect.left < 0:
                    raise ImageMonkeyGeneralError(
                        "trimmed rect left dimension invalid! (<0)")
                if trimmed_rect.top < 0:
                    raise ImageMonkeyGeneralError(
                        "trimmed rect top dimension invalid! (<0)")
                if trimmed_rect.width < 0:
                    raise ImageMonkeyGeneralError(
                        "trimmed rect width dimension invalid! (<0)")
                if trimmed_rect.height < 0:
                    raise ImageMonkeyGeneralError(
                        "trimmed rect height dimension invalid! (<0)")

                if (trimmed_rect.left + trimmed_rect.width) > width:
                    raise ImageMonkeyGeneralError(
                        "bounding box width > image width!")
                if (trimmed_rect.top + trimmed_rect.height) > height:
                    raise ImageMonkeyGeneralError(
                        "bounding box height > image height!")

                xmin = trimmed_rect.left / float(width)
                xmax = (trimmed_rect.left + trimmed_rect.width) / float(width)
                ymin = trimmed_rect.top / float(height)
                ymax = (trimmed_rect.top + trimmed_rect.height) / float(height)

                #sanity checks
                if xmin > xmax:
                    raise ImageMonkeyGeneralError("xmin > xmax!")

                if ymin > ymax:
                    raise ImageMonkeyGeneralError("ymin > ymax!")

                if (xmin == 0) and (xmax == 0) and (ymin == 0) and (ymax == 0):
                    continue  #skip bounding boxes that are 0

                xmins.append(xmin)
                xmaxs.append(xmax)
                ymins.append(ymin)
                ymaxs.append(ymax)

        #we might have some images in our dataset, which don't have a annotation, skip those
        if ((len(xmins) == 0) or (len(xmaxs) == 0) or (len(ymins) == 0)
                or (len(ymaxs) == 0)):
            return None

        classes = [(categories.index(label) + 1)] * len(
            xmins)  #class indexes start with 1
        labels = [label.encode('utf8')] * len(xmins)

        tf_example = tf.train.Example(features=tf.train.Features(
            feature={
                'image/height':
                dataset_util.int64_feature(height),
                'image/width':
                dataset_util.int64_feature(width),
                'image/filename':
                dataset_util.bytes_feature(filename.encode()),
                'image/source_id':
                dataset_util.bytes_feature(filename.encode()),
                'image/encoded':
                dataset_util.bytes_feature(encodedImageData),
                'image/format':
                dataset_util.bytes_feature(imageFormat),
                'image/object/bbox/xmin':
                dataset_util.float_list_feature(xmins),
                'image/object/bbox/xmax':
                dataset_util.float_list_feature(xmaxs),
                'image/object/bbox/ymin':
                dataset_util.float_list_feature(ymins),
                'image/object/bbox/ymax':
                dataset_util.float_list_feature(ymaxs),
                'image/object/class/text':
                dataset_util.bytes_list_feature(labels),
                'image/object/class/label':
                dataset_util.int64_list_feature(classes),
            }))
        return tf_example
Ejemplo n.º 22
0
def create_tf_example(filename, label_file):
    """Builds a tf.train.Example from an image and a space-delimited label file.

    The label file's first row is treated as a header and skipped. In every
    other non-empty row, columns 0-3 are read as xmin, ymin, xmax, ymax in
    pixels and the last column as the class name. Coordinates are normalized
    by the image size.

    Args:
      filename: path to the image file (text string).
      label_file: path to the label file for that image.

    Returns:
      The tf.train.Example for the image and its boxes.

    Raises:
      ValueError: if OpenCV cannot read the image.
    """
    img = cv2.imread(filename)
    if img is None:
        # cv2.imread signals failure by returning None; fail with a clear
        # message instead of an AttributeError on ``.shape``.
        raise ValueError('Could not read image: {}'.format(filename))
    height, width = img.shape[:2]

    with tf.gfile.GFile(filename, 'rb') as fid:
        encoded_image_data = fid.read()

    image_format = b'jpg'
    # Bytes features need bytes; encode the path like the sibling converters.
    encoded_filename = filename.encode('utf8')

    xmins = []  # Normalized left x coordinate (1 per box)
    xmaxs = []  # Normalized right x coordinate (1 per box)
    ymins = []  # Normalized top y coordinate (1 per box)
    ymaxs = []  # Normalized bottom y coordinate (1 per box)
    classes_text = []  # Class name as UTF-8 bytes (1 per box)
    classes = []  # Integer class id (1 per box)

    with open(label_file, 'r') as f:
        csvreader = csv.reader(f, delimiter=' ')
        next(csvreader, None)  # skip the header row

        for row in csvreader:
            if not row:
                # Blank lines parse to an empty row; nothing to record.
                continue

            name = row[-1]
            classes_text.append(name.encode('utf8'))
            classes.append(get_index(name))

            xmins.append(float(row[0]) / width)
            xmaxs.append(float(row[2]) / width)
            ymins.append(float(row[1]) / height)
            ymaxs.append(float(row[3]) / height)

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(encoded_filename),
            'image/source_id':
            dataset_util.bytes_feature(encoded_filename),
            'image/encoded':
            dataset_util.bytes_feature(encoded_image_data),
            'image/format':
            dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))

    return tf_example
Ejemplo n.º 23
0
def xml_to_tf(path_input, path_output):
    """Converts every ``.xml`` annotation in a directory into one TFRecord file.

    For each XML file the referenced image is read from the same directory
    and written, together with its normalized boxes, as a tf.train.Example.

    Args:
      path_input: directory holding the XML files and their images.
      path_output: path of the TFRecord file to create.
    """
    print(path_output)
    writer = tf.io.TFRecordWriter(path_output)
    try:
        for entry in os.listdir(path_input):
            if not entry.endswith(".xml"):
                continue

            # Join with the directory so path_input works with or without a
            # trailing separator, matching how the image path is built below.
            root = ET.parse(os.path.join(path_input, entry)).getroot()

            # Fields are read by child position.
            # NOTE(review): assumes root[1] is the filename element and
            # root[4] the size element (width, height first) - confirm
            # against the annotation tool's output.
            filename = root[1].text
            width = int(root[4][0].text)
            height = int(root[4][1].text)

            xmins = []
            xmaxs = []
            ymins = []
            ymaxs = []
            classes_text = []
            classes = []

            for member in root.findall('object'):
                # NOTE(review): assumes member[0] is the class name and
                # member[4] the box (xmin, ymin, xmax, ymax) - confirm.
                class_name = member[0].text
                xmin = int(member[4][0].text)
                ymin = int(member[4][1].text)
                xmax = int(member[4][2].text)
                ymax = int(member[4][3].text)

                # float() keeps the normalization a true division even
                # where int / int truncates.
                xmins.append(xmin / float(width))
                xmaxs.append(xmax / float(width))
                ymins.append(ymin / float(height))
                ymaxs.append(ymax / float(height))
                classes_text.append(class_name.encode('utf8'))
                classes.append(class_text_to_int(class_name))

            with tf.io.gfile.GFile(os.path.join(path_input, filename),
                                   'rb') as fid:
                encoded_jpg = fid.read()

            tf_example = tf.train.Example(features=tf.train.Features(
                feature={
                    'image/height':
                    dataset_util.int64_feature(height),
                    'image/width':
                    dataset_util.int64_feature(width),
                    'image/filename':
                    dataset_util.bytes_feature(filename.encode('utf8')),
                    'image/source_id':
                    dataset_util.bytes_feature(filename.encode('utf8')),
                    'image/encoded':
                    dataset_util.bytes_feature(encoded_jpg),
                    'image/format':
                    dataset_util.bytes_feature(IMAGE_FORMAT),
                    'image/object/bbox/xmin':
                    dataset_util.float_list_feature(xmins),
                    'image/object/bbox/xmax':
                    dataset_util.float_list_feature(xmaxs),
                    'image/object/bbox/ymin':
                    dataset_util.float_list_feature(ymins),
                    'image/object/bbox/ymax':
                    dataset_util.float_list_feature(ymaxs),
                    'image/object/class/text':
                    dataset_util.bytes_list_feature(classes_text),
                    'image/object/class/label':
                    dataset_util.int64_list_feature(classes),
                }))

            writer.write(tf_example.SerializeToString())
    finally:
        # Close even when a file fails to parse, so records already written
        # are not lost.
        writer.close()
    output_path = os.path.join(os.getcwd(), path_output)
    print('Successfully created the TFRecords: {}'.format(output_path))
Ejemplo n.º 24
0
def dict_to_tf_example(data,
                       image_path,
                       label_map_dict,
                       ignore_difficult_instances=False,
                       image_subdirectory='images'):
    """Builds a tf.Example from a PASCAL-style annotation dict and an image.

    Box coordinates from ``data`` are normalized by the width and height
    recorded in ``data['size']``.

    Args:
      data: dict holding PASCAL XML fields for a single image (obtained by
        running dataset_util.recursive_parse_xml_to_dict).
      image_path: Full path to image file.
      label_map_dict: A map from string label names to integer ids.
      ignore_difficult_instances: Whether to skip difficult instances. Every
        object is currently treated as not difficult, so this has no effect.
      image_subdirectory: Unused; kept for signature compatibility.

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the image at ``image_path`` is not a JPEG.
    """
    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_jpg = fid.read()
    if PIL.Image.open(io.BytesIO(encoded_jpg)).format != 'JPEG':
        raise ValueError('Image format not JPEG')
    sha256_key = hashlib.sha256(encoded_jpg).hexdigest()

    img_w = int(data['size']['width'])
    img_h = int(data['size']['height'])

    base_name = image_path.split('/')[-1]

    left, top, right, bottom = [], [], [], []
    label_ids, label_names = [], []
    truncated_flags, difficult_flags = [], []
    # Pose is not read from the annotations, so this list stays empty.
    views = []

    annotated_objects = data['object'] if 'object' in data else []
    for obj in annotated_objects:
        # The per-object 'difficult' field is not consulted.
        difficult = False
        if ignore_difficult_instances and difficult:
            continue

        difficult_flags.append(int(difficult))

        left.append(float(obj['bndbox']['xmin']) / img_w)
        top.append(float(obj['bndbox']['ymin']) / img_h)
        right.append(float(obj['bndbox']['xmax']) / img_w)
        bottom.append(float(obj['bndbox']['ymax']) / img_h)
        label_names.append(obj['name'].encode('utf8'))
        label_ids.append(label_map_dict[obj['name']])
        # The per-object 'truncated' field is not consulted either.
        truncated_flags.append(0)

    encoded_name = base_name.encode('utf8')
    features = {
        'image/height': dataset_util.int64_feature(img_h),
        'image/width': dataset_util.int64_feature(img_w),
        'image/filename': dataset_util.bytes_feature(encoded_name),
        'image/source_id': dataset_util.bytes_feature(encoded_name),
        'image/key/sha256':
        dataset_util.bytes_feature(sha256_key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(left),
        'image/object/bbox/xmax': dataset_util.float_list_feature(right),
        'image/object/bbox/ymin': dataset_util.float_list_feature(top),
        'image/object/bbox/ymax': dataset_util.float_list_feature(bottom),
        'image/object/class/text':
        dataset_util.bytes_list_feature(label_names),
        'image/object/class/label':
        dataset_util.int64_list_feature(label_ids),
        'image/object/difficult':
        dataset_util.int64_list_feature(difficult_flags),
        'image/object/truncated':
        dataset_util.int64_list_feature(truncated_flags),
        'image/object/view': dataset_util.bytes_list_feature(views),
    }
    return tf.train.Example(features=tf.train.Features(feature=features))
Ejemplo n.º 25
0
def background_tf_example(image_path, ):
    """Builds a tf.Example for an image that carries no object annotations.

    Image size is taken from the decoded image itself; every per-object
    feature is written as an empty list.

    Args:
      image_path: Full path to image file.

    Returns:
      example: The converted tf.Example.

    Raises:
      ValueError: if the image at ``image_path`` is not a JPEG.
    """
    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_jpg = fid.read()
    image = PIL.Image.open(io.BytesIO(encoded_jpg))
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    sha256_key = hashlib.sha256(encoded_jpg).hexdigest()

    encoded_name = image_path.rsplit('/', 1)[-1].encode('utf8')
    img_w = image.width
    img_h = image.height

    features = {
        'image/height': dataset_util.int64_feature(img_h),
        'image/width': dataset_util.int64_feature(img_w),
        'image/filename': dataset_util.bytes_feature(encoded_name),
        'image/source_id': dataset_util.bytes_feature(encoded_name),
        'image/key/sha256':
        dataset_util.bytes_feature(sha256_key.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        # No objects: every per-object list below is intentionally empty.
        'image/object/bbox/xmin': dataset_util.float_list_feature([]),
        'image/object/bbox/xmax': dataset_util.float_list_feature([]),
        'image/object/bbox/ymin': dataset_util.float_list_feature([]),
        'image/object/bbox/ymax': dataset_util.float_list_feature([]),
        'image/object/class/text': dataset_util.bytes_list_feature([]),
        'image/object/class/label': dataset_util.int64_list_feature([]),
        'image/object/difficult': dataset_util.int64_list_feature([]),
        'image/object/truncated': dataset_util.int64_list_feature([]),
        'image/object/view': dataset_util.bytes_list_feature([]),
    }
    return tf.train.Example(features=tf.train.Features(feature=features))
Ejemplo n.º 26
0
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False):
    """Builds a tf.train.Example from an image record and its box annotations.

    Args:
      image: dict with at least the keys 'height', 'width' and 'filename'.
      annotations_list: list of dicts with keys 'xmin', 'xmax', 'ymin',
        'ymax' (absolute pixels), 'label_text' and 'label'.
      image_dir: directory containing the image files.
      category_index: unused; kept for signature compatibility.
      include_masks: unused; kept for signature compatibility.

    Returns:
      The tf.train.Example for the image and its boxes.
    """
    height = image['height']  # Image height
    width = image['width']  # Image width
    filename = image['filename']  # Name of the file inside image_dir

    full_path = os.path.join(image_dir, filename)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    # Opened only as a sanity check (Pillow raises if it cannot identify the
    # bytes as an image); the result is not used and no longer shadows the
    # ``image`` argument.
    PIL.Image.open(io.BytesIO(encoded_jpg))

    # splitext keeps the leading dot ('.jpg'); store the bare extension.
    image_format = os.path.splitext(filename)[1].lstrip('.')

    xmins = []  # Normalized left x coordinate (1 per box)
    xmaxs = []  # Normalized right x coordinate (1 per box)
    ymins = []  # Normalized top y coordinate (1 per box)
    ymaxs = []  # Normalized bottom y coordinate (1 per box)
    classes_text = []  # Class name as UTF-8 bytes (1 per box)
    classes = []  # Integer class id (1 per box)

    for annotation in annotations_list:
        # float() keeps this a true division even for integer inputs.
        xmins.append(annotation['xmin'] / float(width))
        xmaxs.append(annotation['xmax'] / float(width))
        ymins.append(annotation['ymin'] / float(height))
        ymaxs.append(annotation['ymax'] / float(height))
        classes_text.append(annotation['label_text'].encode('utf8'))
        classes.append(annotation['label'])

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
            dataset_util.int64_feature(height),
            'image/width':
            dataset_util.int64_feature(width),
            'image/filename':
            dataset_util.bytes_feature(filename.encode('utf8')),
            'image/source_id':
            dataset_util.bytes_feature(filename.encode('utf8')),
            'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
            'image/format':
            dataset_util.bytes_feature(image_format.encode('utf8')),
            'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
            dataset_util.int64_list_feature(classes),
        }))
    return tf_example
Ejemplo n.º 27
0
def read_xml_make_tfrecord():
    """Writes sharded train/valid/test TFRecords for images with mask files.

    Every ``.jpg`` in the module-level ``folder`` is paired with the file of
    the same name under ``mask/``. The bounding box is taken from the mask
    contour with the most points, and the mask itself is stored as a PNG of
    the pixels equal to 255. Entries are split by their position in the
    directory listing: the first 80% go to 8 train shards, the next 10% to
    one valid shard and the rest to one test shard, all under ``tfrecord/``.

    Images that cannot be read, or whose mask has no contour, are reported
    and skipped. All writers are closed before returning, including on error.
    """
    num_data = 8
    num_eval_shards = int(num_data / 8)

    def shard_path(split, index, total):
        # e.g. tfrecord/train/train.tfrecord-00000-of-00008
        return 'tfrecord/{0}/{0}.tfrecord-{1:05d}-of-{2:05d}'.format(
            split, index, total)

    # Writers live in a local table (not globals()) so they can be closed
    # reliably once the run finishes.
    writers = {'train': [], 'valid': [], 'test': []}
    try:
        for split, total in (('train', num_data),
                             ('test', num_eval_shards),
                             ('valid', num_eval_shards)):
            for i in range(total):
                writers[split].append(
                    tensorflow.io.TFRecordWriter(shard_path(split, i, total)))

        # List the directory once so the count and the iteration agree.
        names = os.listdir(folder)
        length = len(names)

        for number, img_name in enumerate(names):
            if img_name[-4:] != '.jpg':
                continue
            image_path = folder + img_name
            mask_path = 'mask/' + img_name

            img = cv2.imread(image_path)
            mask = cv2.imread(mask_path, 0)
            if img is None or mask is None:
                print('skipping {}: image or mask could not be read'.format(
                    img_name))
                continue
            height, width = img.shape[:2]

            # Close small holes before tracing contours.
            mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE,
                                    np.ones((5, 5), np.uint8))
            # OpenCV 3 returns (image, contours, hierarchy) and OpenCV 4
            # (contours, hierarchy); contours is second-to-last in both.
            contours = cv2.findContours(mask, cv2.RETR_TREE,
                                        cv2.CHAIN_APPROX_SIMPLE)[-2]
            if not contours:
                print('skipping {}: mask has no contour'.format(img_name))
                continue

            # The contour with the most points defines the bounding box.
            largest = max(contours, key=len)
            x = [point[0][0] for point in largest]
            y = [point[0][1] for point in largest]
            xmin = min(x)
            xmax = max(x)
            ymin = min(y)
            ymax = max(y)

            object_name = 'passport'
            pixel_val = 255
            with tensorflow.io.gfile.GFile(image_path, 'rb') as fid:
                encoded_image_data = fid.read()
            key = hashlib.sha256(encoded_image_data).hexdigest()

            with tensorflow.io.gfile.GFile(mask_path, 'rb') as fid:
                encoded_mask_data = fid.read()

            # Binary (0/1) PNG mask of the pixels equal to pixel_val.
            mask_np = np.asarray(
                Image.open(io.BytesIO(encoded_mask_data)).convert('L'))
            mask_remapped = (mask_np == pixel_val).astype(np.uint8)
            output = io.BytesIO()
            Image.fromarray(mask_remapped).save(output, format='PNG')

            xmins = [xmin / float(width)]
            xmaxs = [xmax / float(width)]
            ymins = [ymin / float(height)]
            ymaxs = [ymax / float(height)]
            classes_text = [object_name.encode('utf8')]
            classes = [1]
            masks = [output.getvalue()]

            print(img_name)
            example = tensorflow.train.Example(
                features=tensorflow.train.Features(
                    feature={
                        'image/height':
                        dataset_util.int64_feature(height),
                        'image/width':
                        dataset_util.int64_feature(width),
                        'image/filename':
                        dataset_util.bytes_feature(img_name.encode('utf8')),
                        'image/source_id':
                        dataset_util.bytes_feature(img_name.encode('utf8')),
                        'image/key/sha256':
                        dataset_util.bytes_feature(key.encode('utf8')),
                        'image/encoded':
                        dataset_util.bytes_feature(encoded_image_data),
                        'image/format':
                        dataset_util.bytes_feature('jpeg'.encode('utf8')),
                        'image/object/bbox/xmin':
                        dataset_util.float_list_feature(xmins),
                        'image/object/bbox/xmax':
                        dataset_util.float_list_feature(xmaxs),
                        'image/object/bbox/ymin':
                        dataset_util.float_list_feature(ymins),
                        'image/object/bbox/ymax':
                        dataset_util.float_list_feature(ymaxs),
                        'image/object/class/text':
                        dataset_util.bytes_list_feature(classes_text),
                        'image/object/class/label':
                        dataset_util.int64_list_feature(classes),
                        'image/object/mask':
                        dataset_util.bytes_list_feature(masks),
                    }))

            # Pick the split from the entry's position in the listing, then
            # spread entries evenly across that split's shards.
            if number < length * 0.8:
                split = 'train'
                fraction = number / (length * 0.8)
            elif number < length * 0.9:
                split = 'valid'
                fraction = (number - length * 0.8) / (length * 0.1)
            else:
                split = 'test'
                fraction = (number - length * 0.9) / (length * 0.1)
            shard_writers = writers[split]
            # Clamp so float rounding can never select a missing shard.
            shard = min(int(fraction * len(shard_writers)),
                        len(shard_writers) - 1)
            shard_writers[shard].write(example.SerializeToString())
    finally:
        for shard_writers in writers.values():
            for writer in shard_writers:
                writer.close()
Ejemplo n.º 28
0
def dict_to_tf_example(writer, data):
    """Writes one keypoint-heatmap tf.Example per usable object in ``data``.

    The image is loaded (converted to RGB when needed) and stored as raw
    pixel bytes. For each non-difficult object with enough visible keypoints
    a heatmap of shape (height, width, FLAGS.kpNum) is built. Each visible
    keypoint contributes a Gaussian blob (sigma 10, limited to a 36-pixel
    radius) to its own channel. The module-level counter ``_all_num`` is
    incremented for every example written.

    Args:
      writer: object with a ``write(bytes)`` method that receives each
        serialized example.
      data: dict holding PASCAL XML fields for a single image (obtained by
        running dataset_util.recursive_parse_xml_to_dict), with per-object
        'keypoints' containing 'visible', 'x' and 'y' sequences.

    Returns:
      None. Examples are written to ``writer``.
    """
    global _all_num

    full_path = os.path.join(data['folder'], data['filename']).replace(
        '/home/data/usrs/jiangyz/images', '/data1/chenyf')

    OriImg = PIL.Image.open(full_path)
    if (OriImg.mode != 'RGB'):
        OriImg = OriImg.convert("RGB")
        print(full_path + ' is not a rgb image, converting...')
    OriImgArray = np.asarray(OriImg)

    w = int(OriImgArray.shape[1])
    h = int(OriImgArray.shape[0])

    # Raw pixel bytes, identical for every object of this image.
    # tobytes() replaces the deprecated tostring() alias (same output).
    # NOTE(review): 'image/format' below says 'jpeg' although these bytes
    # are uncompressed pixels - confirm what the reader expects.
    img_raw = OriImgArray.tobytes()

    for obj in data['object']:
        difficult = bool(int(obj['difficult']))
        if difficult:
            # Difficult instances are reported and skipped. The former
            # interactive raw_input() pause is gone: it blocked batch runs
            # and does not exist on Python 3.
            print('there is a difficult instance.....')
            continue

        difficult_obj = int(difficult)
        classes_text = obj['name'].encode('utf8')
        classes = 0

        kp_cor_v = [int(x) for x in obj['keypoints']['visible']]

        truncated = int(obj['truncated'])
        poses = obj['pose'].encode('utf8')

        kpNum = FLAGS.kpNum

        # Skip objects with too few visible keypoints.
        if (sum(1 for x in kp_cor_v if x) < (kpNum + 1) / 2):
            continue

        # (keypoint id, x, y) for every visible keypoint. The id is kept so
        # each blob lands in that keypoint's own channel even when earlier
        # keypoints are invisible.
        visible_kps = []
        for kp_id in range(kpNum):
            if kp_cor_v[kp_id] != 0:
                xc = int(obj['keypoints']['x'][kp_id])
                yc = int(obj['keypoints']['y'][kp_id])
                visible_kps.append((kp_id, xc, yc))

        _all_num += 1

        heatmap = np.zeros([h, w, kpNum], np.float32)
        sigma = 10
        r = 36  # blob radius in pixels
        for kp_id, cor_x, cor_y in visible_kps:
            # Window of radius r around the keypoint, clipped to the image.
            x0 = max(cor_x - r, 0)
            x1 = min(cor_x + r, w - 1) + 1
            y0 = max(cor_y - r, 0)
            y1 = min(cor_y + r, h - 1) + 1
            if x0 >= x1 or y0 >= y1:
                # Keypoint lies too far outside the image to touch a pixel.
                continue
            dx = np.arange(x0, x1) - cor_x
            dy = np.arange(y0, y1) - cor_y
            dist_sq = dy[:, None] ** 2 + dx[None, :] ** 2
            heatmap[y0:y1, x0:x1, kp_id] += np.exp(
                -dist_sq / (2.0 * sigma * sigma))
        heatmap[heatmap > 1] = 1.0

        example = tf.train.Example(features=tf.train.Features(
            feature={
                'image/height':
                dataset_util.int64_feature(h),
                'image/width':
                dataset_util.int64_feature(w),
                'image/filename':
                dataset_util.bytes_feature(data['filename'].encode('utf8')),
                'image/source_id':
                dataset_util.bytes_feature(data['filename'].encode('utf8')),
                'image/encoded':
                dataset_util.bytes_feature(img_raw),
                'image/heatmap':
                dataset_util.float_list_feature(heatmap.flatten()),
                'image/keypointnumber':
                dataset_util.int64_feature(kpNum),
                'image/format':
                dataset_util.bytes_feature('jpeg'.encode('utf8')),
                'image/object/class/text':
                dataset_util.bytes_feature(classes_text),
                'image/object/class/label':
                dataset_util.int64_feature(classes),
                'image/object/difficult':
                dataset_util.int64_feature(difficult_obj),
                'image/object/truncated':
                dataset_util.int64_feature(truncated),
                'image/object/view':
                dataset_util.bytes_feature(poses),
            }))
        writer.write(example.SerializeToString())
Ejemplo n.º 29
0
def create_tf_example(image,
                      image_dir,
                      bbox_annotations=None,
                      category_index=None,
                      include_mask=False):
    """Build a tf.Example for one LVIS image and its box annotations.

    Args:
      image: dict describing the image. The keys read here are 'height',
        'width', 'coco_url', 'id', 'not_exhaustive_category_ids' and
        'neg_category_ids'.
      image_dir: directory that the relative image path is joined onto.
      bbox_annotations: optional list of annotation dicts. 'bbox',
        'category_id' and 'area' are read from each one, plus
        'segmentation' when include_mask is set. 'bbox' is unpacked as
        (x, y, width, height) in absolute pixels and stored as
        xmin/xmax/ymin/ymax divided by the image size and clipped to
        [0, 1]. A box that is empty after clipping is dropped together
        with its class, area and mask.
      category_index: dict keyed by category id whose entries carry a
        'name'. Only consulted when bbox_annotations is non-empty.
      include_mask: when true, each kept annotation's segmentation is
        decoded to a binary mask, PNG-encoded and stored under
        'image/object/mask'.

    Returns:
      A (success, filename, example) tuple. When the image file does not
      exist a warning is logged and (False, None, None) is returned;
      otherwise success is True, filename is the image path relative to
      image_dir and example is the assembled tf.train.Example.
    """
    img_h = image['height']
    img_w = image['width']
    # The relative path is the last two '/'-separated pieces of the URL.
    url_parts = image['coco_url'].split('/')
    filename = osp.join(*url_parts[-2:])

    image_id = image['id']
    not_exhaustive_ids = image['not_exhaustive_category_ids']
    neg_ids = image['neg_category_ids']

    full_path = os.path.join(image_dir, filename)
    if not tf.gfile.Exists(full_path):
        tf.logging.warn(f'image {full_path} not exists! skip')
        return False, None, None

    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()

    sha256_hex = hashlib.sha256(encoded_jpg).hexdigest()

    # Image-level features; insertion order mirrors the record layout.
    feature_dict = {}
    feature_dict['image/height'] = dataset_util.int64_feature(img_h)
    feature_dict['image/width'] = dataset_util.int64_feature(img_w)
    feature_dict['image/filename'] = dataset_util.bytes_feature(
        filename.encode('utf8'))
    feature_dict['image/source_id'] = dataset_util.bytes_feature(
        str(image_id).encode('utf8'))
    feature_dict['image/key/sha256'] = dataset_util.bytes_feature(
        sha256_hex.encode('utf8'))
    feature_dict['image/encoded'] = dataset_util.bytes_feature(encoded_jpg)
    feature_dict['image/format'] = dataset_util.bytes_feature(
        'jpeg'.encode('utf8'))
    feature_dict['image/not_exhaustive_category_ids'] = (
        dataset_util.int64_list_feature(not_exhaustive_ids))
    feature_dict['image/image_neg_category_ids'] = (
        dataset_util.int64_list_feature(neg_ids))

    if bbox_annotations:
        lefts, rights, tops, bottoms = [], [], [], []
        crowd_flags = []
        label_names = []
        label_ids = []
        areas = []
        mask_pngs = []

        for ann in bbox_annotations:
            box_x, box_y, box_w, box_h = ann['bbox']

            # Normalize to the image size and clip to the unit square.
            left = max(float(box_x) / img_w, 0.0)
            right = min(float(box_x + box_w) / img_w, 1.0)
            top = max(float(box_y) / img_h, 0.0)
            bottom = min(float(box_y + box_h) / img_h, 1.0)
            if right <= left or bottom <= top:
                # Nothing of the box is left inside the image.
                continue

            lefts.append(left)
            rights.append(right)
            tops.append(top)
            bottoms.append(bottom)

            # Every kept annotation is written with is_crowd = 0.
            crowd_flags.append(0)
            label_id = int(ann['category_id'])
            label_ids.append(label_id)
            label_names.append(category_index[label_id]['name'].encode('utf8'))
            areas.append(ann['area'])

            if not include_mask:
                continue

            rle = mask.frPyObjects(ann['segmentation'], img_h, img_w)
            # Collapse the decoded stack along its last axis into one mask.
            merged_mask = np.amax(mask.decode(rle), axis=2)
            png_buffer = io.BytesIO()
            PIL.Image.fromarray(merged_mask).save(png_buffer, format='PNG')
            mask_pngs.append(png_buffer.getvalue())

        feature_dict['image/object/bbox/xmin'] = (
            dataset_util.float_list_feature(lefts))
        feature_dict['image/object/bbox/xmax'] = (
            dataset_util.float_list_feature(rights))
        feature_dict['image/object/bbox/ymin'] = (
            dataset_util.float_list_feature(tops))
        feature_dict['image/object/bbox/ymax'] = (
            dataset_util.float_list_feature(bottoms))
        feature_dict['image/object/class/text'] = (
            dataset_util.bytes_list_feature(label_names))
        feature_dict['image/object/class/label'] = (
            dataset_util.int64_list_feature(label_ids))
        feature_dict['image/object/is_crowd'] = (
            dataset_util.int64_list_feature(crowd_flags))
        feature_dict['image/object/area'] = (
            dataset_util.float_list_feature(areas))
        if include_mask:
            feature_dict['image/object/mask'] = (
                dataset_util.bytes_list_feature(mask_pngs))

    features = tf.train.Features(feature=feature_dict)
    example = tf.train.Example(features=features)
    return True, filename, example
Ejemplo n.º 30
0
def create_tf_example(
        annotations_path="/Data2TB/chl_data/rgb/train/augmented/train_pos_neg.json",
        image_dir="/Data2TB/chl_data/rgb/train/augmented/pos_neg_png/",
        output_path="/Data2TB/chl_data/rgb/train/augmented/train.record"):
    """Convert a JSON file of annotated frames into a TFRecord file.

    The JSON document must hold a 'frames' list. Each frame supplies
    'height', 'width', 'file' (image name relative to image_dir) and an
    'annotations' list whose items carry 'label', 'x', 'y', 'width' and
    'height'. Only annotations labelled 'Head' are exported; each becomes
    a box with class id 1. Box corners are divided by the frame size, and
    any value above 1 is clamped to 1.0 after a warning is printed.
    Values below 0 are written unchanged.

    One tf.train.Example is written per frame, including frames without
    any 'Head' annotation. The raw bytes of the image file are stored
    unmodified under 'image/encoded'.

    Args:
      annotations_path: path of the JSON annotation file to read.
      image_dir: directory containing the frame images.
      output_path: path of the TFRecord file to create.

    Returns:
      None. The result is the file written at output_path.
    """
    # Read the annotations before opening the writer so that a missing or
    # malformed JSON file does not truncate an existing record file.
    with open(annotations_path) as f:
        jsondata = json.load(f)

    writer = tf.python_io.TFRecordWriter(output_path)
    try:
        for i, frame in enumerate(jsondata['frames']):
            height = frame["height"]  # Image height
            width = frame["width"]  # Image width
            filename_only = frame['file']
            print(str(i) + ": " + filename_only)
            filename = os.path.join(image_dir, filename_only)
            with tf.gfile.GFile(filename, 'rb') as fid:
                encoded_image = fid.read()

            xmins = []
            xmaxs = []
            ymins = []
            ymaxs = []
            classes = []  # Integer class id of each box (1 per box)

            for annotation in frame['annotations']:
                if annotation['label'] != 'Head':
                    continue
                xmin = annotation['x'] / width
                xmax = (annotation['x'] + annotation['width']) / width
                ymin = annotation['y'] / height
                ymax = (annotation['y'] + annotation['height']) / height

                # Warn BEFORE clamping; checking afterwards can never fire.
                if xmin > 1 or xmax > 1 or ymin > 1 or ymax > 1:
                    print("UNNORMALIZED STUFF")

                xmins.append(min(xmin, 1.0))
                xmaxs.append(min(xmax, 1.0))
                ymins.append(min(ymin, 1.0))
                ymaxs.append(min(ymax, 1.0))
                classes.append(1)

            tf_example = tf.train.Example(features=tf.train.Features(
                feature={
                    'image/filename':
                    dataset_util.bytes_feature(str.encode(filename)),
                    'image/height':
                    dataset_util.int64_feature(height),
                    'image/width':
                    dataset_util.int64_feature(width),
                    'image/encoded':
                    dataset_util.bytes_feature(encoded_image),
                    'image/object/bbox/xmin':
                    dataset_util.float_list_feature(xmins),
                    'image/object/bbox/xmax':
                    dataset_util.float_list_feature(xmaxs),
                    'image/object/bbox/ymin':
                    dataset_util.float_list_feature(ymins),
                    'image/object/bbox/ymax':
                    dataset_util.float_list_feature(ymaxs),
                    'image/object/class/label':
                    dataset_util.int64_list_feature(classes),
                }))
            writer.write(tf_example.SerializeToString())
    finally:
        # Close the record file even when a frame fails to convert.
        writer.close()