def create_tf_example(f, image_path=None):
    """Build a tf.train.Example from the next record of an annotation file.

    One record is consumed from ``f``: a line holding the image filename, a
    line holding the number of faces, then one whitespace-separated line per
    face whose first four fields are read as ``x y w h`` in pixels.

    Only faces wider than 25 px and taller than 30 px are kept. Their boxes
    are normalised by the image size and clamped to [0.005, 0.995] because
    some annotations exceed the image boundary.

    Args:
        f: Open text file positioned at the start of a record.
        image_path: Directory the filename is relative to. When ``None`` the
            filename is used as given.

    Returns:
        Tuple ``(valid_face_num, tf_example)``: the number of faces kept and
        the resulting ``tf.train.Example``.

    Raises:
        ValueError: If OpenCV cannot decode the image.
    """
    filename = f.readline().rstrip()
    if image_path is None:
        filepath = filename
    else:
        filepath = os.path.join(image_path, filename)

    # Read the raw bytes first so a missing file still raises the usual I/O
    # error, and close the handle deterministically.
    with open(filepath, 'rb') as image_file:
        encoded_image_data = image_file.read()

    image_raw = cv2.imread(filepath)
    if image_raw is None:
        raise ValueError('Could not decode image: %s' % filepath)
    height, width = image_raw.shape[:2]

    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    # NOTE(review): records with a face count of 0 may still carry one
    # placeholder annotation line in some dataset releases; no line is
    # consumed for them here - confirm against the caller.
    face_num = int(f.readline().rstrip())
    valid_face_num = 0
    for _ in range(face_num):
        annot = f.readline().rstrip().split()
        left, top = float(annot[0]), float(annot[1])
        box_w, box_h = float(annot[2]), float(annot[3])
        # Skip faces that are too small to be useful.
        if not (box_w > 25.0 and box_h > 30.0):
            continue
        xmins.append(max(0.005, left / width))
        ymins.append(max(0.005, top / height))
        xmaxs.append(min(0.995, (left + box_w) / width))
        ymaxs.append(min(0.995, (top + box_h) / height))
        classes_text.append('face'.encode('utf8'))
        classes.append(1)
        print(xmins[-1], ymins[-1], xmaxs[-1], ymaxs[-1],
              classes_text[-1], classes[-1])
        valid_face_num += 1

    print("Face Number is %d" % face_num)
    print("Valid face number is %d" % valid_face_num)

    feature_dict = {
        'image/height': dataset_util.int64_feature(int(height)),
        'image/width': dataset_util.int64_feature(int(width)),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
    }
    tf_example = tf.train.Example(
        features=tf.train.Features(feature=feature_dict))
    return valid_face_num, tf_example
def create_tf_example():
    """Build a tf.train.Example for the hard-coded sample ``./Image10.jpg``.

    The image size, the single bounding box and its class are fixed
    constants. Box coordinates are stored normalised by the image size.

    Returns:
        The ``tf.train.Example`` describing the sample image.
    """
    height = 544  # Image height
    width = 960  # Image width
    filename = 'Image10.jpg'.encode()

    with tf.gfile.GFile("./Image10.jpg", 'rb') as fid:
        encoded_image = fid.read()
    image_format = b'jpg'

    # One entry per box, normalised to the image size.
    xmins = [458.0 / 960.0]
    xmaxs = [807.0 / 960.0]
    ymins = [157.0 / 544.0]
    ymaxs = [360.0 / 544.0]

    # Both class features are repeated fields and must be lists. Passing a
    # bare bytes object makes the int64 list iterate over its byte values
    # (b"2" would be stored as 50), so use real lists here.
    classes_text = [b"library"]
    classes = [2]

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_image),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
    }))
    return tf_example
def create_tf_record(image_file, height, width, xmins, ymins, xmaxs, ymaxs,
                     classes_id, classes_text):
    """Build a tf.train.Example for one image and its bounding boxes.

    Args:
        image_file: Path of the image; its bytes are stored unchanged.
        height: Image height.
        width: Image width.
        xmins: List of normalized left x coordinates (1 per box).
        ymins: List of normalized top y coordinates (1 per box).
        xmaxs: List of normalized right x coordinates (1 per box).
        ymaxs: List of normalized bottom y coordinates (1 per box).
        classes_id: List of integer class ids (1 per box).
        classes_text: List of class names as bytes (1 per box).

    Returns:
        The ``tf.train.Example`` for the image. The format feature is always
        written as ``png``.
    """
    # Context manager so the file handle is released as soon as it is read.
    with tf.gfile.GFile(image_file, 'rb') as fid:
        image_data = fid.read()

    filename = tf.compat.as_bytes(image_file)
    image_format = 'png'.encode('utf8')

    tf_single_dataset = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename':
            dataset_util.bytes_feature(os.path.basename(filename)),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(image_data),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes_id),
    }))
    return tf_single_dataset
def dict_to_coco_example(img_data):
    """Convert the dictionary describing one image into a tf.Example proto.

    Args:
        img_data: Mapping with the keys ``'bboxes'`` (sequence of
            ``[x, y, w, h]`` boxes), ``'labels'``, ``'height'``, ``'width'``
            and ``'pixel_data'`` (encoded image bytes).

    Returns:
        example: The converted tf.Example.
    """
    lefts, rights, tops, bottoms = [], [], [], []
    for box in img_data['bboxes']:
        # Turn (x, y, w, h) into corner coordinates; values are stored as
        # given, without rescaling.
        lefts.append(box[0])
        rights.append(box[0] + box[2])
        tops.append(box[1])
        bottoms.append(box[1] + box[3])

    feature = {
        'image/height': dataset_util.int64_feature(img_data['height']),
        'image/width': dataset_util.int64_feature(img_data['width']),
        'image/object/bbox/xmin': dataset_util.float_list_feature(lefts),
        'image/object/bbox/xmax': dataset_util.float_list_feature(rights),
        'image/object/bbox/ymin': dataset_util.float_list_feature(tops),
        'image/object/bbox/ymax': dataset_util.float_list_feature(bottoms),
        'image/object/class/label':
            dataset_util.int64_list_feature(img_data['labels']),
        'image/encoded': dataset_util.bytes_feature(img_data['pixel_data']),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf-8')),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def create_tf(input_path, filename, bb_list):
    """Build a tf.train.Example for one square image containing 'ship' boxes.

    The image is assumed to be ``size`` x ``size`` pixels, where ``size`` is
    a module-level value defined outside this function.

    Args:
        input_path: Prefix that is concatenated directly with ``filename``
            to form the image path.
        filename: Image file name (``str`` or ``bytes``).
        bb_list: Sequence of boxes; fields 0..3 of each box are divided by
            ``size`` and stored as xmin, xmax, ymin, ymax respectively.

    Returns:
        The ``tf.train.Example`` for the image.
    """
    height = size  # Image height
    width = size  # Image width

    # Binary mode is required for image bytes; the context manager closes
    # the handle deterministically.
    with tf.gfile.GFile(input_path + filename, 'rb') as fid:
        encoded_image_data = fid.read()
    image_format = b'jpeg'

    scale = float(size)
    xmins = [box[0] / scale for box in bb_list]
    xmaxs = [box[1] / scale for box in bb_list]
    ymins = [box[2] / scale for box in bb_list]
    ymaxs = [box[3] / scale for box in bb_list]

    # Bytes features need bytes values on Python 3.
    classes_text = [b'ship'] * len(bb_list)
    classes = [1] * len(bb_list)
    filename_bytes = tf.compat.as_bytes(filename)

    tf_image = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename_bytes),
        'image/source_id': dataset_util.bytes_feature(filename_bytes),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
    }))
    return tf_image
def create_tf_example(filename, dirpath, boxes):
    """Build a tf.train.Example for one image whose boxes are all 'sperm'.

    Args:
        filename: Image file name inside ``dirpath``; its extension (without
            the dot) is stored as the image format.
        dirpath: Directory containing the image.
        boxes: Mapping with the keys ``'xmins'``, ``'xmaxs'``, ``'ymins'``
            and ``'ymaxs'``; each value is divided by the image width or
            height before being stored.

    Returns:
        The ``tf.train.Example`` for the image.
    """
    def scaled(values, extent):
        # Divide every coordinate by the matching image dimension.
        return [value / extent for value in values]

    with tf.gfile.GFile(os.path.join(dirpath, filename), 'rb') as fid:
        encoded_image_data = fid.read()
    width, height = Image.open(io.BytesIO(encoded_image_data)).size

    extension = os.path.splitext(filename)[1]
    image_format = extension[1:].encode('UTF-8')

    xmins = scaled(boxes['xmins'], width)
    xmaxs = scaled(boxes['xmaxs'], width)
    ymins = scaled(boxes['ymins'], height)
    ymaxs = scaled(boxes['ymaxs'], height)

    # Default label and class id, one per box.
    box_count = len(xmins)
    classes_text = [b'sperm'] * box_count
    classes = [1] * box_count

    encoded_name = filename.encode('UTF-8')
    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(encoded_name),
        'image/source_id': dataset_util.bytes_feature(encoded_name),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def create_tf_example(group, path, labels_mapping):
    """Build a tf.train.Example for one image and its annotated rows.

    Args:
        group: Object with ``filename`` and ``object`` attributes;
            ``object.iterrows()`` yields rows with the keys ``'class'``,
            ``'xmin'``, ``'xmax'``, ``'ymin'`` and ``'ymax'``.
        path: Directory containing the image.
        labels_mapping: Mapping from class name to integer id. Rows whose
            class is not in the mapping are skipped.

    Returns:
        The ``tf.train.Example`` for the image.
    """
    image_file = os.path.join(path, '{}'.format(group.filename))
    with tf.gfile.GFile(image_file, 'rb') as fid:
        encoded_jpg = fid.read()
    width, height = Image.open(io.BytesIO(encoded_jpg)).size

    # Pair every usable row with its class id, dropping unmapped classes.
    kept = []
    for _, row in group.object.iterrows():
        class_idx = labels_mapping.get(row['class'], None)
        if class_idx is not None:
            kept.append((row, class_idx))

    xmins = [row['xmin'] / width for row, _ in kept]
    xmaxs = [row['xmax'] / width for row, _ in kept]
    ymins = [row['ymin'] / height for row, _ in kept]
    ymaxs = [row['ymax'] / height for row, _ in kept]
    classes_text = [row['class'].encode('utf8') for row, _ in kept]
    classes = [class_idx for _, class_idx in kept]

    filename = group.filename.encode('utf8')
    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(b'jpg'),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def create_tf_example(group, img_path):
    """Build a tf.train.Example for one JPEG image and its labelled boxes.

    Args:
        group: Object with ``filename`` and ``object`` attributes;
            ``object.iterrows()`` yields rows with the keys ``'label'``,
            ``'xmin'``, ``'xmax'``, ``'ymin'`` and ``'ymax'``.
        img_path: Directory containing the image.

    Returns:
        The ``tf.train.Example`` for the image, with box coordinates divided
        by the image width or height.

    Raises:
        ValueError: If PIL does not report the image format as JPEG.
    """
    source = os.path.join(img_path, '{}'.format(group.filename))
    with tf.gfile.GFile(source, 'rb') as fid:
        encoded_jpg = fid.read()

    image = PIL.Image.open(io.BytesIO(encoded_jpg))
    if image.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    width, height = image.size

    xmins, xmaxs, ymins, ymaxs = [], [], [], []
    classes_text, classes = [], []
    for _, row in group.object.iterrows():
        label = row['label']
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        classes_text.append(label.encode('utf8'))
        classes.append(class_text_to_int(label))

    filename = group.filename.encode('utf8')
    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(b'jpg'),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def create_tf_example(image_file):
    """Build a tf.train.Example from an image and its sidecar XML file.

    The annotations are read from ``image_file + ".xml"``. Every ``object``
    element is read positionally: child 0 holds the class name and child 5
    holds four values used, in order, as xmin, xmax, ymin, ymax.

    Args:
        image_file: Path of the image.

    Returns:
        The ``tf.train.Example`` for the image, with box coordinates divided
        by the image width or height.
    """
    print("opening {}".format(image_file))
    with tf.gfile.GFile(image_file, 'rb') as fid:
        encoded_jpg = fid.read()
    width, height = Image.open(io.BytesIO(encoded_jpg)).size

    xmins, xmaxs, ymins, ymaxs = [], [], [], []
    classes_text, classes = [], []

    annotation_root = ET.parse(image_file + ".xml").getroot()
    for member in annotation_root.findall('object'):
        name = member[0].text
        # NOTE(review): children are addressed by position, so this relies
        # on a fixed element order in the XML - confirm against the writer.
        box = member[5]
        classes_text.append(name.encode('utf-8'))
        classes.append(class_text_to_int(name))
        xmins.append(int(box[0].text) / width)
        xmaxs.append(int(box[1].text) / width)
        ymins.append(int(box[2].text) / height)
        ymaxs.append(int(box[3].text) / height)

    filename = os.path.basename(image_file).encode('utf8')
    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(b'jpg'),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def create_tf_example(image_data):
    """Build a tf.train.Example from an image description object.

    Args:
        image_data: Object exposing ``height``, ``width``, ``path``, ``id``
            and ``bboxes``; every bbox exposes ``min_x``, ``max_x``,
            ``min_y``, ``max_y``, ``class_label`` and ``class_id``.

    Returns:
        The ``tf.train.Example`` for the image, tagged as PNG, with box
        coordinates divided by the image width or height.
    """
    width = image_data.width
    height = image_data.height

    with tf.gfile.GFile(image_data.path, 'rb') as fid:
        encoded_png = fid.read()

    boxes = list(image_data.bboxes)
    xmins = [box.min_x / width for box in boxes]
    xmaxs = [box.max_x / width for box in boxes]
    ymins = [box.min_y / height for box in boxes]
    ymaxs = [box.max_y / height for box in boxes]
    classes_text = [box.class_label.encode('utf8') for box in boxes]
    classes = [box.class_id for box in boxes]

    filename = image_data.id.encode('utf8')
    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_png),
        'image/format': dataset_util.bytes_feature(b'png'),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def create_tf_example(example, height, width):
    """Build a tf.train.Example from one annotated image entry.

    Args:
        example: Mapping with ``'filename'`` (path of the image) and
            ``'annotations'``, a sequence of boxes with the keys ``'xmin'``,
            ``'ymin'``, ``'x_width'``, ``'y_height'`` and ``'class'``.
        height: Image height used to normalise y coordinates.
        width: Image width used to normalise x coordinates.

    Returns:
        The ``tf.train.Example`` for the image, tagged as ``jpg``.
    """
    image_path = example['filename']
    with tf.gfile.GFile(image_path, 'rb') as fid:
        encoded_image = fid.read()

    xmins, xmaxs, ymins, ymaxs = [], [], [], []
    classes_text, classes = [], []
    for box in example['annotations']:
        left, top = box['xmin'], box['ymin']
        # Boxes are given as origin plus extent; convert to normalised
        # corner coordinates.
        xmins.append(float(left / width))
        xmaxs.append(float((left + box['x_width']) / width))
        ymins.append(float(top / height))
        ymaxs.append(float((top + box['y_height']) / height))
        classes_text.append(box['class'].encode())
        classes.append(int(LABEL_DICT[box['class']]))

    filename = image_path.encode()
    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_image),
        'image/format': dataset_util.bytes_feature('jpg'.encode()),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def __create_tf_example(frame_data, sorted_label_list):
    """Build a tf.train.Example for one frame and summarise its labels.

    The frame image is decoded with PIL and re-encoded in its declared
    format before being stored.

    Args:
        frame_data: Object exposing ``image`` (encoded bytes), ``format``,
            ``filename`` and ``bboxes_text``.
        sorted_label_list: Labels whose position in the list becomes the
            integer class id.

    Returns:
        Tuple ``(tf_example, label_counter_for_frame, is_negative)`` where
        the counter tallies the labels in the frame and ``is_negative`` is
        True when the frame has no boxes.
    """
    def clamp_unit(value):
        # Cap a normalised coordinate to the [0, 1] range.
        return max(min(value, 1), 0)

    im = PIL.Image.open(io.BytesIO(frame_data.image))
    # PIL names the JPEG codec 'JPEG', not 'jpg'.
    if frame_data.format == 'jpg':
        save_format = 'JPEG'
    else:
        save_format = frame_data.format.upper()
    buffer = io.BytesIO()
    im.save(buffer, format=save_format)
    height, width = im.height, im.width
    encoded_image_data = buffer.getvalue()

    rects, labels = bbox_writer.convert_text_to_rects_and_labels(
        frame_data.bboxes_text)

    # Each rect is indexed as (left, top, right, bottom).
    xmins = [clamp_unit(rect[0] / width) for rect in rects]
    xmaxs = [clamp_unit(rect[2] / width) for rect in rects]
    ymins = [clamp_unit(rect[1] / height) for rect in rects]
    ymaxs = [clamp_unit(rect[3] / height) for rect in rects]

    classes_txt = [label.encode('utf-8') for label in labels]
    # NOTE(review): ids start at 0 here - confirm the label map used for
    # training agrees with that.
    label_to_id_dict = {}
    for index, label in enumerate(sorted_label_list):
        label_to_id_dict[label] = index
    class_ids = [label_to_id_dict[label] for label in labels]

    encoded_name = frame_data.filename.encode('utf-8')
    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(encoded_name),
        'image/source_id': dataset_util.bytes_feature(encoded_name),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format':
            dataset_util.bytes_feature(frame_data.format.encode('utf-8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_txt),
        'image/object/class/label':
            dataset_util.int64_list_feature(class_ids),
    }
    tf_example = tf.train.Example(
        features=tf.train.Features(feature=feature))

    label_counter_for_frame = collections.Counter(labels)
    is_negative = len(rects) == 0
    return tf_example, label_counter_for_frame, is_negative
def group_to_tf_record(point, image_directory):
    """Build a tf.train.Example for one annotated image.

    Images whose id starts with ``'frame'`` are looked up as ``<id>.png``;
    all others as ``<id>.jpg``.

    Args:
        point: Mapping with ``'id'`` and ``'annotations'``; every annotation
            provides ``'x0'``, ``'x1'``, ``'y0'``, ``'y1'``, ``'class_num'``
            and ``'label'``. Coordinates are stored as given, converted to
            float.
        image_directory: Directory containing the image files.

    Returns:
        The ``tf.train.Example`` for the image, or ``None`` when the image
        cannot be opened or read.
    """
    image_id = point['id']
    if image_id.startswith('frame'):
        filename = os.path.join(image_directory, image_id + '.png')
        image_format = b'png'
    else:
        filename = os.path.join(image_directory, image_id + '.jpg')
        image_format = b'jpeg'

    try:
        # Context managers release both handles even when reading fails.
        with Image.open(filename) as image:
            width, height = image.size
        with tf.gfile.GFile(filename, 'rb') as fid:
            encoded_image = bytes(fid.read())
    except Exception:
        # Best effort: unreadable images are skipped. Unlike a bare
        # ``except``, this lets KeyboardInterrupt and SystemExit through.
        return None

    key = hashlib.sha256(encoded_image).hexdigest()

    annotations = point['annotations']
    xmins = [float(anno['x0']) for anno in annotations]
    xmaxs = [float(anno['x1']) for anno in annotations]
    ymins = [float(anno['y0']) for anno in annotations]
    ymaxs = [float(anno['y1']) for anno in annotations]
    class_nums = [anno['class_num'] for anno in annotations]
    class_texts = [bytes(anno['label'].encode('utf8'))
                   for anno in annotations]

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
        'image/filename':
            dataset_util.bytes_feature(bytes(filename.encode('utf8'))),
        'image/source_id':
            dataset_util.bytes_feature(bytes(image_id.encode('utf8'))),
        'image/encoded': dataset_util.bytes_feature(encoded_image),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(class_texts),
        'image/object/class/label':
            dataset_util.int64_list_feature(class_nums),
    }))
    return tf_example
def create_tf_example(filename):
    """Build a tf.train.Example whose single box is encoded in the file name.

    The file's base name (without extension) is split on ``'-'``; the third
    field must look like ``x1&y1_x2&y2`` and gives the top-left and
    bottom-right corners in pixels.

    Args:
        filename: Path of the image.

    Returns:
        The ``tf.train.Example`` with one 'vehicle plate' box (class id 2),
        normalised by the image size.
    """
    stem = filename.rsplit('/', 1)[-1].rsplit('.', 1)[0]
    corner_field = stem.split('-')[2]
    corners = []
    for corner_text in corner_field.split('_'):
        corners.append([int(value) for value in corner_text.split('&')])
    top_left, bottom_right = corners
    xmin, ymin = top_left
    xmax, ymax = bottom_right

    with tf.gfile.GFile(filename, 'rb') as fid:
        encoded_jpg = fid.read()
    image = PIL.Image.open(io.BytesIO(encoded_jpg))
    height, width = image.height, image.width

    # Digest is computed but not currently written to the example.
    key = hashlib.sha256(encoded_jpg).hexdigest()

    xmins = [float(xmin) / width]
    xmaxs = [float(xmax) / width]
    ymins = [float(ymin) / height]
    ymaxs = [float(ymax) / height]
    labels_text = ['vehicle plate'.encode('utf8')]
    labels = [2]

    feature_dict = {
        'image/height': dataset_util.int64_feature(int(height)),
        'image/width': dataset_util.int64_feature(int(width)),
        'image/filename': dataset_util.bytes_feature(filename.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(labels_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(labels),
    }
    return tf.train.Example(
        features=tf.train.Features(feature=feature_dict))
def create_tfdatapoint(file_loc, file, labels):
    """Build a tf.train.Example from an image and its five-column label file.

    The image is read from ``<file_loc>/images/<file>`` and the labels from
    ``<file_loc>/labels/<name>.txt``, where ``<name>`` is ``file`` without
    its extension. Each label row is read as
    ``class x_center y_center width height`` and converted to corner
    coordinates without further scaling.

    Args:
        file_loc: Dataset root containing ``images`` and ``labels``.
        file: Image file name inside ``images``.
        labels: Sequence mapping a class index to its text name.

    Returns:
        The ``tf.train.Example`` for the image, tagged as PNG.
    """
    image_path = os.path.join(file_loc, 'images', file)

    # Context managers release both handles as soon as they are read.
    with Image.open(image_path) as img:
        width, height = img.size
    with tf.io.gfile.GFile(image_path, "rb") as fid:
        encoded = bytes(fid.read())
    image_format = b'png'

    # splitext strips only the final extension, so names that contain extra
    # dots still map to the matching label file.
    filename = os.path.splitext(file)[0]

    data = np.genfromtxt(os.path.join(file_loc, 'labels', filename + '.txt'))
    # genfromtxt returns a 1-D array for a single row; force (rows, 5).
    data = data.reshape(data.size // 5, 5)

    classes = [int(x) for x in data[:, 0]]
    classes_text = [labels[x].encode('utf8') for x in classes]

    half_widths = data[:, 3] / 2.0
    half_heights = data[:, 4] / 2.0
    xmins = data[:, 1] - half_widths
    xmaxs = data[:, 1] + half_widths
    ymins = data[:, 2] - half_heights
    ymaxs = data[:, 2] + half_heights

    tf_label_and_data = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height': dataset_util.int64_feature(height),
            'image/width': dataset_util.int64_feature(width),
            'image/filename':
                dataset_util.bytes_feature(str.encode(filename)),
            'image/source_id':
                dataset_util.bytes_feature(str.encode(filename)),
            'image/encoded': dataset_util.bytes_feature(encoded),
            'image/format': dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
                dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
                dataset_util.int64_list_feature(classes),
        }))
    return tf_label_and_data
def create_tf_example(group, path):
    """Build a tf.train.Example for one image from its grouped CSV rows.

    Box coordinates are stored exactly as they appear in the rows, and the
    class names are stored as bytes under ``image/object/class/label``.

    Args:
        group: Object with ``filename`` and ``object`` attributes;
            ``object.iterrows()`` yields rows with the keys ``'xmin'``,
            ``'xmax'``, ``'ymin'``, ``'ymax'`` and ``'class'``.
        path: Directory containing the image.

    Returns:
        The ``tf.train.Example`` for the image.
    """
    # Load the encoded bytes and read the image size from them.
    image_file = os.path.join(path, '{}'.format(group.filename))
    with tf.gfile.GFile(image_file, 'rb') as fid:
        encoded_jpg = fid.read()
    width, height = Image.open(io.BytesIO(encoded_jpg)).size

    rows = [row for _, row in group.object.iterrows()]
    xmins = [row['xmin'] for row in rows]
    xmaxs = [row['xmax'] for row in rows]
    ymins = [row['ymin'] for row in rows]
    ymaxs = [row['ymax'] for row in rows]
    classes = [row['class'].encode('utf8') for row in rows]

    filename = group.filename.encode('utf8')
    # NOTE(review): class names are written as a bytes list under the
    # 'label' key and no 'text' key is emitted - confirm the reader
    # expects this.
    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(b'jpg'),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/label':
            dataset_util.bytes_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def create_tf_example(group, path):
    """Build a tf.train.Example for one image from its grouped rows.

    The image is always loaded as ``<base>.jpg``, where ``<base>`` is
    ``group.filename`` without its extension.

    Args:
        group: Object with ``filename`` and ``object`` attributes;
            ``object.iterrows()`` yields rows with the keys ``'xmin'``,
            ``'xmax'``, ``'ymin'``, ``'ymax'`` and ``'classID'``.
        path: Directory containing the image.

    Returns:
        The ``tf.train.Example`` for the image, with box coordinates divided
        by the image size and class ids shifted up by one.
    """
    stem = os.path.splitext(group.filename)[0]
    with tf.gfile.GFile(os.path.join(path, stem + ".jpg"), 'rb') as fid:
        encoded_jpg = fid.read()
    width, height = Image.open(io.BytesIO(encoded_jpg)).size

    xmins, xmaxs, ymins, ymaxs = [], [], [], []
    classes_text, classes = [], []
    for _, row in group.object.iterrows():
        class_id = row['classID']
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        classes_text.append(int_to_class_text(class_id).encode('utf8'))
        # Stored ids are the row's classID plus one.
        classes.append(class_id + 1)

    filename = group.filename.encode('utf8')
    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(b'jpg'),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def create_tf_example(height, width, filename, encoded_image_data,
                      image_format, xmins, xmaxs, ymins, ymaxs, classes_text,
                      classes):
    """Pack already-prepared image and box data into a tf.train.Example.

    Args:
        height: Image height.
        width: Image width.
        filename: Image filename as bytes; also stored as the source id.
        encoded_image_data: Encoded image bytes.
        image_format: Image format as bytes, e.g. b'jpeg' or b'png'.
        xmins: Normalized left x coordinates (1 per box).
        xmaxs: Normalized right x coordinates (1 per box).
        ymins: Normalized top y coordinates (1 per box).
        ymaxs: Normalized bottom y coordinates (1 per box).
        classes_text: Class names as bytes (1 per box).
        classes: Integer class ids (1 per box).

    Returns:
        The assembled ``tf.train.Example``.
    """
    image_features = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_image_data),
        'image/format': dataset_util.bytes_feature(image_format),
    }
    box_features = {
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text':
            dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label':
            dataset_util.int64_list_feature(classes),
    }

    feature = dict(image_features)
    feature.update(box_features)
    return tf.train.Example(features=tf.train.Features(feature=feature))
def create_tf_example(csv, img_dir):
    """Build a tf.train.Example from one CSV row describing a single box.

    Args:
        csv: Sequence laid out as ``[filename, x1, y1, x2, y2, class_index]``;
            the class index selects a name from ``config.CLASS_NAMES``.
        img_dir: Directory containing the image.

    Returns:
        The ``tf.train.Example`` for the image, with the box divided by the
        image width or height.
    """
    img_fname = csv[0]
    left, top, right, bottom = [int(value) for value in csv[1:-1]]
    class_index = int(csv[-1])
    class_name = config.CLASS_NAMES[class_index].encode('utf8')

    with tf.gfile.GFile(os.path.join(img_dir, img_fname), 'rb') as fid:
        encoded_jpg = fid.read()
    width, height = Image.open(io.BytesIO(encoded_jpg)).size

    filename = img_fname.encode('utf8')
    feature = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(b'jpg'),
        # Every list feature carries exactly one entry: the row's box.
        'image/object/bbox/xmin':
            dataset_util.float_list_feature([left / width]),
        'image/object/bbox/xmax':
            dataset_util.float_list_feature([right / width]),
        'image/object/bbox/ymin':
            dataset_util.float_list_feature([top / height]),
        'image/object/bbox/ymax':
            dataset_util.float_list_feature([bottom / height]),
        'image/object/class/text':
            dataset_util.bytes_list_feature([class_name]),
        'image/object/class/label':
            dataset_util.int64_list_feature([class_index]),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def create_tf_example(image,
                      image_dir,
                      bbox_annotations=None,
                      category_index=None,
                      include_mask=False):
    """Turn one LVIS image record and its annotations into a tf.Example.

    Args:
      image: dict providing at least 'height', 'width', 'coco_url', 'id',
        'not_exhaustive_category_ids' and 'neg_category_ids'.
      image_dir: directory containing the image files. The last two path
        components of 'coco_url' are appended to it to locate the image.
      bbox_annotations: optional list of dicts providing 'bbox',
        'category_id', 'area' and (when masks are requested) 'segmentation'.
        Boxes are [x, y, width, height] in absolute pixels with (x, y) the
        top-left corner; they are converted to corner coordinates divided by
        the image size and clipped to [0, 1]. Boxes that end up with no
        positive extent are dropped.
      category_index: dict keyed by category id whose values carry a 'name'
        entry. Only consulted when bbox_annotations is non-empty.
      include_mask: whether to add PNG-encoded instance masks.

    Returns:
      success: False when the image file does not exist, True otherwise.
      filename: image path relative to image_dir (None on failure).
      example: the converted tf.Example (None on failure).
    """
    img_h = image['height']
    img_w = image['width']
    # Keep only "<parent dir>/<file>" from the URL.
    relative_name = osp.join(*image['coco_url'].split('/')[-2:])
    source_id = image['id']
    not_exhaustive_ids = image['not_exhaustive_category_ids']
    negative_ids = image['neg_category_ids']

    image_file = os.path.join(image_dir, relative_name)
    if not tf.gfile.Exists(image_file):
        tf.logging.warn(f'image {image_file} not exists! skip')
        return False, None, None

    with tf.gfile.GFile(image_file, 'rb') as handle:
        image_bytes = handle.read()
    digest = hashlib.sha256(image_bytes).hexdigest()

    features = {
        'image/height':
            dataset_util.int64_feature(img_h),
        'image/width':
            dataset_util.int64_feature(img_w),
        'image/filename':
            dataset_util.bytes_feature(relative_name.encode('utf8')),
        'image/source_id':
            dataset_util.bytes_feature(str(source_id).encode('utf8')),
        'image/key/sha256':
            dataset_util.bytes_feature(digest.encode('utf8')),
        'image/encoded':
            dataset_util.bytes_feature(image_bytes),
        'image/format':
            dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/not_exhaustive_category_ids':
            dataset_util.int64_list_feature(not_exhaustive_ids),
        'image/image_neg_category_ids':
            dataset_util.int64_list_feature(negative_ids),
    }

    if bbox_annotations:
        lefts, rights, tops, bottoms = [], [], [], []
        crowd_flags, names, ids, areas, png_masks = [], [], [], [], []

        for annotation in bbox_annotations:
            box_x, box_y, box_w, box_h = tuple(annotation['bbox'])
            left = max(float(box_x) / img_w, 0.0)
            right = min(float(box_x + box_w) / img_w, 1.0)
            top = max(float(box_y) / img_h, 0.0)
            bottom = min(float(box_y + box_h) / img_h, 1.0)
            # Degenerate after clipping: nothing left to describe.
            if right <= left or bottom <= top:
                continue

            lefts.append(left)
            rights.append(right)
            tops.append(top)
            bottoms.append(bottom)
            crowd_flags.append(0)
            label_id = int(annotation['category_id'])
            ids.append(label_id)
            names.append(category_index[label_id]['name'].encode('utf8'))
            areas.append(annotation['area'])

            if include_mask:
                rle = mask.frPyObjects(annotation['segmentation'], img_h,
                                       img_w)
                # Collapse the per-part masks into one binary mask.
                merged = np.amax(mask.decode(rle), axis=2)
                png_buffer = io.BytesIO()
                PIL.Image.fromarray(merged).save(png_buffer, format='PNG')
                png_masks.append(png_buffer.getvalue())

        features.update({
            'image/object/bbox/xmin':
                dataset_util.float_list_feature(lefts),
            'image/object/bbox/xmax':
                dataset_util.float_list_feature(rights),
            'image/object/bbox/ymin':
                dataset_util.float_list_feature(tops),
            'image/object/bbox/ymax':
                dataset_util.float_list_feature(bottoms),
            'image/object/class/text':
                dataset_util.bytes_list_feature(names),
            'image/object/class/label':
                dataset_util.int64_list_feature(ids),
            'image/object/is_crowd':
                dataset_util.int64_list_feature(crowd_flags),
            'image/object/area':
                dataset_util.float_list_feature(areas),
        })
        if include_mask:
            features['image/object/mask'] = (
                dataset_util.bytes_list_feature(png_masks))

    example = tf.train.Example(features=tf.train.Features(feature=features))
    return True, relative_name, example
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False):
    """Converts a COCO image record and its annotations to a tf.Example.

    Args:
      image: dict providing at least 'height', 'width', 'file_name' and 'id'.
      annotations_list: list of dicts providing 'bbox', 'iscrowd',
        'category_id', 'area' and (when masks are requested) 'segmentation'.
        Boxes are [x, y, width, height] in absolute pixels with (x, y) the
        top-left corner; they are converted to corner coordinates divided by
        the image size.
      image_dir: directory containing the image files.
      category_index: dict keyed by category id whose values carry a 'name'
        entry.
      include_masks: whether to add PNG-encoded instance masks.

    Returns:
      key: hex SHA-256 digest of the encoded image bytes.
      example: the converted tf.Example.
      num_annotations_skipped: number of annotations ignored because the box
        had non-positive width/height or extended past the right/bottom
        image edge.
    """
    image_height = image['height']
    image_width = image['width']
    filename = image['file_name']
    image_id = image['id']

    full_path = os.path.join(image_dir, filename)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    key = hashlib.sha256(encoded_jpg).hexdigest()

    xmin = []
    xmax = []
    ymin = []
    ymax = []
    is_crowd = []
    category_names = []
    category_ids = []
    area = []
    encoded_mask_png = []
    num_annotations_skipped = 0

    for object_annotations in annotations_list:
        (x, y, width, height) = tuple(object_annotations['bbox'])
        if width <= 0 or height <= 0:
            num_annotations_skipped += 1
            continue
        if x + width > image_width or y + height > image_height:
            num_annotations_skipped += 1
            continue

        xmin.append(float(x) / image_width)
        xmax.append(float(x + width) / image_width)
        ymin.append(float(y) / image_height)
        ymax.append(float(y + height) / image_height)
        is_crowd.append(object_annotations['iscrowd'])
        category_id = int(object_annotations['category_id'])
        category_ids.append(category_id)
        category_names.append(
            category_index[category_id]['name'].encode('utf8'))
        area.append(object_annotations['area'])

        if include_masks:
            run_len_encoding = mask.frPyObjects(
                object_annotations['segmentation'], image_height, image_width)
            binary_mask = mask.decode(run_len_encoding)
            # The axis reduction is applied to non-crowd annotations only;
            # crowd masks are used as decoded.
            if not object_annotations['iscrowd']:
                binary_mask = np.amax(binary_mask, axis=2)
            pil_image = PIL.Image.fromarray(binary_mask)
            output_io = io.BytesIO()
            pil_image.save(output_io, format='PNG')
            encoded_mask_png.append(output_io.getvalue())

    feature_dict = {
        'image/height':
            dataset_util.int64_feature(image_height),
        'image/width':
            dataset_util.int64_feature(image_width),
        'image/filename':
            dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
            dataset_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
        'image/format':
            dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax),
        'image/object/class/text':
            dataset_util.bytes_list_feature(category_names),
        # Previously the ids were collected but never written, so records
        # carried class names without the matching integer labels.
        'image/object/class/label':
            dataset_util.int64_list_feature(category_ids),
        'image/object/is_crowd':
            dataset_util.int64_list_feature(is_crowd),
        'image/object/area':
            dataset_util.float_list_feature(area),
    }
    if include_masks:
        feature_dict['image/object/mask'] = (
            dataset_util.bytes_list_feature(encoded_mask_png))

    example = tf.train.Example(features=tf.train.Features(
        feature=feature_dict))
    return key, example, num_annotations_skipped
def create_tf_example(
        json_path="/Data2TB/chl_data/rgb/train/augmented/train_pos_neg.json",
        image_dir="/Data2TB/chl_data/rgb/train/augmented/pos_neg_png/",
        output_path="/Data2TB/chl_data/rgb/train/augmented/train.record"):
    """Write one tf.Example per annotated frame into a TFRecord file.

    The annotation JSON must hold a top-level 'frames' list. Each frame
    provides 'height', 'width', 'file' and an 'annotations' list whose items
    provide 'label', 'x', 'y', 'width' and 'height'. Only annotations
    labelled 'Head' are exported, as class id 1. Box corners are divided by
    the frame size and capped at 1.0 (there is no lower cap).

    Args:
      json_path: path of the annotation JSON file.
      image_dir: prefix concatenated directly with each frame's 'file' value
        to form the image path, so it must end with a path separator.
      output_path: path of the TFRecord file to create.

    The defaults reproduce the locations that used to be hard-coded, so
    calling the function without arguments behaves as before.
    """
    # Parse the input before creating the output so a missing or malformed
    # JSON file does not leave an empty record file behind.
    with open(json_path) as f:
        jsondata = json.load(f)

    writer = tf.python_io.TFRecordWriter(output_path)
    try:
        for i, frame in enumerate(jsondata['frames']):
            height = frame["height"]
            width = frame["width"]
            filename_only = frame['file']
            print(str(i) + ": " + filename_only)
            filename = image_dir + filename_only
            with tf.gfile.GFile(filename, 'rb') as fid:
                encoded_image = fid.read()

            xmins = []
            xmaxs = []
            ymins = []
            ymaxs = []
            classes = []
            for annotation in frame['annotations']:
                if annotation['label'] != 'Head':
                    continue
                left = annotation['x'] / width
                right = (annotation['x'] + annotation['width']) / width
                top = annotation['y'] / height
                bottom = (annotation['y'] + annotation['height']) / height
                # Boxes may spill past the right/bottom edge; cap at 1.0.
                xmins.append(min(left, 1.0))
                xmaxs.append(min(right, 1.0))
                ymins.append(min(top, 1.0))
                ymaxs.append(min(bottom, 1.0))
                classes.append(1)

            tf_example = tf.train.Example(features=tf.train.Features(
                feature={
                    'image/filename':
                        dataset_util.bytes_feature(str.encode(filename)),
                    'image/height':
                        dataset_util.int64_feature(height),
                    'image/width':
                        dataset_util.int64_feature(width),
                    'image/encoded':
                        dataset_util.bytes_feature(encoded_image),
                    'image/object/bbox/xmin':
                        dataset_util.float_list_feature(xmins),
                    'image/object/bbox/xmax':
                        dataset_util.float_list_feature(xmaxs),
                    'image/object/bbox/ymin':
                        dataset_util.float_list_feature(ymins),
                    'image/object/bbox/ymax':
                        dataset_util.float_list_feature(ymaxs),
                    'image/object/class/label':
                        dataset_util.int64_list_feature(classes),
                }))
            writer.write(tf_example.SerializeToString())
    finally:
        # Close (and flush) the record file even if a frame fails.
        writer.close()
def _create_tf_entry(self, categories, img, label, filename, annotations):
    """Build a tf.Example for one image, or None if it has no usable box.

    Args:
      categories: list of label names; the class id written is the 1-based
        position of ``label`` in this list.
      img: image object exposing ``size`` and ``save`` (it is re-encoded as
        JPEG in memory).
      label: label name applied to every bounding box of this image.
      filename: stored under 'image/filename' and 'image/source_id'.
      annotations: iterable of objects whose ``data`` is a Rectangle or a
        Polygon (a polygon contributes its ``rect``); other types are
        ignored.

    Returns:
      The tf.train.Example, or None when no bounding box was collected.

    Raises:
      ImageMonkeyGeneralError: if a trimmed rectangle is still inconsistent
        with the image dimensions.
    """
    width, height = img.size

    jpeg_buffer = io.BytesIO()
    img.save(jpeg_buffer, format='JPEG')
    encoded_image = jpeg_buffer.getvalue()

    lefts, rights, tops, bottoms = [], [], [], []
    for annotation in annotations:
        # Only rectangles are supported; polygons fall back to their rect.
        if type(annotation.data) is Rectangle:
            rect = annotation.data
        elif type(annotation.data) is Polygon:
            rect = annotation.data.rect
        else:
            continue
        if rect is None:
            continue

        # Clip to the image in case the annotation exceeds its bounds.
        box = rect.trim(Rectangle(0, 0, width, height))

        if box.left < 0:
            raise ImageMonkeyGeneralError(
                "trimmed rect left dimension invalid! (<0)")
        if box.top < 0:
            raise ImageMonkeyGeneralError(
                "trimmed rect top dimension invalid! (<0)")
        if box.width < 0:
            raise ImageMonkeyGeneralError(
                "trimmed rect width dimension invalid! (<0)")
        if box.height < 0:
            raise ImageMonkeyGeneralError(
                "trimmed rect height dimension invalid! (<0)")
        if box.left + box.width > width:
            raise ImageMonkeyGeneralError("bounding box width > image width!")
        if box.top + box.height > height:
            raise ImageMonkeyGeneralError(
                "bounding box height > image height!")

        rel_left = box.left / float(width)
        rel_right = (box.left + box.width) / float(width)
        rel_top = box.top / float(height)
        rel_bottom = (box.top + box.height) / float(height)

        # Sanity checks on the normalized corners.
        if rel_left > rel_right:
            raise ImageMonkeyGeneralError("xmin > xmax!")
        if rel_top > rel_bottom:
            raise ImageMonkeyGeneralError("ymin > ymax!")

        # An all-zero box carries no information; drop it.
        if rel_left == 0 and rel_right == 0 and rel_top == 0 and rel_bottom == 0:
            continue

        lefts.append(rel_left)
        rights.append(rel_right)
        tops.append(rel_top)
        bottoms.append(rel_bottom)

    # Images without any annotation are skipped by the caller.
    if not lefts:
        return None

    box_count = len(lefts)
    class_ids = [categories.index(label) + 1] * box_count  # ids start at 1
    class_names = [label.encode('utf8')] * box_count

    feature_map = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename.encode()),
        'image/source_id': dataset_util.bytes_feature(filename.encode()),
        'image/encoded': dataset_util.bytes_feature(encoded_image),
        'image/format': dataset_util.bytes_feature(b'jpg'),
        'image/object/bbox/xmin': dataset_util.float_list_feature(lefts),
        'image/object/bbox/xmax': dataset_util.float_list_feature(rights),
        'image/object/bbox/ymin': dataset_util.float_list_feature(tops),
        'image/object/bbox/ymax': dataset_util.float_list_feature(bottoms),
        'image/object/class/text':
            dataset_util.bytes_list_feature(class_names),
        'image/object/class/label':
            dataset_util.int64_list_feature(class_ids),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature_map))
def _utf8_bytes(value):
    """Return *value* unchanged if it is bytes, else UTF-8 encode it."""
    return value if isinstance(value, bytes) else value.encode('utf8')


def create_tf_example(filename, label_file):
    """Build a tf.Example from an image and its space-delimited label file.

    The label file's first row is treated as a header and skipped. Every
    other non-empty row is read as ``x_min y_min x_max y_max ... class_name``:
    columns 0-3 are pixel coordinates (divided by the image size here) and
    the last column is the class name, mapped to an id with ``get_index``.

    Args:
      filename: path of the image file; also stored under 'image/filename'
        and 'image/source_id'.
      label_file: path of the label file.

    Returns:
      The populated tf.train.Example.
    """
    img = cv2.imread(filename)
    height, width = img.shape[:2]

    with tf.gfile.GFile(filename, 'rb') as fid:
        encoded_image_data = fid.read()
    image_format = b'jpg'

    xmins = []  # normalized left x coordinates (1 per box)
    xmaxs = []  # normalized right x coordinates (1 per box)
    ymins = []  # normalized top y coordinates (1 per box)
    ymaxs = []  # normalized bottom y coordinates (1 per box)
    classes_text = []  # class names as bytes (1 per box)
    classes = []  # integer class ids (1 per box)

    with open(label_file, 'r') as f:
        csvreader = csv.reader(f, delimiter=' ')
        next(csvreader, None)  # skip the header row
        for row in csvreader:
            if not row:
                # Blank lines (e.g. a trailing newline) carry no box.
                continue
            name = row[-1]
            # Bytes features reject text strings on Python 3.
            classes_text.append(_utf8_bytes(name))
            classes.append(get_index(name))
            xmins.append(float(row[0]) / width)
            xmaxs.append(float(row[2]) / width)
            ymins.append(float(row[1]) / height)
            ymaxs.append(float(row[3]) / height)

    filename_bytes = _utf8_bytes(filename)
    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
                dataset_util.int64_feature(height),
            'image/width':
                dataset_util.int64_feature(width),
            'image/filename':
                dataset_util.bytes_feature(filename_bytes),
            'image/source_id':
                dataset_util.bytes_feature(filename_bytes),
            'image/encoded':
                dataset_util.bytes_feature(encoded_image_data),
            'image/format':
                dataset_util.bytes_feature(image_format),
            'image/object/bbox/xmin':
                dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
                dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
                dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
                dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
                dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
                dataset_util.int64_list_feature(classes),
        }))
    return tf_example
def dict_to_tf_example(data,
                       image_path,
                       label_map_dict,
                       ignore_difficult_instances=False,
                       image_subdirectory='images'):
    """Build a tf.Example from a parsed PASCAL-style annotation dict.

    Bounding box corners are divided by the width/height recorded in
    ``data['size']``.

    Args:
      data: dict holding the XML fields of one image: 'size' (with 'width'
        and 'height') and, optionally, 'object' (a list of entries with
        'name' and 'bndbox').
      image_path: full path to the image file.
      label_map_dict: map from class name to integer id.
      ignore_difficult_instances: kept for interface compatibility; every
        object is currently treated as not difficult, so nothing is skipped.
      image_subdirectory: not used by this function.

    Returns:
      example: the converted tf.Example. 'difficult' and 'truncated' are
        written as 0 for every object and 'view' is written empty.

    Raises:
      ValueError: if the image at image_path is not a JPEG.
    """
    with tf.gfile.GFile(image_path, 'rb') as handle:
        jpeg_bytes = handle.read()
    decoded = PIL.Image.open(io.BytesIO(jpeg_bytes))
    if decoded.format != 'JPEG':
        raise ValueError('Image format not JPEG')
    digest = hashlib.sha256(jpeg_bytes).hexdigest()

    width = int(data['size']['width'])
    height = int(data['size']['height'])
    base_name = image_path.split('/')[-1]

    lefts, tops, rights, bottoms = [], [], [], []
    label_ids, label_names = [], []
    truncated_flags, views, difficult_flags = [], [], []

    objects = data['object'] if 'object' in data else []
    for entry in objects:
        # The XML 'difficult' field is deliberately not consulted.
        is_difficult = False
        if ignore_difficult_instances and is_difficult:
            continue
        difficult_flags.append(int(is_difficult))

        box = entry['bndbox']
        lefts.append(float(box['xmin']) / width)
        tops.append(float(box['ymin']) / height)
        rights.append(float(box['xmax']) / width)
        bottoms.append(float(box['ymax']) / height)
        label_names.append(entry['name'].encode('utf8'))
        label_ids.append(label_map_dict[entry['name']])
        # The XML 'truncated' and 'pose' fields are not consulted either.
        truncated_flags.append(0)

    encoded_name = base_name.encode('utf8')
    feature_map = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(encoded_name),
        'image/source_id': dataset_util.bytes_feature(encoded_name),
        'image/key/sha256': dataset_util.bytes_feature(digest.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(jpeg_bytes),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin': dataset_util.float_list_feature(lefts),
        'image/object/bbox/xmax': dataset_util.float_list_feature(rights),
        'image/object/bbox/ymin': dataset_util.float_list_feature(tops),
        'image/object/bbox/ymax': dataset_util.float_list_feature(bottoms),
        'image/object/class/text':
            dataset_util.bytes_list_feature(label_names),
        'image/object/class/label':
            dataset_util.int64_list_feature(label_ids),
        'image/object/difficult':
            dataset_util.int64_list_feature(difficult_flags),
        'image/object/truncated':
            dataset_util.int64_list_feature(truncated_flags),
        'image/object/view': dataset_util.bytes_list_feature(views),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature_map))
def background_tf_example(image_path):
    """Build a tf.Example for an image that has no annotated objects.

    Args:
      image_path: Full path to image file

    Returns:
      example: The converted tf.Example. All per-object features are
        written as empty lists; width and height come from the decoded
        image.

    Raises:
      ValueError: if the image is not a JPEG.
    """
    with tf.gfile.GFile(image_path, 'rb') as handle:
        jpeg_bytes = handle.read()
    decoded = PIL.Image.open(io.BytesIO(jpeg_bytes))
    if decoded.format != 'JPEG':
        raise ValueError('Image format not JPEG')

    digest = hashlib.sha256(jpeg_bytes).hexdigest()
    encoded_name = image_path.split('/')[-1].encode('utf8')
    width = decoded.width
    height = decoded.height

    feature_map = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(encoded_name),
        'image/source_id': dataset_util.bytes_feature(encoded_name),
        'image/key/sha256': dataset_util.bytes_feature(digest.encode('utf8')),
        'image/encoded': dataset_util.bytes_feature(jpeg_bytes),
        'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
        # A background image has no boxes, so every object list is empty.
        'image/object/bbox/xmin': dataset_util.float_list_feature([]),
        'image/object/bbox/xmax': dataset_util.float_list_feature([]),
        'image/object/bbox/ymin': dataset_util.float_list_feature([]),
        'image/object/bbox/ymax': dataset_util.float_list_feature([]),
        'image/object/class/text': dataset_util.bytes_list_feature([]),
        'image/object/class/label': dataset_util.int64_list_feature([]),
        'image/object/difficult': dataset_util.int64_list_feature([]),
        'image/object/truncated': dataset_util.int64_list_feature([]),
        'image/object/view': dataset_util.bytes_list_feature([]),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature_map))
def json_to_record(j):
    """Convert one annotation record into a tf.Example.

    Args:
      j: dict with 'image_size' (a one-element list holding 'height' and
        'width'), 'categories', 'annotations' (same length as 'categories';
        each item has a 'class_id' and whatever ``get_box_corners`` reads)
        and 'file' (path of the image).

    Returns:
      The populated tf.train.Example. Class ids are written shifted by one,
      and box corners are divided by the image width/height.
    """
    assert (len(j["image_size"]) == 1)
    assert (len(j["categories"]) == len(j["annotations"]))

    size_entry = j["image_size"][0]
    height = size_entry["height"]
    width = size_entry["width"]
    encoded_name = os.path.basename(j["file"])

    with tf.gfile.GFile(j["file"], "rb") as handle:
        image_bytes = handle.read()

    lefts, rights, tops, bottoms = [], [], [], []
    label_names, label_ids = [], []
    for annotation in j["annotations"]:
        label_names.append(
            class_id_to_name(annotation["class_id"]).encode("utf8"))
        # class_ids are indexed by 1 for tensorflow
        label_ids.append(annotation["class_id"] + 1)
        corners = get_box_corners(annotation)
        lefts.append(corners["xmin"] / width)
        rights.append(corners["xmax"] / width)
        tops.append(corners["ymin"] / height)
        bottoms.append(corners["ymax"] / height)

    feature_map = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename':
            dataset_util.bytes_feature(encoded_name.encode("utf8")),
        'image/source_id':
            dataset_util.bytes_feature(encoded_name.encode("utf8")),
        'image/encoded': dataset_util.bytes_feature(image_bytes),
        'image/format': dataset_util.bytes_feature(b'jpeg'),
        'image/object/bbox/xmin': dataset_util.float_list_feature(lefts),
        'image/object/bbox/xmax': dataset_util.float_list_feature(rights),
        'image/object/bbox/ymin': dataset_util.float_list_feature(tops),
        'image/object/bbox/ymax': dataset_util.float_list_feature(bottoms),
        'image/object/class/text':
            dataset_util.bytes_list_feature(label_names),
        'image/object/class/label':
            dataset_util.int64_list_feature(label_ids),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature_map))
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False):
    """Build a tf.Example from an image record and its box annotations.

    Args:
      image: dict providing 'height', 'width' and 'filename'.
      annotations_list: list of dicts providing 'xmin', 'xmax', 'ymin',
        'ymax' (divided by the image width/height here), 'label_text' and
        'label'.
      image_dir: directory joined with the filename to locate the image.
      category_index: not used by this function.
      include_masks: not used by this function.

    Returns:
      The populated tf.train.Example.
    """
    height = image['height']
    width = image['width']
    filename = image['filename']

    full_path = os.path.join(image_dir, filename)
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_image = fid.read()

    # Opening the bytes confirms they are a readable image and tells us the
    # real container format.
    pil_image = PIL.Image.open(io.BytesIO(encoded_image))
    # splitext() keeps the leading dot, which used to end up in
    # 'image/format' as e.g. b'.jpg'. Prefer the detected format and fall
    # back to the dot-less extension.
    extension = os.path.splitext(filename)[1]
    image_format = (pil_image.format or extension.lstrip('.')).lower()

    xmins = []  # normalized left x coordinates (1 per box)
    xmaxs = []  # normalized right x coordinates (1 per box)
    ymins = []  # normalized top y coordinates (1 per box)
    ymaxs = []  # normalized bottom y coordinates (1 per box)
    classes_text = []  # class names as bytes (1 per box)
    classes = []  # integer class ids (1 per box)
    for annotation in annotations_list:
        xmins.append(annotation['xmin'] / width)
        xmaxs.append(annotation['xmax'] / width)
        ymins.append(annotation['ymin'] / height)
        ymaxs.append(annotation['ymax'] / height)
        classes_text.append(annotation['label_text'].encode('utf8'))
        classes.append(annotation['label'])

    tf_example = tf.train.Example(features=tf.train.Features(
        feature={
            'image/height':
                dataset_util.int64_feature(height),
            'image/width':
                dataset_util.int64_feature(width),
            'image/filename':
                dataset_util.bytes_feature(filename.encode('utf8')),
            'image/source_id':
                dataset_util.bytes_feature(filename.encode('utf8')),
            'image/encoded':
                dataset_util.bytes_feature(encoded_image),
            'image/format':
                dataset_util.bytes_feature(image_format.encode('utf8')),
            'image/object/bbox/xmin':
                dataset_util.float_list_feature(xmins),
            'image/object/bbox/xmax':
                dataset_util.float_list_feature(xmaxs),
            'image/object/bbox/ymin':
                dataset_util.float_list_feature(ymins),
            'image/object/bbox/ymax':
                dataset_util.float_list_feature(ymaxs),
            'image/object/class/text':
                dataset_util.bytes_list_feature(classes_text),
            'image/object/class/label':
                dataset_util.int64_list_feature(classes),
        }))
    return tf_example
def dict_to_tf_example(writer, data):
    """Write one keypoint-heatmap tf.Example per retained object.

    For every entry of ``data['object']`` that is not marked difficult and
    has enough visible keypoints, a heatmap of shape [h, w, kpNum] is
    rendered and serialized to ``writer`` together with the raw pixels of
    the whole image.

    Args:
      writer: object with a ``write`` method that receives each serialized
        tf.Example.
      data: dict holding the XML fields of one image: 'folder', 'filename'
        and 'object'; each object provides 'difficult', 'bndbox', 'name',
        'truncated', 'pose' and 'keypoints' (with 'visible', 'x', 'y').

    Returns:
      None; examples are written to ``writer`` as a side effect, and the
      module-level counter ``_all_num`` is incremented once per example.
    """
    # The annotation stores paths under one root; remap it to the local one.
    full_path = os.path.join(data['folder'], data['filename']).replace(
        '/home/data/usrs/jiangyz/images', '/data1/chenyf')
    #print('full_path%s'%full_path)
    OriImg = PIL.Image.open(full_path)
    if (OriImg.mode != 'RGB'):
        OriImg = OriImg.convert("RGB")
        print(full_path + ' is not a rgb image, converting...')
    OriImgArray = np.asarray(OriImg)
    w = int(OriImgArray.shape[1])
    h = int(OriImgArray.shape[0])
    for obj in data['object']:
        difficult = bool(int(obj['difficult']))
        if difficult:
            print('there is a difficult instance.....')
            # NOTE(review): raw_input is a Python 2 builtin and blocks until
            # the user presses Enter; confirm this pause is still wanted.
            raw_input()
            continue
        # The box corners are parsed but only referenced by the disabled
        # area filter below.
        left = int(obj['bndbox']['xmin'])
        top = int(obj['bndbox']['ymin'])
        right = int(obj['bndbox']['xmax'])
        down = int(obj['bndbox']['ymax'])
        # if (right-left)*(down-top)<w*h/4:
        #     continue
        difficult_obj = int(difficult)
        # No per-person crop is taken: the full image is stored per object.
        imgSinglePerson = OriImg
        imgSingle = np.asarray(imgSinglePerson)
        # Raw array bytes, not an encoded image file.
        img_raw = imgSingle.tostring()
        classes_text = obj['name'].encode('utf8')
        classes = 0
        kp_cor_v = [int(x) for x in obj['keypoints']['visible']]
        truncated = int(obj['truncated'])
        poses = obj['pose'].encode('utf8')
        kpNum = FLAGS.kpNum
        # Skip objects with fewer visible keypoints than (kpNum + 1) / 2.
        if (sum(1 for x in kp_cor_v if x) < (kpNum + 1) / 2):
            continue
        # Collect (x, y) of the visible keypoints only.
        kp_cor = []
        for tmp_id in range(kpNum):
            if kp_cor_v[tmp_id] != 0:
                #convert to imgSinglePerson
                xc = int(obj['keypoints']['x'][tmp_id])
                yc = int(obj['keypoints']['y'][tmp_id])
                kp_cor.append((xc, yc))
        global _all_num
        _all_num += 1
        #tf.summary.image('image',tf.convert_to_tensor(np.array([OriImgArray])),1) #show images
        #color = [(255,0,0),(0,255,0),(0,0,255),(255,255,0),(0,255,255),(255,0,255),(0,0,0)]
        heatmap = np.zeros([h, w, kpNum], np.float32)
        #print('shape of heatmap=',np.shape(heatmap))
        sigma = 10
        # NOTE(review): idx counts entries of kp_cor, which holds visible
        # keypoints only, so a channel index is the position among visible
        # keypoints rather than the keypoint id whenever an earlier keypoint
        # is invisible. Confirm this is intended.
        for idx, cor_xy in enumerate(kp_cor):
            cor_x, cor_y = cor_xy
            r = 36  # int(8/96.0*224)
            # Add a Gaussian bump on the (2r+1) x (2r+1) window around the
            # keypoint, ignoring pixels outside the image.
            for ii in range(-r, r + 1, 1):
                for jj in range(-r, r + 1, 1):
                    xxxx = cor_x + ii
                    yyyy = cor_y + jj
                    if (xxxx < 0) or (yyyy < 0) or (xxxx > w - 1) or (yyyy >
                                                                      h - 1):
                        continue
                    # NOTE(review): both operands of `/` are ints; without
                    # true division (Python 2) the exponent is floored.
                    heatmap[yyyy, xxxx, idx] += np.exp(-(ii * ii + jj * jj) /
                                                       (2 * sigma * sigma))
                    #heatmap[yyyy,xxxx]=255
        # Cap the heatmap values at 1.
        heatmap[heatmap > 1] = 1.0
        #print('length of heatmap=',len(hm_raw))
        # NOTE(review): 'image/encoded' carries raw pixel bytes while
        # 'image/format' says 'jpeg'; readers must not JPEG-decode it.
        example = tf.train.Example(features=tf.train.Features(
            feature={
                'image/height':
                    dataset_util.int64_feature(h),
                'image/width':
                    dataset_util.int64_feature(w),
                'image/filename':
                    dataset_util.bytes_feature(data['filename'].encode('utf8')),
                'image/source_id':
                    dataset_util.bytes_feature(data['filename'].encode('utf8')),
                'image/encoded':
                    dataset_util.bytes_feature(img_raw),
                'image/heatmap':
                    dataset_util.float_list_feature(heatmap.flatten()),
                'image/keypointnumber':
                    dataset_util.int64_feature(kpNum),
                'image/format':
                    dataset_util.bytes_feature('jpeg'.encode('utf8')),
                'image/object/class/text':
                    dataset_util.bytes_feature(classes_text),
                'image/object/class/label':
                    dataset_util.int64_feature(classes),
                'image/object/difficult':
                    dataset_util.int64_feature(difficult_obj),
                'image/object/truncated':
                    dataset_util.int64_feature(truncated),
                'image/object/view':
                    dataset_util.bytes_feature(poses),
            }))
        writer.write(example.SerializeToString())
def read_xml_make_tfrecord():
    """Convert the images in `folder` and their masks into sharded TFRecords.

    For every '*.jpg' entry of the module-level `folder`, the same-named mask
    under 'mask/' is loaded, the bounding box of its largest contour becomes
    the single 'passport' box, and pixels equal to 255 in the mask become a
    PNG-encoded binary instance mask. Examples are distributed by directory
    position: the first 80% to the train shards, the next 10% to the valid
    shard(s) and the rest to the test shard(s), under 'tfrecord/<split>/'.

    Entries whose image or mask cannot be read, or whose mask contains no
    contour, are reported and skipped. All writers are closed on exit, also
    when an error interrupts the conversion.
    """
    num_data = 8
    num_eval = int(num_data / 8)
    shard_pattern = 'tfrecord/{0}/{0}.tfrecord-{1:05d}-of-{2:05d}'
    object_name = 'passport'
    pixel_val = 255

    opened = []  # Every writer created so far, so `finally` can close them.

    def open_shards(split, count):
        """Open `count` TFRecord writers for `split` and register them."""
        shards = []
        for shard_id in range(count):
            shard = tensorflow.io.TFRecordWriter(
                shard_pattern.format(split, shard_id, count))
            opened.append(shard)
            shards.append(shard)
        return shards

    def pick(shards, position):
        """Return the shard for a fractional position, clamped to the last."""
        return shards[min(int(position), len(shards) - 1)]

    try:
        train_writers = open_shards('train', num_data)
        test_writers = open_shards('test', num_eval)
        valid_writers = open_shards('valid', num_eval)

        # List the directory once so the count and the iteration agree.
        entries = os.listdir(folder)
        length = len(entries)

        for number, img_name in enumerate(entries):
            if img_name[-4:] != '.jpg':
                continue
            image_path = os.path.join(folder, img_name)
            mask_path = os.path.join('mask', img_name)

            img = cv2.imread(image_path)
            mask = cv2.imread(mask_path, 0)
            if img is None or mask is None:
                print('skipping %s: image or mask could not be read' %
                      img_name)
                continue
            height, width = img.shape[:2]

            mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE,
                                    np.ones((5, 5), np.uint8))
            # findContours returns (image, contours, hierarchy) on OpenCV 3
            # and (contours, hierarchy) on 2.x / 4.x; [-2] fits both.
            contours = cv2.findContours(mask, cv2.RETR_TREE,
                                        cv2.CHAIN_APPROX_SIMPLE)[-2]
            if len(contours) == 0:
                print('skipping %s: mask contains no contour' % img_name)
                continue

            # The contour with the most points is taken as the object.
            largest = max(contours, key=len)
            xs = largest[:, 0, 0]
            ys = largest[:, 0, 1]
            xmin = int(xs.min())
            xmax = int(xs.max())
            ymin = int(ys.min())
            ymax = int(ys.max())

            with tensorflow.io.gfile.GFile(image_path, 'rb') as fid:
                encoded_image_data = fid.read()
            key = hashlib.sha256(encoded_image_data).hexdigest()

            with tensorflow.io.gfile.GFile(mask_path, 'rb') as fid:
                encoded_mask_data = fid.read()
            mask_np = np.asarray(
                Image.open(io.BytesIO(encoded_mask_data)).convert('L'))
            # Binary mask: 1 where the stored mask equals `pixel_val`.
            mask_remapped = (mask_np == pixel_val).astype(np.uint8)
            output = io.BytesIO()
            Image.fromarray(mask_remapped).save(output, format='PNG')

            xmins = [xmin / float(width)]
            xmaxs = [xmax / float(width)]
            ymins = [ymin / float(height)]
            ymaxs = [ymax / float(height)]
            classes_text = [object_name.encode('utf8')]
            classes = [1]
            masks = [output.getvalue()]

            print(img_name)
            print(xmins)
            print(xmaxs)
            print(ymins)
            print(ymaxs)
            print(classes_text)
            print(classes)

            example = tensorflow.train.Example(
                features=tensorflow.train.Features(
                    feature={
                        'image/height':
                        dataset_util.int64_feature(height),
                        'image/width':
                        dataset_util.int64_feature(width),
                        'image/filename':
                        dataset_util.bytes_feature(img_name.encode('utf8')),
                        'image/source_id':
                        dataset_util.bytes_feature(img_name.encode('utf8')),
                        'image/key/sha256':
                        dataset_util.bytes_feature(key.encode('utf8')),
                        'image/encoded':
                        dataset_util.bytes_feature(encoded_image_data),
                        'image/format':
                        dataset_util.bytes_feature('jpeg'.encode('utf8')),
                        'image/object/bbox/xmin':
                        dataset_util.float_list_feature(xmins),
                        'image/object/bbox/xmax':
                        dataset_util.float_list_feature(xmaxs),
                        'image/object/bbox/ymin':
                        dataset_util.float_list_feature(ymins),
                        'image/object/bbox/ymax':
                        dataset_util.float_list_feature(ymaxs),
                        'image/object/class/text':
                        dataset_util.bytes_list_feature(classes_text),
                        'image/object/class/label':
                        dataset_util.int64_list_feature(classes),
                        'image/object/mask':
                        dataset_util.bytes_list_feature(masks),
                    }))

            # Split by position in the directory listing: 80/10/10.
            if number < length * 0.8:
                shard = pick(train_writers,
                             number / (length * 0.8) * num_data)
            elif number < length * 0.9:
                shard = pick(
                    valid_writers,
                    (number - length * 0.8) / (length * 0.1) * num_data / 8)
            else:
                shard = pick(
                    test_writers,
                    (number - length * 0.9) / (length * 0.1) * num_data / 8)
            shard.write(example.SerializeToString())
    finally:
        # Closing flushes buffered records; unclosed writers can lose data.
        for shard in opened:
            shard.close()