def test_csv_to_tf_example_one_raccoon_per_file(self):
    """Generate tf records for one raccoon from one file."""
    image_file_name = 'tmp_raccoon_image.jpg'
    # Use uint8 pixel data: np.random.rand returns float64, which
    # PIL.Image.fromarray cannot treat as RGB pixels (the explicit
    # `mode` argument is deprecated and a float64 buffer is
    # misinterpreted / rejected by modern Pillow).
    image_data = np.random.randint(0, 256, size=(256, 256, 3), dtype=np.uint8)
    save_path = os.path.join(self.get_temp_dir(), image_file_name)
    # Mode is inferred as RGB from the uint8 HxWx3 array.
    image = PIL.Image.fromarray(image_data)
    image.save(save_path)

    # Build a one-row annotation table: a single 'raccoon' box
    # spanning pixels (64, 64)-(192, 192) in the 256x256 image.
    column_names = [
        'filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax'
    ]
    raccoon_data = [('tmp_raccoon_image.jpg', 256, 256, 'raccoon', 64, 64,
                     192, 192)]
    raccoon_df = pd.DataFrame(raccoon_data, columns=column_names)
    grouped = generate_tfrecord.split(raccoon_df, 'filename')

    for group in grouped:
        example = generate_tfrecord.create_tf_example(group,
                                                      self.get_temp_dir())
        self._assertProtoEqual(
            example.features.feature['image/height'].int64_list.value, [256])
        self._assertProtoEqual(
            example.features.feature['image/width'].int64_list.value, [256])
        self._assertProtoEqual(
            example.features.feature['image/filename'].bytes_list.value,
            [image_file_name.encode('utf-8')])
        self._assertProtoEqual(
            example.features.feature['image/source_id'].bytes_list.value,
            [image_file_name.encode('utf-8')])
        self._assertProtoEqual(
            example.features.feature['image/format'].bytes_list.value,
            [b'jpg'])
        # Box coordinates are normalized by the image size: 64/256 = 0.25,
        # 192/256 = 0.75.
        self._assertProtoEqual(
            example.features.feature['image/object/bbox/xmin'].float_list.
            value, [0.25])
        self._assertProtoEqual(
            example.features.feature['image/object/bbox/ymin'].float_list.
            value, [0.25])
        self._assertProtoEqual(
            example.features.feature['image/object/bbox/xmax'].float_list.
            value, [0.75])
        self._assertProtoEqual(
            example.features.feature['image/object/bbox/ymax'].float_list.
            value, [0.75])
        self._assertProtoEqual(
            example.features.feature['image/object/class/text'].bytes_list.
            value, [b'raccoon'])
        self._assertProtoEqual(
            example.features.feature['image/object/class/label'].int64_list.
            value, [1])
def test_csv_to_tf_example_multiple_airplanes_per_file(self):
    """Generate tf records for multiple airplanes from one file."""
    image_file_name = 'tmp_airplane_image.jpg'
    # Use uint8 pixel data: np.random.rand returns float64, which
    # PIL.Image.fromarray cannot treat as RGB pixels (the explicit
    # `mode` argument is deprecated and a float64 buffer is
    # misinterpreted / rejected by modern Pillow).
    image_data = np.random.randint(0, 256, size=(256, 256, 3), dtype=np.uint8)
    save_path = os.path.join(self.get_temp_dir(), image_file_name)
    # Mode is inferred as RGB from the uint8 HxWx3 array.
    image = PIL.Image.fromarray(image_data)
    image.save(save_path)

    # Two 'airplane' boxes in the same image; both rows share the
    # filename so split() groups them into a single example.
    column_names = [
        'filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax'
    ]
    airplane_data = [
        ('tmp_airplane_image.jpg', 256, 256, 'airplane', 64, 64, 192, 192),
        ('tmp_airplane_image.jpg', 256, 256, 'airplane', 96, 96, 128, 128)
    ]
    airplane_df = pd.DataFrame(airplane_data, columns=column_names)
    grouped = generate_tfrecord.split(airplane_df, 'filename')

    for group in grouped:
        example = generate_tfrecord.create_tf_example(group,
                                                      self.get_temp_dir())
        self._assertProtoEqual(
            example.features.feature['image/height'].int64_list.value, [256])
        self._assertProtoEqual(
            example.features.feature['image/width'].int64_list.value, [256])
        self._assertProtoEqual(
            example.features.feature['image/filename'].bytes_list.value,
            [image_file_name.encode('utf-8')])
        self._assertProtoEqual(
            example.features.feature['image/source_id'].bytes_list.value,
            [image_file_name.encode('utf-8')])
        self._assertProtoEqual(
            example.features.feature['image/format'].bytes_list.value,
            [b'jpg'])
        # Normalized coordinates: 64/256 = 0.25, 192/256 = 0.75 for the
        # first box; 96/256 = 0.375, 128/256 = 0.5 for the second.
        self._assertProtoEqual(
            example.features.feature['image/object/bbox/xmin'].float_list.
            value, [0.25, 0.375])
        self._assertProtoEqual(
            example.features.feature['image/object/bbox/ymin'].float_list.
            value, [0.25, 0.375])
        self._assertProtoEqual(
            example.features.feature['image/object/bbox/xmax'].float_list.
            value, [0.75, 0.5])
        self._assertProtoEqual(
            example.features.feature['image/object/bbox/ymax'].float_list.
            value, [0.75, 0.5])
        self._assertProtoEqual(
            example.features.feature['image/object/class/text'].bytes_list.
            value, [b'airplane', b'airplane'])
        self._assertProtoEqual(
            example.features.feature['image/object/class/label'].int64_list.
            value, [1, 1])
def test_csv_to_tf_example_multiple_raccoons_per_file(self):
    """Generate tf records for multiple raccoons from one file."""
    image_file_name = 'tmp_raccoon_image.jpg'
    # Use uint8 pixel data: np.random.rand returns float64, which
    # PIL.Image.fromarray cannot treat as RGB pixels (the explicit
    # `mode` argument is deprecated and a float64 buffer is
    # misinterpreted / rejected by modern Pillow).
    image_data = np.random.randint(0, 256, size=(256, 256, 3), dtype=np.uint8)
    save_path = os.path.join(self.get_temp_dir(), image_file_name)
    # Mode is inferred as RGB from the uint8 HxWx3 array.
    image = PIL.Image.fromarray(image_data)
    image.save(save_path)

    # Two 'raccoon' boxes in the same image; both rows share the
    # filename so split() groups them into a single example.
    column_names = ['filename', 'width', 'height', 'class', 'xmin', 'ymin',
                    'xmax', 'ymax']
    raccoon_data = [('tmp_raccoon_image.jpg', 256, 256, 'raccoon', 64, 64,
                     192, 192),
                    ('tmp_raccoon_image.jpg', 256, 256, 'raccoon', 96, 96,
                     128, 128)]
    raccoon_df = pd.DataFrame(raccoon_data, columns=column_names)
    grouped = generate_tfrecord.split(raccoon_df, 'filename')

    for group in grouped:
        example = generate_tfrecord.create_tf_example(group,
                                                      self.get_temp_dir())
        self._assertProtoEqual(
            example.features.feature['image/height'].int64_list.value, [256])
        self._assertProtoEqual(
            example.features.feature['image/width'].int64_list.value, [256])
        self._assertProtoEqual(
            example.features.feature['image/filename'].bytes_list.value,
            [image_file_name.encode('utf-8')])
        self._assertProtoEqual(
            example.features.feature['image/source_id'].bytes_list.value,
            [image_file_name.encode('utf-8')])
        self._assertProtoEqual(
            example.features.feature['image/format'].bytes_list.value,
            [b'jpg'])
        # Normalized coordinates: 64/256 = 0.25, 192/256 = 0.75 for the
        # first box; 96/256 = 0.375, 128/256 = 0.5 for the second.
        self._assertProtoEqual(
            example.features.feature['image/object/bbox/xmin'].float_list.value,
            [0.25, 0.375])
        self._assertProtoEqual(
            example.features.feature['image/object/bbox/ymin'].float_list.value,
            [0.25, 0.375])
        self._assertProtoEqual(
            example.features.feature['image/object/bbox/xmax'].float_list.value,
            [0.75, 0.5])
        self._assertProtoEqual(
            example.features.feature['image/object/bbox/ymax'].float_list.value,
            [0.75, 0.5])
        self._assertProtoEqual(
            example.features.feature['image/object/class/text'].bytes_list.value,
            [b'raccoon', b'raccoon'])
        self._assertProtoEqual(
            example.features.feature['image/object/class/label'].int64_list.value,
            [1, 1])
# Fragment of a larger pipeline function (its `def` and the variables
# `dataset_dir`, `csv_train`, `tf_record_train`, etc. are defined outside
# this view).  It augments the dataset, converts train-split XML
# annotations to CSV, writes them to a train TFRecord, then begins the
# same conversion for the test split.
# NOTE(review): the statement grouping below was reconstructed from a
# whitespace-mangled source; the test-split lines presumably run
# regardless of `generate_test_only` — confirm against the original file.
do_augmentation(dataset_dir, output_dir, file_ext, strAugs=string_aug_name)
if not generate_test_only:
    # Train split: XML annotations -> CSV -> TFRecord.
    xml_df = xml_to_csv(image_train_path)
    xml_df.to_csv(csv_train, index=None)
    print('Successfully converted xml to csv for both train and test.')
    writer = tf.python_io.TFRecordWriter(tf_record_train)
    path = os.path.join(image_train_path)
    examples = pd.read_csv(csv_train)
    # One group (and hence one tf.Example) per image filename.
    grouped = split(examples, 'filename')
    for group in grouped:
        print("generating: ", group)
        tf_example = create_tf_example(group, path)
        writer.write(tf_example.SerializeToString())
    writer.close()
    output_path = os.path.join(os.getcwd(), tf_record_train)
    print('Successfully created the TFRecords: {}'.format(output_path))
# Test split: same XML -> CSV -> TFRecord conversion.
xml_df = xml_to_csv(image_test_path)
xml_df.to_csv(csv_test, index=None)
writer = tf.python_io.TFRecordWriter(tf_record_test)
def test_csv_to_tf_example_one_buses_multiple_files(self):
    """Generate tf records for one bus for multiple files."""
    image_file_one = 'tmp_bus_image_1.jpg'
    image_file_two = 'tmp_bus_image_2.jpg'
    # Use uint8 pixel data: np.random.rand returns float64, which
    # PIL.Image.fromarray cannot treat as RGB pixels (the explicit
    # `mode` argument is deprecated and a float64 buffer is
    # misinterpreted / rejected by modern Pillow).
    image_data = np.random.randint(0, 256, size=(256, 256, 3), dtype=np.uint8)
    save_path_one = os.path.join(self.get_temp_dir(), image_file_one)
    save_path_two = os.path.join(self.get_temp_dir(), image_file_two)
    # Mode is inferred as RGB from the uint8 HxWx3 array; the same
    # image content is saved under both filenames.
    image = PIL.Image.fromarray(image_data)
    image.save(save_path_one)
    image.save(save_path_two)

    # One 'bus' box per file; the differing filenames make split()
    # produce one group (one tf.Example) per file.
    column_names = ['filename', 'width', 'height', 'class', 'xmin', 'ymin',
                    'xmax', 'ymax']
    bus_data = [('tmp_bus_image_1.jpg', 256, 256, 'bus', 64, 64, 192, 192),
                ('tmp_bus_image_2.jpg', 256, 256, 'bus', 96, 96, 128, 128)]
    bus_df = pd.DataFrame(bus_data, columns=column_names)
    grouped = generate_tfrecord.split(bus_df, 'filename')

    for group in grouped:
        if group.filename == image_file_one:
            example = generate_tfrecord.create_tf_example(
                group, self.get_temp_dir())
            self._assertProtoEqual(
                example.features.feature['image/height'].int64_list.value,
                [256])
            self._assertProtoEqual(
                example.features.feature['image/width'].int64_list.value,
                [256])
            self._assertProtoEqual(
                example.features.feature['image/filename'].bytes_list.value,
                [image_file_one.encode('utf-8')])
            self._assertProtoEqual(
                example.features.feature['image/source_id'].bytes_list.value,
                [image_file_one.encode('utf-8')])
            self._assertProtoEqual(
                example.features.feature['image/format'].bytes_list.value,
                [b'jpg'])
            # Normalized coordinates: 64/256 = 0.25, 192/256 = 0.75.
            self._assertProtoEqual(
                example.features.feature['image/object/bbox/xmin'].float_list.value,
                [0.25])
            self._assertProtoEqual(
                example.features.feature['image/object/bbox/ymin'].float_list.value,
                [0.25])
            self._assertProtoEqual(
                example.features.feature['image/object/bbox/xmax'].float_list.value,
                [0.75])
            self._assertProtoEqual(
                example.features.feature['image/object/bbox/ymax'].float_list.value,
                [0.75])
            self._assertProtoEqual(
                example.features.feature['image/object/class/text'].bytes_list.value,
                [b'bus'])
            self._assertProtoEqual(
                example.features.feature['image/object/class/label'].int64_list.value,
                [1])
        elif group.filename == image_file_two:
            example = generate_tfrecord.create_tf_example(
                group, self.get_temp_dir())
            self._assertProtoEqual(
                example.features.feature['image/height'].int64_list.value,
                [256])
            self._assertProtoEqual(
                example.features.feature['image/width'].int64_list.value,
                [256])
            self._assertProtoEqual(
                example.features.feature['image/filename'].bytes_list.value,
                [image_file_two.encode('utf-8')])
            self._assertProtoEqual(
                example.features.feature['image/source_id'].bytes_list.value,
                [image_file_two.encode('utf-8')])
            self._assertProtoEqual(
                example.features.feature['image/format'].bytes_list.value,
                [b'jpg'])
            # Normalized coordinates: 96/256 = 0.375, 128/256 = 0.5.
            self._assertProtoEqual(
                example.features.feature['image/object/bbox/xmin'].float_list.value,
                [0.375])
            self._assertProtoEqual(
                example.features.feature['image/object/bbox/ymin'].float_list.value,
                [0.375])
            self._assertProtoEqual(
                example.features.feature['image/object/bbox/xmax'].float_list.value,
                [0.5])
            self._assertProtoEqual(
                example.features.feature['image/object/bbox/ymax'].float_list.value,
                [0.5])
            self._assertProtoEqual(
                example.features.feature['image/object/class/text'].bytes_list.value,
                [b'bus'])
            self._assertProtoEqual(
                example.features.feature['image/object/class/label'].int64_list.value,
                [1])
def main(_):
    """Convert Labelbox annotations to per-split CSVs and TFRecords.

    Depending on FLAGS.labelbox_format the raw annotations are first
    converted from JSON to PASCAL VOC, then split into train/test
    folders.  For each split the XML annotations are flattened to a
    CSV and serialized into a TFRecord file; optional flags validate
    the written records and clean up intermediate files.
    """
    # Bug fix: the original compared FLAGS string values with `is`,
    # which tests object identity, not equality (a SyntaxWarning since
    # Python 3.8) — all such comparisons now use `==` / `!=`.
    if FLAGS.labelbox_format == 'json':
        json_path = FLAGS.json_path
        json_to_pascal(json_path)
        split_data(resize=FLAGS.resize)
    elif FLAGS.labelbox_format == 'PASCAL':
        image_dir = FLAGS.image_dir
        ann_dir = FLAGS.ann_dir
        split_data(image_dir, ann_dir, resize=FLAGS.resize)

    for folder in ['train', 'test']:
        labelmap_name = FLAGS.labelmap_path
        # XML annotations -> flat CSV for this split.
        xml_df, num_of_images = xml_to_csv(folder, labelmap_name,
                                           resize=FLAGS.resize)
        xml_df.to_csv((folder + '_labels.csv'), index=None)
        print('Successfully converted xml to csv.')
        print(" Number of Images in: ", folder, num_of_images)

        # Run Generate_tfrecords: build the label dict, then write one
        # tf.Example per image filename group.
        gtfr.create_dict()
        if FLAGS.output_path == 'default':
            writer = tf.python_io.TFRecordWriter(
                '../docker_tf/transfer_learning/data/' + (folder + '.record'))
        else:
            writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
        if FLAGS.image_dir == 'default':
            path = os.path.join(os.getcwd(), folder)
        else:
            path = os.path.join(os.getcwd(), FLAGS.image_dir)
        if FLAGS.csv_input == 'default':
            examples = pd.read_csv((folder + '_labels.csv'))
        else:
            examples = pd.read_csv(FLAGS.csv_input)
        grouped = gtfr.split(examples, 'filename')
        for group in grouped:
            tf_example = gtfr.create_tf_example(group, path)
            writer.write(tf_example.SerializeToString())
        writer.close()
        output_path = os.path.join(os.getcwd(), FLAGS.output_path)
        print('Successfully created the TFRecords: {}'.format(output_path))

        if FLAGS.check_tfrecords:
            # Re-read the records that were just written and dump each
            # decoded Example so the user can eyeball the contents.
            print(
                'Checking Validity of TFRecords. Expect image encoding alongside label data.'
            )
            if FLAGS.output_path == 'default':
                for example in tf.python_io.tf_record_iterator(
                        '../docker_tf/transfer_learning/data/' +
                        (folder + '.record')):
                    result = tf.train.Example.FromString(example)
                    print(result)
            else:
                for example in tf.python_io.tf_record_iterator(
                        FLAGS.output_path):
                    result = tf.train.Example.FromString(example)
                    print(result)

    # Clean up intermediate CSVs and image/annotation folders.
    # Best-effort: a missing file/dir is reported, not fatal.
    if FLAGS.cleanup:
        try:
            if FLAGS.csv_input == 'default':
                os.remove('test_labels.csv')
                os.remove('train_labels.csv')
            else:
                os.remove(FLAGS.csv_input)
            if FLAGS.image_dir != 'default' or FLAGS.ann_dir != 'default':
                shutil.rmtree(FLAGS.image_dir)
                shutil.rmtree(FLAGS.ann_dir)
            else:
                shutil.rmtree('Images/')
                shutil.rmtree('Annotations/')
                shutil.rmtree('test/')
                shutil.rmtree('train/')
        except Exception as e:
            print(
                "Cleanup failed, either a directory or file was missing. Could indicate failed download.",
                e)
def main(_):
    """Convert Labelbox annotations to per-split CSVs and TFRecords.

    Depending on FLAGS.labelbox_format the raw annotations are first
    converted from JSON to PASCAL VOC, then split into train/test
    folders.  For each split the XML annotations are flattened to a
    CSV and serialized into a TFRecord file; optional flags validate
    the written records and clean up intermediate files.
    """
    # Bug fix: the original compared FLAGS string values with `is`,
    # which tests object identity, not equality (a SyntaxWarning since
    # Python 3.8) — all such comparisons now use `==` / `!=`.
    if FLAGS.labelbox_format == 'json':
        json_path = FLAGS.json_path
        json_to_pascal(json_path)
        split_data(resize=FLAGS.resize)
    elif FLAGS.labelbox_format == 'PASCAL':
        image_dir = FLAGS.image_dir
        ann_dir = FLAGS.ann_dir
        split_data(image_dir, ann_dir, resize=FLAGS.resize)

    for folder in ['train', 'test']:
        labelmap_name = FLAGS.labelmap_path
        # XML annotations -> flat CSV for this split.
        xml_df, num_of_images = xml_to_csv(
            folder, labelmap_name, resize=FLAGS.resize)
        xml_df.to_csv((folder + '_labels.csv'), index=None)
        print('Successfully converted xml to csv.')
        print(" Number of Images in: ", folder, num_of_images)

        # Run Generate_tfrecords: build the label dict, then write one
        # tf.Example per image filename group.
        gtfr.create_dict()
        if FLAGS.output_path == 'default':
            writer = tf.python_io.TFRecordWriter(
                '../docker_tf/transfer_learning/data/' + (folder + '.record'))
        else:
            writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
        if FLAGS.image_dir == 'default':
            path = os.path.join(os.getcwd(), folder)
        else:
            path = os.path.join(os.getcwd(), FLAGS.image_dir)
        if FLAGS.csv_input == 'default':
            examples = pd.read_csv((folder + '_labels.csv'))
        else:
            examples = pd.read_csv(FLAGS.csv_input)
        grouped = gtfr.split(examples, 'filename')
        for group in grouped:
            tf_example = gtfr.create_tf_example(group, path)
            writer.write(tf_example.SerializeToString())
        writer.close()
        output_path = os.path.join(os.getcwd(), FLAGS.output_path)
        print('Successfully created the TFRecords: {}'.format(output_path))

        if FLAGS.check_tfrecords:
            # Re-read the records that were just written and dump each
            # decoded Example so the user can eyeball the contents.
            # (The message string was split across physical lines by a
            # formatting mangle in the source; reassembled here.)
            print('Checking Validity of TFRecords. '
                  'Expect image encoding alongside label data.')
            if FLAGS.output_path == 'default':
                for example in tf.python_io.tf_record_iterator(
                        '../docker_tf/transfer_learning/data/' +
                        (folder + '.record')):
                    result = tf.train.Example.FromString(example)
                    print(result)
            else:
                for example in tf.python_io.tf_record_iterator(
                        FLAGS.output_path):
                    result = tf.train.Example.FromString(example)
                    print(result)

    # Clean up intermediate CSVs and image/annotation folders.
    # Best-effort: a missing file/dir is reported, not fatal.
    if FLAGS.cleanup:
        try:
            if FLAGS.csv_input == 'default':
                os.remove('test_labels.csv')
                os.remove('train_labels.csv')
            else:
                os.remove(FLAGS.csv_input)
            if FLAGS.image_dir != 'default' or FLAGS.ann_dir != 'default':
                shutil.rmtree(FLAGS.image_dir)
                shutil.rmtree(FLAGS.ann_dir)
            else:
                shutil.rmtree('Images/')
                shutil.rmtree('Annotations/')
                shutil.rmtree('test/')
                shutil.rmtree('train/')
        except Exception as e:
            print(
                "Cleanup failed, either a directory or file was missing. Could indicate failed download.",
                e)