Beispiel #1
0
    def test_csv_to_tf_example_one_raccoon_per_file(self):
        """Generate a tf example for one raccoon annotated in one file.

        Saves a random 256x256 image under a ``.jpg`` name in the test temp
        dir, builds a one-row annotation table that refers to it, and checks
        each feature of the example returned by
        ``generate_tfrecord.create_tf_example``.
        """
        image_file_name = 'tmp_raccoon_image.jpg'
        # An RGB image holds 8 bits per channel, so scale the random floats
        # to uint8 instead of letting PIL reinterpret a float64 buffer.
        image_data = (np.random.rand(256, 256, 3) * 255).astype(np.uint8)
        save_path = os.path.join(self.get_temp_dir(), image_file_name)
        # Write the array to disk so the file named in the table exists.
        PIL.Image.fromarray(image_data).save(save_path)

        column_names = [
            'filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax',
            'ymax'
        ]
        # One annotation row: the class label and pixel bounding box.
        raccoon_data = [('tmp_raccoon_image.jpg', 256, 256, 'raccoon', 64, 64,
                         192, 192)]
        # Turn the rows into a table.
        raccoon_df = pd.DataFrame(raccoon_data, columns=column_names)

        grouped = list(generate_tfrecord.split(raccoon_df, 'filename'))
        # A single distinct filename must yield exactly one group; checking
        # this up front avoids asserting on a stale or undefined example.
        self.assertEqual(len(grouped), 1)
        example = generate_tfrecord.create_tf_example(
            grouped[0], self.get_temp_dir())
        features = example.features.feature

        self._assertProtoEqual(
            features['image/height'].int64_list.value, [256])
        self._assertProtoEqual(
            features['image/width'].int64_list.value, [256])
        self._assertProtoEqual(
            features['image/filename'].bytes_list.value,
            [image_file_name.encode('utf-8')])
        self._assertProtoEqual(
            features['image/source_id'].bytes_list.value,
            [image_file_name.encode('utf-8')])
        self._assertProtoEqual(
            features['image/format'].bytes_list.value, [b'jpg'])
        # Box coordinates are expected as fractions of the 256-pixel sides.
        self._assertProtoEqual(
            features['image/object/bbox/xmin'].float_list.value, [0.25])
        self._assertProtoEqual(
            features['image/object/bbox/ymin'].float_list.value, [0.25])
        self._assertProtoEqual(
            features['image/object/bbox/xmax'].float_list.value, [0.75])
        self._assertProtoEqual(
            features['image/object/bbox/ymax'].float_list.value, [0.75])
        self._assertProtoEqual(
            features['image/object/class/text'].bytes_list.value,
            [b'raccoon'])
        self._assertProtoEqual(
            features['image/object/class/label'].int64_list.value, [1])
Beispiel #2
0
    def test_csv_to_tf_example_multiple_airplanes_per_file(self):
        """Generate a tf example for two airplanes annotated in one file.

        Saves a random 256x256 image under a ``.jpg`` name in the test temp
        dir, builds a two-row annotation table that refers to it, and checks
        that the single resulting example carries both boxes in row order.
        """
        image_file_name = 'tmp_airplane_image.jpg'
        # An RGB image holds 8 bits per channel, so scale the random floats
        # to uint8 instead of letting PIL reinterpret a float64 buffer.
        image_data = (np.random.rand(256, 256, 3) * 255).astype(np.uint8)
        save_path = os.path.join(self.get_temp_dir(), image_file_name)
        PIL.Image.fromarray(image_data).save(save_path)

        column_names = [
            'filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax',
            'ymax'
        ]
        airplane_data = [
            ('tmp_airplane_image.jpg', 256, 256, 'airplane', 64, 64, 192, 192),
            ('tmp_airplane_image.jpg', 256, 256, 'airplane', 96, 96, 128, 128)
        ]
        airplane_df = pd.DataFrame(airplane_data, columns=column_names)

        grouped = list(generate_tfrecord.split(airplane_df, 'filename'))
        # Both rows share one filename, so exactly one group is expected;
        # checking this up front avoids asserting on a stale or undefined
        # example.
        self.assertEqual(len(grouped), 1)
        example = generate_tfrecord.create_tf_example(
            grouped[0], self.get_temp_dir())
        features = example.features.feature

        self._assertProtoEqual(
            features['image/height'].int64_list.value, [256])
        self._assertProtoEqual(
            features['image/width'].int64_list.value, [256])
        self._assertProtoEqual(
            features['image/filename'].bytes_list.value,
            [image_file_name.encode('utf-8')])
        self._assertProtoEqual(
            features['image/source_id'].bytes_list.value,
            [image_file_name.encode('utf-8')])
        self._assertProtoEqual(
            features['image/format'].bytes_list.value, [b'jpg'])
        # Box coordinates are expected as fractions of the 256-pixel sides.
        self._assertProtoEqual(
            features['image/object/bbox/xmin'].float_list.value,
            [0.25, 0.375])
        self._assertProtoEqual(
            features['image/object/bbox/ymin'].float_list.value,
            [0.25, 0.375])
        self._assertProtoEqual(
            features['image/object/bbox/xmax'].float_list.value,
            [0.75, 0.5])
        self._assertProtoEqual(
            features['image/object/bbox/ymax'].float_list.value,
            [0.75, 0.5])
        self._assertProtoEqual(
            features['image/object/class/text'].bytes_list.value,
            [b'airplane', b'airplane'])
        self._assertProtoEqual(
            features['image/object/class/label'].int64_list.value, [1, 1])
    def test_csv_to_tf_example_multiple_raccoons_per_file(self):
        """Generate a tf example for two raccoons annotated in one file.

        Saves a random 256x256 image under a ``.jpg`` name in the test temp
        dir, builds a two-row annotation table that refers to it, and checks
        that the single resulting example carries both boxes in row order.
        """
        image_file_name = 'tmp_raccoon_image.jpg'
        # An RGB image holds 8 bits per channel, so scale the random floats
        # to uint8 instead of letting PIL reinterpret a float64 buffer.
        image_data = (np.random.rand(256, 256, 3) * 255).astype(np.uint8)
        save_path = os.path.join(self.get_temp_dir(), image_file_name)
        PIL.Image.fromarray(image_data).save(save_path)

        column_names = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
        raccoon_data = [('tmp_raccoon_image.jpg', 256, 256, 'raccoon', 64, 64, 192, 192),
                        ('tmp_raccoon_image.jpg', 256, 256, 'raccoon', 96, 96, 128, 128)]
        raccoon_df = pd.DataFrame(raccoon_data, columns=column_names)

        grouped = list(generate_tfrecord.split(raccoon_df, 'filename'))
        # Both rows share one filename, so exactly one group is expected;
        # checking this up front avoids asserting on a stale or undefined
        # example.
        self.assertEqual(len(grouped), 1)
        example = generate_tfrecord.create_tf_example(grouped[0], self.get_temp_dir())
        features = example.features.feature

        self._assertProtoEqual(
            features['image/height'].int64_list.value, [256])
        self._assertProtoEqual(
            features['image/width'].int64_list.value, [256])
        self._assertProtoEqual(
            features['image/filename'].bytes_list.value,
            [image_file_name.encode('utf-8')])
        self._assertProtoEqual(
            features['image/source_id'].bytes_list.value,
            [image_file_name.encode('utf-8')])
        self._assertProtoEqual(
            features['image/format'].bytes_list.value, [b'jpg'])
        # Box coordinates are expected as fractions of the 256-pixel sides.
        self._assertProtoEqual(
            features['image/object/bbox/xmin'].float_list.value,
            [0.25, 0.375])
        self._assertProtoEqual(
            features['image/object/bbox/ymin'].float_list.value,
            [0.25, 0.375])
        self._assertProtoEqual(
            features['image/object/bbox/xmax'].float_list.value,
            [0.75, 0.5])
        self._assertProtoEqual(
            features['image/object/bbox/ymax'].float_list.value,
            [0.75, 0.5])
        self._assertProtoEqual(
            features['image/object/class/text'].bytes_list.value,
            [b'raccoon', b'raccoon'])
        self._assertProtoEqual(
            features['image/object/class/label'].int64_list.value,
            [1, 1])
Beispiel #4
0
        do_augmentation(dataset_dir, output_dir, file_ext, strAugs=string_aug_name)

    if not generate_test_only:

        xml_df = xml_to_csv(image_train_path)  # for train
        xml_df.to_csv(csv_train, index=None)

        print('Successfully converted xml to csv for both train and test.')

        writer = tf.python_io.TFRecordWriter(tf_record_train)
        path = os.path.join(image_train_path)
        examples = pd.read_csv(csv_train)
        grouped = split(examples, 'filename')
        for group in grouped:
            print("generating: ", group)
            tf_example = create_tf_example(group, path)
            writer.write(tf_example.SerializeToString())
        ###



        writer.close()
        output_path = os.path.join(os.getcwd(), tf_record_train)
        print('Successfully created the TFRecords: {}'.format(output_path))



    xml_df = xml_to_csv(image_test_path)  # for test
    xml_df.to_csv(csv_test, index=None)

    writer = tf.python_io.TFRecordWriter(tf_record_test)
    def test_csv_to_tf_example_one_buses_multiple_files(self):
        """Generate tf examples for one bus in each of two files.

        Saves the same random 256x256 image under two ``.jpg`` names in the
        test temp dir, builds a table with one annotation row per file, and
        checks the example produced for each file. The test fails if a group
        carries an unexpected filename or if either file yields no group.
        """
        image_file_one = 'tmp_bus_image_1.jpg'
        image_file_two = 'tmp_bus_image_2.jpg'
        # An RGB image holds 8 bits per channel, so scale the random floats
        # to uint8 instead of letting PIL reinterpret a float64 buffer.
        image_data = (np.random.rand(256, 256, 3) * 255).astype(np.uint8)
        save_path_one = os.path.join(self.get_temp_dir(), image_file_one)
        save_path_two = os.path.join(self.get_temp_dir(), image_file_two)
        image = PIL.Image.fromarray(image_data)
        image.save(save_path_one)
        image.save(save_path_two)

        column_names = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
        bus_data = [('tmp_bus_image_1.jpg', 256, 256, 'bus', 64, 64, 192, 192),
                    ('tmp_bus_image_2.jpg', 256, 256, 'bus', 96, 96, 128, 128)]
        bus_df = pd.DataFrame(bus_data, columns=column_names)

        # Expected (xmin, ymin, xmax, ymax) per file, as fractions of the
        # 256-pixel sides; everything else is identical for both files.
        expected_boxes = {
            image_file_one: ([0.25], [0.25], [0.75], [0.75]),
            image_file_two: ([0.375], [0.375], [0.5], [0.5]),
        }

        seen_files = []
        grouped = generate_tfrecord.split(bus_df, 'filename')
        for group in grouped:
            # An unknown filename used to be skipped silently, which let the
            # test pass without running a single assertion.
            self.assertIn(group.filename, expected_boxes)
            xmin, ymin, xmax, ymax = expected_boxes[group.filename]

            example = generate_tfrecord.create_tf_example(group, self.get_temp_dir())
            features = example.features.feature
            self._assertProtoEqual(
                features['image/height'].int64_list.value, [256])
            self._assertProtoEqual(
                features['image/width'].int64_list.value, [256])
            self._assertProtoEqual(
                features['image/filename'].bytes_list.value,
                [group.filename.encode('utf-8')])
            self._assertProtoEqual(
                features['image/source_id'].bytes_list.value,
                [group.filename.encode('utf-8')])
            self._assertProtoEqual(
                features['image/format'].bytes_list.value, [b'jpg'])
            self._assertProtoEqual(
                features['image/object/bbox/xmin'].float_list.value, xmin)
            self._assertProtoEqual(
                features['image/object/bbox/ymin'].float_list.value, ymin)
            self._assertProtoEqual(
                features['image/object/bbox/xmax'].float_list.value, xmax)
            self._assertProtoEqual(
                features['image/object/bbox/ymax'].float_list.value, ymax)
            self._assertProtoEqual(
                features['image/object/class/text'].bytes_list.value,
                [b'bus'])
            self._assertProtoEqual(
                features['image/object/class/label'].int64_list.value,
                [1])
            seen_files.append(group.filename)

        # Each file must have produced exactly one group.
        self.assertEqual(sorted(seen_files), sorted(expected_boxes))
Beispiel #6
0
def main(_):
    """Convert a Labelbox export into TFRecord files for train and test.

    Steps, in order:
      1. Depending on ``FLAGS.labelbox_format``, convert the JSON export to
         PASCAL annotations and/or split the data via ``split_data``.
      2. For each of the ``train`` and ``test`` folders, write a labels CSV,
         then serialize one tf example per image group into a TFRecord file.
      3. Optionally print every written record (``FLAGS.check_tfrecords``).
      4. Optionally delete the intermediate files (``FLAGS.cleanup``).

    Args:
        _: Unused positional argument.
    """
    # Flag values are compared with ``==``: ``is`` tests object identity, so
    # a string supplied at runtime would never match a literal.
    if FLAGS.labelbox_format == 'json':
        json_to_pascal(FLAGS.json_path)
        split_data(resize=FLAGS.resize)
    elif FLAGS.labelbox_format == 'PASCAL':
        split_data(FLAGS.image_dir, FLAGS.ann_dir, resize=FLAGS.resize)

    for folder in ['train', 'test']:
        labels_csv = folder + '_labels.csv'
        xml_df, num_of_images = xml_to_csv(folder,
                                           FLAGS.labelmap_path,
                                           resize=FLAGS.resize)
        xml_df.to_csv(labels_csv, index=None)
        print('Successfully converted xml to csv.')
        print(" Number of Images in: ", folder, num_of_images)

        # Run Generate_tfrecords
        gtfr.create_dict()

        if FLAGS.output_path == 'default':
            record_path = ('../docker_tf/transfer_learning/data/' +
                           (folder + '.record'))
        else:
            # NOTE(review): an explicit output path is reused for both
            # folders, so the test records replace the train records.
            record_path = FLAGS.output_path

        if FLAGS.image_dir == 'default':
            path = os.path.join(os.getcwd(), folder)
        else:
            path = os.path.join(os.getcwd(), FLAGS.image_dir)

        if FLAGS.csv_input == 'default':
            examples = pd.read_csv(labels_csv)
        else:
            examples = pd.read_csv(FLAGS.csv_input)

        # The context manager closes the writer even if an example fails.
        with tf.python_io.TFRecordWriter(record_path) as writer:
            for group in gtfr.split(examples, 'filename'):
                tf_example = gtfr.create_tf_example(group, path)
                writer.write(tf_example.SerializeToString())

        # Report the file that was actually written, not the raw flag value.
        output_path = os.path.join(os.getcwd(), record_path)
        print('Successfully created the TFRecords: {}'.format(output_path))

        if FLAGS.check_tfrecords:
            print(
                'Checking Validity of TFRecords. Expect image encoding alongside label data.'
            )
            for example in tf.python_io.tf_record_iterator(record_path):
                result = tf.train.Example.FromString(example)
                print(result)

    # Clean up
    if FLAGS.cleanup:
        # Best effort: any failure is reported and the remaining removals
        # are skipped.
        try:
            if FLAGS.csv_input == 'default':
                os.remove('test_labels.csv')
                os.remove('train_labels.csv')
            else:
                os.remove(FLAGS.csv_input)
            if FLAGS.image_dir != 'default' or FLAGS.ann_dir != 'default':
                shutil.rmtree(FLAGS.image_dir)
                shutil.rmtree(FLAGS.ann_dir)
            else:
                shutil.rmtree('Images/')
                shutil.rmtree('Annotations/')
            shutil.rmtree('test/')
            shutil.rmtree('train/')
        except Exception as e:
            print(
                "Cleanup failed, either a directory or file was missing. Could indicate failed download.",
                e)
def main(_):
    """Convert a Labelbox export into TFRecord files for train and test.

    Steps, in order:
      1. Depending on ``FLAGS.labelbox_format``, convert the JSON export to
         PASCAL annotations and/or split the data via ``split_data``.
      2. For each of the ``train`` and ``test`` folders, write a labels CSV,
         then serialize one tf example per image group into a TFRecord file.
      3. Optionally print every written record (``FLAGS.check_tfrecords``).
      4. Optionally delete the intermediate files (``FLAGS.cleanup``).

    Args:
        _: Unused positional argument.
    """
    # Flag values are compared with ``==``: ``is`` tests object identity, so
    # a string supplied at runtime would never match a literal.
    if FLAGS.labelbox_format == 'json':
        json_to_pascal(FLAGS.json_path)
        split_data(resize=FLAGS.resize)
    elif FLAGS.labelbox_format == 'PASCAL':
        split_data(FLAGS.image_dir, FLAGS.ann_dir, resize=FLAGS.resize)

    for folder in ['train', 'test']:
        labels_csv = folder + '_labels.csv'
        xml_df, num_of_images = xml_to_csv(
            folder, FLAGS.labelmap_path, resize=FLAGS.resize)
        xml_df.to_csv(labels_csv, index=None)
        print('Successfully converted xml to csv.')
        print(" Number of Images in: ", folder, num_of_images)

        # Run Generate_tfrecords
        gtfr.create_dict()

        if FLAGS.output_path == 'default':
            record_path = ('../docker_tf/transfer_learning/data/' +
                           (folder + '.record'))
        else:
            # NOTE(review): an explicit output path is reused for both
            # folders, so the test records replace the train records.
            record_path = FLAGS.output_path

        if FLAGS.image_dir == 'default':
            path = os.path.join(os.getcwd(), folder)
        else:
            path = os.path.join(os.getcwd(), FLAGS.image_dir)

        if FLAGS.csv_input == 'default':
            examples = pd.read_csv(labels_csv)
        else:
            examples = pd.read_csv(FLAGS.csv_input)

        # The context manager closes the writer even if an example fails.
        with tf.python_io.TFRecordWriter(record_path) as writer:
            for group in gtfr.split(examples, 'filename'):
                tf_example = gtfr.create_tf_example(group, path)
                writer.write(tf_example.SerializeToString())

        # Report the file that was actually written, not the raw flag value.
        output_path = os.path.join(os.getcwd(), record_path)
        print('Successfully created the TFRecords: {}'.format(output_path))

        if FLAGS.check_tfrecords:
            print('Checking Validity of TFRecords. Expect image encoding alongside label data.')
            for example in tf.python_io.tf_record_iterator(record_path):
                result = tf.train.Example.FromString(example)
                print(result)

    # Clean up
    if FLAGS.cleanup:
        # Best effort: any failure is reported and the remaining removals
        # are skipped.
        try:
            if FLAGS.csv_input == 'default':
                os.remove('test_labels.csv')
                os.remove('train_labels.csv')
            else:
                os.remove(FLAGS.csv_input)
            if FLAGS.image_dir != 'default' or FLAGS.ann_dir != 'default':
                shutil.rmtree(FLAGS.image_dir)
                shutil.rmtree(FLAGS.ann_dir)
            else:
                shutil.rmtree('Images/')
                shutil.rmtree('Annotations/')
            shutil.rmtree('test/')
            shutil.rmtree('train/')
        except Exception as e:
            print(
                "Cleanup failed, either a directory or file was missing. Could indicate failed download.", e)