Exemple #1
0
 def setUp(self):
     """Load the JSON fixture file into a new DatasetInventoryMaster.

     Stores the master object on ``self.dinv`` and its ``data_inventory``
     attribute on ``self.inventory``.
     """
     self.dinv = DatasetInventoryMaster()
     self.dinv.create_from_source(
         'json', {'path': './test/test_files/json_data_file.json'})
     self.inventory = self.dinv.data_inventory
def csv(args):
    """Create a dataset inventory from a CSV source.

    Args:
        args: mapping that must provide the keys 'path', 'image_fields',
            'capture_id_field', 'label_fields' and 'meta_data_fields'.

    Returns:
        The DatasetInventoryMaster on which ``create_from_source('csv', ...)``
        has been called.

    Raises:
        KeyError: if one of the required keys is missing from ``args``.
    """
    # (importer parameter name, key in ``args``), in the order the
    # parameters are handed to the importer.
    param_sources = (
        ('path', 'path'),
        ('image_path_col_list', 'image_fields'),
        ('capture_id_col', 'capture_id_field'),
        ('attributes_col_list', 'label_fields'),
        ('meta_col_list', 'meta_data_fields'),
    )
    importer_params = {
        param_name: args[arg_key] for param_name, arg_key in param_sources
    }

    inventory = DatasetInventoryMaster()
    inventory.create_from_source('csv', importer_params)
    return inventory
Exemple #3
0
class DataInventoryTests(unittest.TestCase):
    """ Tests for record filtering and TFRecord conversion of an inventory """

    def setUp(self):
        # Every test starts from an inventory loaded from the JSON fixture.
        self.dinv = DatasetInventoryMaster()
        self.dinv.create_from_source(
            'json', {'path': './test/test_files/json_data_file.json'})
        self.inventory = self.dinv.data_inventory

    def testRemoveRecord(self):
        # A record removed by its id must disappear from the inventory.
        record_id = "single_species_standard"
        self.assertIn(record_id, self.inventory)
        self.dinv.remove_record(record_id)
        self.assertNotIn(record_id, self.inventory)

    def testRemoveRecordsWithLabel(self):
        # Records carrying one of the given label name/value pairs are
        # expected to be dropped.
        affected_ids = ("is_elephant", "counts_is_12")
        for record_id in affected_ids:
            self.assertIn(record_id, self.inventory)

        self.dinv.remove_records_with_label(
            ['class', 'counts'], ['elephant', '12'])

        for record_id in affected_ids:
            self.assertNotIn(record_id, self.inventory)

    def testKeepOnlyRecordsWithLabel(self):
        # Only records carrying one of the given label name/value pairs
        # are expected to survive.
        kept_ids = ("is_elephant", "counts_is_12")
        dropped_id = "single_species_standard"

        self.assertIn(kept_ids[0], self.inventory)
        self.assertIn(dropped_id, self.inventory)
        self.assertIn(kept_ids[1], self.inventory)

        self.dinv.keep_only_records_with_label(
            ['class', 'counts'], ['elephant', '12'])

        self.assertNotIn(dropped_id, self.inventory)
        for record_id in kept_ids:
            self.assertIn(record_id, self.inventory)

    def testConvertToTFRecordFormat(self):
        # Convert one fixture record and check the resulting dictionary.
        record_id = 'single_species_standard'
        self.dinv._map_labels_to_numeric()
        source_record = self.inventory[record_id]
        converted = self.dinv._convert_record_to_tfr_format(
            record_id, source_record)

        self.assertEqual(converted['id'], 'single_species_standard')
        self.assertEqual(converted['n_images'], 3)
        expected_image_paths = [
            "\\images\\4715\\all\\cat\\10296725_0.jpeg",
            "\\images\\4715\\all\\cat\\10296726_0.jpeg",
            "\\images\\4715\\all\\cat\\10296727_0.jpeg"
        ]
        self.assertEqual(converted["image_paths"], expected_image_paths)

        # Numeric label fields.
        self.assertIsInstance(converted["label_num/class"][0], int)
        self.assertEqual(converted["label_num/color_brown"], [1])
        self.assertEqual(converted["label_num/color_white"], [0])
        self.assertIsInstance(converted["label_num/counts"][0], int)

        # Text label fields, checked in a fixed order.
        expected_text_labels = (
            ("label/class", ['cat']),
            ("label/color_brown", ['1']),
            ("label/color_white", ['0']),
            ("label/counts", ['1']),
        )
        for field_name, expected_value in expected_text_labels:
            self.assertEqual(converted[field_name], expected_value)

    def testRemoveMissingLabelRecords(self):
        # The record with a missing label goes away; the other one stays.
        incomplete_id = "missing_counts_label"
        complete_id = "counts_is_12"

        self.assertIn(incomplete_id, self.inventory)
        self.assertIn(complete_id, self.inventory)
        self.dinv._remove_records_with_any_missing_label()
        self.assertNotIn(incomplete_id, self.inventory)
        self.assertIn(complete_id, self.inventory)
def panthera(args):
    """Create a dataset inventory from a 'panthera' source.

    Args:
        args: mapping that must provide the key 'path'.

    Returns:
        The DatasetInventoryMaster on which
        ``create_from_source('panthera', ...)`` has been called.

    Raises:
        KeyError: if 'path' is missing from ``args``.
    """
    source_path = args['path']
    inventory = DatasetInventoryMaster()
    inventory.create_from_source('panthera', {'path': source_path})
    return inventory
def class_dir(args):
    """Create a dataset inventory from an 'image_dir' source.

    Args:
        args: mapping that must provide the key 'path'.

    Returns:
        The DatasetInventoryMaster on which
        ``create_from_source('image_dir', ...)`` has been called.

    Raises:
        KeyError: if 'path' is missing from ``args``.
    """
    importer_params = dict(path=args['path'])
    inventory = DatasetInventoryMaster()
    inventory.create_from_source('image_dir', importer_params)
    return inventory
def json(args):
    """Create a dataset inventory from a 'json' source.

    Args:
        args: mapping that must provide the key 'path'.

    Returns:
        The DatasetInventoryMaster on which
        ``create_from_source('json', ...)`` has been called.

    Raises:
        KeyError: if 'path' is missing from ``args``.
    """
    json_path = args['path']
    importer_params = {'path': json_path}

    inventory = DatasetInventoryMaster()
    inventory.create_from_source('json', importer_params)
    return inventory
Exemple #7
0
from config.config_logging import setup_logging
from data.inventory import DatasetInventoryMaster
from data.writer import DatasetWriter
from data.tfr_encoder_decoder import DefaultTFRecordEncoderDecoder
from data.reader import DatasetReader
from data.image import (preprocess_image, resize_jpeg)

# Configure logging and create a module-level logger.
# NOTE(review): `logging` is used below but no `import logging` is visible
# in this part of the file - confirm it is imported above.
setup_logging()
logger = logging.getLogger(__name__)

# Build a dataset inventory from the test image directory using the
# 'image_dir' source type.
path = './test/test_images'
source_type = 'image_dir'
params = {'path': path}
dinv = DatasetInventoryMaster()
dinv.create_from_source(source_type, params)

# Compute label statistics and write them to the log.
# NOTE(review): _calc_label_stats is a private method called from outside
# the class; presumably log_stats depends on it - confirm.
dinv._calc_label_stats()
dinv.log_stats()

# Split the inventory into 'train', 'val' and 'test' parts with shares of
# 0.6 / 0.2 / 0.2, passing 'class' as split_label_min.
# NOTE(review): presumably the balancing is done on the 'class' label and
# the result is keyed by split name - confirm against the method.
splitted = dinv.split_inventory_by_random_splits_with_balanced_sample(
    split_label_min='class',
    split_names=['train', 'val', 'test'],
    split_percent=[0.6, 0.2, 0.2])

# Writer that uses the default encoder/decoder's encode_record callable.
tfr_encoder_decoder = DefaultTFRecordEncoderDecoder()
tfr_writer = DatasetWriter(tfr_encoder_decoder.encode_record)

tfr = {
    k: v.export_to_tfrecord(tfr_writer,
    # Configure Logging
    if args['log_outdir'] is None:
        args['log_outdir'] = args['output_dir']

    setup_logging(log_output_path=args['log_outdir'])

    logger = logging.getLogger(__name__)

    print("Using arguments:")
    for k, v in args.items():
        print("Arg: %s: %s" % (k, v))

    # Create Dataset Inventory
    params = {'path': args['inventory']}
    dinv = DatasetInventoryMaster()
    dinv.create_from_source('json', params)

    # Remove multi-label subjects
    if args['remove_multi_label_records']:
        dinv.remove_multi_label_records()

    # Remove specific labels
    if args['remove_label_name'] is not None:
        if args['remove_label_value'] is None:
            raise ValueError('if remove_label_name is specified\
                              remove_label_value needs to be specified')

        dinv.remove_records_with_label(
            label_name_list=args['remove_label_name'],
            label_value_list=args['remove_label_value'])