def setUp(self):
    """Load the JSON fixture file into a new DatasetInventoryMaster.

    Stores the master object on ``self.dinv`` and its ``data_inventory``
    attribute on ``self.inventory``.
    """
    fixture_params = {'path': './test/test_files/json_data_file.json'}
    self.dinv = DatasetInventoryMaster()
    self.dinv.create_from_source('json', fixture_params)
    self.inventory = self.dinv.data_inventory
def csv(args):
    """Create a dataset inventory from a CSV source.

    Args:
        args: mapping that provides the keys 'path', 'image_fields',
            'capture_id_field', 'label_fields' and 'meta_data_fields'.

    Returns:
        The DatasetInventoryMaster on which
        ``create_from_source('csv', ...)`` was called.
    """
    # (parameter name handed to the 'csv' source, key looked up in args);
    # the order matches the order in which the keys are read.
    param_to_arg_key = (
        ('path', 'path'),
        ('image_path_col_list', 'image_fields'),
        ('capture_id_col', 'capture_id_field'),
        ('attributes_col_list', 'label_fields'),
        ('meta_col_list', 'meta_data_fields'),
    )
    source_params = {
        param: args[arg_key] for param, arg_key in param_to_arg_key
    }

    inventory_master = DatasetInventoryMaster()
    inventory_master.create_from_source('csv', source_params)
    return inventory_master
class DataInventoryTests(unittest.TestCase):
    """ Tests for creating and filtering a dataset inventory """

    def setUp(self):
        """Load the JSON fixture file into a new DatasetInventoryMaster."""
        fixture_params = {'path': './test/test_files/json_data_file.json'}
        self.dinv = DatasetInventoryMaster()
        self.dinv.create_from_source('json', fixture_params)
        self.inventory = self.dinv.data_inventory

    def testRemoveRecord(self):
        """remove_record() drops the given record id from the inventory."""
        record_id = "single_species_standard"
        self.assertIn(record_id, self.inventory)
        self.dinv.remove_record(record_id)
        self.assertNotIn(record_id, self.inventory)

    def testRemoveRecordsWithLabel(self):
        """Records matching the given label names/values are removed."""
        names, values = ['class', 'counts'], ['elephant', '12']
        elephant_id, twelve_id = "is_elephant", "counts_is_12"

        self.assertIn(elephant_id, self.inventory)
        self.assertIn(twelve_id, self.inventory)

        self.dinv.remove_records_with_label(names, values)

        self.assertNotIn(elephant_id, self.inventory)
        self.assertNotIn(twelve_id, self.inventory)

    def testKeepOnlyRecordsWithLabel(self):
        """Only records matching the given label names/values are kept."""
        names, values = ['class', 'counts'], ['elephant', '12']
        elephant_id, twelve_id = "is_elephant", "counts_is_12"
        other_id = "single_species_standard"

        self.assertIn(elephant_id, self.inventory)
        self.assertIn(other_id, self.inventory)
        self.assertIn(twelve_id, self.inventory)

        self.dinv.keep_only_records_with_label(names, values)

        self.assertNotIn(other_id, self.inventory)
        self.assertIn(elephant_id, self.inventory)
        self.assertIn(twelve_id, self.inventory)

    def testConvertToTFRecordFormat(self):
        """Check the dict produced by _convert_record_to_tfr_format()."""
        record_id = 'single_species_standard'
        self.dinv._map_labels_to_numeric()
        converted = self.dinv._convert_record_to_tfr_format(
            record_id, self.inventory[record_id])

        self.assertEqual(converted['id'], 'single_species_standard')
        self.assertEqual(converted['n_images'], 3)
        expected_paths = [
            "\\images\\4715\\all\\cat\\10296725_0.jpeg",
            "\\images\\4715\\all\\cat\\10296726_0.jpeg",
            "\\images\\4715\\all\\cat\\10296727_0.jpeg",
        ]
        self.assertEqual(converted["image_paths"], expected_paths)

        # Numeric label entries
        self.assertIsInstance(converted["label_num/class"][0], int)
        self.assertEqual(converted["label_num/color_brown"], [1])
        self.assertEqual(converted["label_num/color_white"], [0])
        self.assertIsInstance(converted["label_num/counts"][0], int)

        # Text label entries
        self.assertEqual(converted["label/class"], ['cat'])
        self.assertEqual(converted["label/color_brown"], ['1'])
        self.assertEqual(converted["label/color_white"], ['0'])
        self.assertEqual(converted["label/counts"], ['1'])

    def testRemoveMissingLabelRecords(self):
        """The 'missing_counts_label' record is removed, 'counts_is_12' stays."""
        incomplete_id, complete_id = "missing_counts_label", "counts_is_12"

        self.assertIn(incomplete_id, self.inventory)
        self.assertIn(complete_id, self.inventory)

        self.dinv._remove_records_with_any_missing_label()

        self.assertNotIn(incomplete_id, self.inventory)
        self.assertIn(complete_id, self.inventory)
def panthera(args):
    """Create a dataset inventory via the 'panthera' source type.

    Args:
        args: mapping that provides the key 'path'.

    Returns:
        The DatasetInventoryMaster on which
        ``create_from_source('panthera', ...)`` was called.
    """
    source_path = args['path']
    inventory_master = DatasetInventoryMaster()
    inventory_master.create_from_source('panthera', {'path': source_path})
    return inventory_master
def class_dir(args):
    """Create a dataset inventory via the 'image_dir' source type.

    Args:
        args: mapping that provides the key 'path'.

    Returns:
        The DatasetInventoryMaster on which
        ``create_from_source('image_dir', ...)`` was called.
    """
    source_path = args['path']
    inventory_master = DatasetInventoryMaster()
    inventory_master.create_from_source('image_dir', {'path': source_path})
    return inventory_master
def json(args):
    """Create a dataset inventory via the 'json' source type.

    Args:
        args: mapping that provides the key 'path'.

    Returns:
        The DatasetInventoryMaster on which
        ``create_from_source('json', ...)`` was called.
    """
    source_path = args['path']
    inventory_master = DatasetInventoryMaster()
    inventory_master.create_from_source('json', {'path': source_path})
    return inventory_master
from config.config_logging import setup_logging from data.inventory import DatasetInventoryMaster from data.writer import DatasetWriter from data.tfr_encoder_decoder import DefaultTFRecordEncoderDecoder from data.reader import DatasetReader from data.image import (preprocess_image, resize_jpeg) # Configure Logging setup_logging() logger = logging.getLogger(__name__) path = './test/test_images' source_type = 'image_dir' params = {'path': path} dinv = DatasetInventoryMaster() dinv.create_from_source(source_type, params) dinv._calc_label_stats() dinv.log_stats() splitted = dinv.split_inventory_by_random_splits_with_balanced_sample( split_label_min='class', split_names=['train', 'val', 'test'], split_percent=[0.6, 0.2, 0.2]) tfr_encoder_decoder = DefaultTFRecordEncoderDecoder() tfr_writer = DatasetWriter(tfr_encoder_decoder.encode_record) tfr = { k: v.export_to_tfrecord(tfr_writer,
# Configure Logging
if args['log_outdir'] is None:
    # No separate log directory was given: fall back to the output directory.
    args['log_outdir'] = args['output_dir']

setup_logging(log_output_path=args['log_outdir'])
logger = logging.getLogger(__name__)

# Echo every argument so the run configuration is visible on stdout.
print("Using arguments:")
for k, v in args.items():
    print("Arg: %s: %s" % (k, v))

# Create Dataset Inventory
params = {'path': args['inventory']}
dinv = DatasetInventoryMaster()
dinv.create_from_source('json', params)

# Remove multi-label subjects
if args['remove_multi_label_records']:
    dinv.remove_multi_label_records()

# Remove specific labels
if args['remove_label_name'] is not None:
    if args['remove_label_value'] is None:
        # Adjacent string literals are used instead of a backslash
        # continuation inside the literal, which leaked stray characters
        # into the error message.
        raise ValueError(
            'if remove_label_name is specified, '
            'remove_label_value needs to be specified')
    dinv.remove_records_with_label(
        label_name_list=args['remove_label_name'],
        label_value_list=args['remove_label_value'])