def test_add_data(self):
    # A single added element must end up in the internal map, keyed by the
    # UUID the element reports for itself.
    element = DataMemoryElement(six.b('some bytes'), 'text/plain', True)
    element_uuid = element.uuid()

    dms = DataMemorySet()
    dms.add_data(element)

    self.assertEqual(dms._element_map, {element_uuid: element})
def test_from_config_default(self):
    # The default configuration should be usable as-is. It specifies no
    # cache element and pickle protocol -1.
    default_config = DataMemorySet.get_default_config()
    dms = DataMemorySet.from_config(default_config)

    self.assertIsNone(dms.cache_element)
    self.assertEqual(dms.pickle_protocol, -1)
    self.assertEqual(dms._element_map, {})
def test_from_config_empty_cache(self):
    # Specify a memory element cache with no pre-existing bytes.
    c = DataMemorySet.get_default_config()
    c['cache_element']['type'] = 'DataMemoryElement'
    i = DataMemorySet.from_config(c)
    ntools.assert_is_not_none(i.cache_element)
    ntools.assert_is_instance(i.cache_element, DataMemoryElement)
    # Compare against a bytes literal rather than a text literal: on
    # Python 3 a text '' never equals bytes, while on Python 2 b'' and ''
    # are the same object type, so this form holds on both.
    ntools.assert_equal(i.cache_element.get_bytes(), b'')
    ntools.assert_equal(i.pickle_protocol, -1)
    ntools.assert_equal(i._element_map, {})
def test_uuids(self):
    # With the internal map injected directly, uuids() is expected to
    # return the set of that map's keys.
    element_map = {0: 'a', 75: 'b', 124769: 'c'}

    dms = DataMemorySet()
    dms._element_map = element_map

    self.assertEqual(dms.uuids(), {0, 75, 124769})
def test_configuration(self):
    # The default configuration is expected to be an empty dictionary.
    config = DataMemorySet.get_default_config()
    ntools.assert_equal(config, {})

    # Idempotency: an instance built from the config reports the same
    # config, and rebuilding from that yields an equal instance.
    first = DataMemorySet.from_config(config)
    ntools.assert_equal(config, first.get_config())
    second = DataMemorySet.from_config(first.get_config())
    ntools.assert_equal(first, second)
def test_count(self):
    # count() should equal the number of entries in the injected map.
    element_map = {0: 'a', 75: 'b', 124769: 'c'}

    dms = DataMemorySet()
    dms._element_map = element_map

    self.assertEqual(dms.count(), 3)
def test_from_config_empty_cache(self):
    # Configure a memory-element cache that holds no bytes yet.
    config = DataMemorySet.get_default_config()
    config['cache_element']['type'] = 'DataMemoryElement'

    dms = DataMemorySet.from_config(config)

    self.assertIsNotNone(dms.cache_element)
    self.assertIsInstance(dms.cache_element, DataMemoryElement)
    empty_bytes = six.b('')
    self.assertEqual(dms.cache_element.get_bytes(), empty_bytes)
    self.assertEqual(dms.pickle_protocol, -1)
    self.assertEqual(dms._element_map, {})
def test_get_config_from_config_idempotence(self):
    # Round trip of the untouched default configuration.
    default_config = DataMemorySet.get_default_config()
    round_tripped = DataMemorySet.from_config(default_config).get_config()
    self.assertEqual(round_tripped, default_config)

    # Round trip of a configuration with a read-only memory cache element
    # and a non-default pickle protocol.
    custom_config = DataMemorySet.get_default_config()
    custom_config['cache_element']['type'] = 'DataMemoryElement'
    custom_config['cache_element']['DataMemoryElement']['readonly'] = True
    custom_config['pickle_protocol'] = 1
    round_tripped = DataMemorySet.from_config(custom_config).get_config()
    self.assertEqual(round_tripped, custom_config)
def test_configuration(self):
    # The default configuration is expected to carry only a null
    # "file_cache" entry.
    config = DataMemorySet.get_default_config()
    ntools.assert_equal(config, {"file_cache": None})

    # Idempotency: config -> instance -> config -> instance stays stable.
    first = DataMemorySet.from_config(config)
    ntools.assert_equal(config, first.get_config())
    second = DataMemorySet.from_config(first.get_config())
    ntools.assert_equal(first, second)
def test_get_config_from_config_idempotence(self):
    # Round trip of the untouched default configuration.
    default_config = DataMemorySet.get_default_config()
    round_tripped = DataMemorySet.from_config(default_config).get_config()
    self.assertEqual(round_tripped, default_config)

    # Round trip of a configuration selecting the memory element by its
    # fully qualified key, marked read-only, with pickle protocol 1.
    element_key = \
        'smqtk.representation.data_element.memory_element.DataMemoryElement'
    custom_config = DataMemorySet.get_default_config()
    cache_section = custom_config['cache_element']
    cache_section['type'] = element_key
    cache_section[element_key]['readonly'] = True
    custom_config['pickle_protocol'] = 1
    round_tripped = DataMemorySet.from_config(custom_config).get_config()
    self.assertEqual(round_tripped, custom_config)
def test_iter(self):
    # Iterating the set, implicitly or through iter(), should yield the
    # values stored in the injected map.
    element_map = {0: 'a', 75: 'b', 124769: 'c'}
    stored_values = {'a', 'b', 'c'}

    dms = DataMemorySet()
    dms._element_map = element_map

    self.assertEqual(set(dms), stored_values)
    self.assertEqual(set(iter(dms)), stored_values)
def test_from_config_with_cache(self):
    # Configure a cache element whose bytes are the pickle of the map the
    # constructed set is expected to load.
    expected_map = {'a': 1, 'b': 2, 'c': 3}

    config = DataMemorySet.get_default_config()
    config['cache_element']['type'] = 'DataMemoryElement'
    pickled_map = pickle.dumps(expected_map)
    config['cache_element']['DataMemoryElement']['bytes'] = pickled_map

    dms = DataMemorySet.from_config(config)

    self.assertIsInstance(dms.cache_element, DataMemoryElement)
    self.assertEqual(dms.pickle_protocol, -1)
    self.assertEqual(dms._element_map, expected_map)
def test_cacheing_with_map(self):
    # After cache(), the cache element should hold a pickle of the map.
    cache_elem = DataMemoryElement()
    element_map = {0: 'a', 75: 'b', 124769: 'c'}

    dms = DataMemorySet(cache_elem)
    dms._element_map = element_map
    dms.cache()

    self.assertFalse(cache_elem.is_empty())
    restored_map = pickle.loads(cache_elem.get_bytes())
    self.assertEqual(restored_map, element_map)
def test_get_config_from_config_idempotence(self):
    # Default configuration must survive a from_config/get_config round
    # trip unchanged.
    base_config = DataMemorySet.get_default_config()
    self.assertEqual(DataMemorySet.from_config(base_config).get_config(),
                     base_config)

    # Same check for a read-only memory cache element with pickle
    # protocol 1.
    modified_config = DataMemorySet.get_default_config()
    cache_section = modified_config['cache_element']
    cache_section['type'] = 'DataMemoryElement'
    cache_section['DataMemoryElement']['readonly'] = True
    modified_config['pickle_protocol'] = 1
    self.assertEqual(DataMemorySet.from_config(modified_config).get_config(),
                     modified_config)
def test_from_config_with_cache(self):
    # Configure a cache element whose content is the pickle of the map
    # to use, decoded to text with the configuration bytes encoding.
    expected_map = {'a': 1, 'b': 2, 'c': 3}
    element_key = \
        'smqtk.representation.data_element.memory_element.DataMemoryElement'

    config = DataMemorySet.get_default_config()
    config['cache_element']['type'] = element_key
    encoded_pickle = pickle.dumps(expected_map).decode(BYTES_CONFIG_ENCODING)
    config['cache_element'][element_key]['bytes'] = encoded_pickle

    dms = DataMemorySet.from_config(config)

    self.assertIsInstance(dms.cache_element, DataMemoryElement)
    self.assertEqual(dms.pickle_protocol, -1)
    self.assertEqual(dms._element_map, expected_map)
def test_default_config(self):
    # The default configuration should have exactly two entries: a
    # 'cache_element' dictionary with a null 'type', and 'pickle_protocol'.
    config = DataMemorySet.get_default_config()

    ntools.assert_equal(len(config), 2)

    ntools.assert_in('cache_element', config)
    ntools.assert_is_instance(config['cache_element'], dict)
    ntools.assert_is_none(config['cache_element']['type'])

    ntools.assert_in('pickle_protocol', config)
def test_caching_readonly_cache(self):
    # Caching into a read-only element is expected to raise ReadOnlyError.
    readonly_elem = DataMemoryElement(readonly=True)
    dms = DataMemorySet(readonly_elem)

    with self.assertRaises(ReadOnlyError):
        dms.cache()
def test_default_config(self):
    # The default configuration should have exactly two entries: a
    # 'cache_element' dictionary with a null 'type', and 'pickle_protocol'.
    config = DataMemorySet.get_default_config()

    self.assertEqual(len(config), 2)

    self.assertIn('cache_element', config)
    self.assertIsInstance(config['cache_element'], dict)
    self.assertIsNone(config['cache_element']['type'])

    self.assertIn('pickle_protocol', config)
def test_init_with_cache(self):
    # Constructing with a cache element that already holds a pickled map
    # should load that map and leave the pickle protocol at -1.
    cached_map = {'a': 1, 'b': 2, 'c': 3}
    pickled_map = pickle.dumps(cached_map)
    cache_elem = DataMemoryElement(bytes=pickled_map)

    dms = DataMemorySet(cache_elem)

    self.assertEqual(dms.cache_element, cache_elem)
    self.assertEqual(dms.pickle_protocol, -1)
    self.assertEqual(dms._element_map, cached_map)
def test_has_uuid(self):
    # Every key of the injected map should be reported as present.
    element_map = {0: 'a', 75: 'b', 124769: 'c'}

    dms = DataMemorySet()
    dms._element_map = element_map

    for uid in (0, 75, 124769):
        self.assertTrue(dms.has_uuid(uid))
def test_get_data_valid_uuid(self):
    # get_data() should return the value stored under each known key.
    element_map = {0: 'a', 75: 'b', 124769: 'c'}

    dms = DataMemorySet()
    dms._element_map = element_map

    for uid, stored in ((0, 'a'), (75, 'b'), (124769, 'c')):
        self.assertEqual(dms.get_data(uid), stored)
def test_caching_no_map_no_cache(self):
    # With neither a cache element nor any data, cache() should do
    # nothing and leave the instance untouched.
    empty_set = DataMemorySet()
    empty_set.cache()

    ntools.assert_is_none(empty_set.cache_element)
    ntools.assert_equal(empty_set._element_map, {})
# CSV file detailing [cluster_id, ad_id, image_sha1] relationships. EVAL_CLUSTERS_ADS_IMAGES_CSV = "eval.CP1_clusters_ads_images.csv" # json-lines file of clusters missing from the above file. Should be at least # composed of: {"cluster_id": <str>, ... } EVAL_MISSING_CLUSTERS = "eval.cluster_scores.missing_clusters.jl" OUTPUT_DESCR_PROB_INDEX = "cp1_img_prob_descriptors.pickle" OUTPUT_MAX_JL = "cp1_scores_max.jl" OUTPUT_AVG_JL = "cp1_scores_avg.jl" ############################################################################### # Compute classification scores initialize_logging(logging.getLogger('smqtk'), logging.DEBUG) eval_data_set = DataMemorySet(EVAL_DATASET) img_prob_descr_index = MemoryDescriptorIndex(OUTPUT_DESCR_PROB_INDEX) img_prob_gen = CaffeDescriptorGenerator(CAFFE_DEPLOY, CAFFE_MODEL, CAFFE_IMG_MEAN, 'prob', batch_size=1000, use_gpu=True, load_truncated_images=True) img_c_mem_factory = ClassificationElementFactory(MemoryClassificationElement, {}) img_prob_classifier = IndexLabelClassifier(CAFFE_LABELS) eval_data2descr = {}
def test_caching_no_map_no_cache(self):
    # With neither a cache element nor any data, cache() should do
    # nothing and leave the instance untouched.
    empty_set = DataMemorySet()
    empty_set.cache()

    self.assertIsNone(empty_set.cache_element)
    self.assertEqual(empty_set._element_map, {})
def test_cacheing_no_map(self):
    # Caching with no data still writes something to the cache element:
    # the pickle of an empty map.
    dms = DataMemorySet(DataMemoryElement())
    dms.cache()

    self.assertFalse(dms.cache_element.is_empty())
    cached_map = pickle.loads(dms.cache_element.get_bytes())
    self.assertEqual(cached_map, {})
def test_init_empty_cache(self):
    # Constructing with an empty cache element and protocol 2 should keep
    # both as given and start with an empty map.
    empty_cache = DataMemoryElement()
    dms = DataMemorySet(empty_cache, 2)

    self.assertEqual(dms.cache_element, empty_cache)
    self.assertEqual(dms.pickle_protocol, 2)
    self.assertEqual(dms._element_map, {})
def test_init_no_cache(self):
    # A default-constructed set has no cache element, an empty map and
    # pickle protocol -1.
    dms = DataMemorySet()

    self.assertIsNone(dms.cache_element)
    self.assertEqual(dms._element_map, {})
    self.assertEqual(dms.pickle_protocol, -1)
def test_is_usable(self):
    # The implementation has no dependencies, so it should always report
    # itself as usable.
    usable = DataMemorySet.is_usable()
    self.assertTrue(usable)
def test_is_usable(self):
    # The implementation has no dependencies, so it should always report
    # itself as usable.
    usable = DataMemorySet.is_usable()
    ntools.assert_true(usable)
def test_get_data_invalid_uuid(self):
    # Looking up a UUID in an empty set is expected to raise KeyError.
    dms = DataMemorySet()

    with self.assertRaises(KeyError):
        dms.get_data('invalid uuid')
def test_init_no_cache(self):
    # A default-constructed set has no cache element, an empty map and
    # pickle protocol -1.
    dms = DataMemorySet()

    ntools.assert_is_none(dms.cache_element)
    ntools.assert_equal(dms._element_map, {})
    ntools.assert_equal(dms.pickle_protocol, -1)
def test_add_data_not_DataElement(self):
    # Adding something that is not a data element is expected to trip an
    # assertion inside add_data.
    dms = DataMemorySet()

    with self.assertRaises(AssertionError):
        dms.add_data("not data element")