def test_get_config(self):
    """Configuration should reflect the cache element given at construction."""
    # No cache element, explicit or implicit, yields the default config.
    self.assertEqual(MemoryDescriptorSet().get_config(),
                     MemoryDescriptorSet.get_default_config())
    self.assertEqual(MemoryDescriptorSet(None).get_config(),
                     MemoryDescriptorSet.get_default_config())

    dme_key = ('smqtk.representation.data_element.memory_element'
               '.DataMemoryElement')

    # An empty cache element only contributes the 'type' field.
    no_bytes_elem = DataMemoryElement()
    self.assertEqual(
        MemoryDescriptorSet(no_bytes_elem).get_config(),
        merge_dict(MemoryDescriptorSet.get_default_config(),
                   {'cache_element': {'type': dme_key}}))

    # A cache element with content embeds its bytes, decoded, in the
    # nested element sub-configuration.
    table_bytes = pickle.dumps({1: 1, 2: 2, 3: 3}, -1)
    table_bytes_str = table_bytes.decode(BYTES_CONFIG_ENCODING)
    full_elem = DataMemoryElement(bytes=table_bytes)
    self.assertEqual(
        MemoryDescriptorSet(full_elem).get_config(),
        merge_dict(
            MemoryDescriptorSet.get_default_config(),
            {'cache_element': {dme_key: {'bytes': table_bytes_str},
                               'type': dme_key}}))
def test_added_descriptor_table_caching(self):
    """Every mutation of the descriptor table should be written to cache."""
    writable_cache = DataMemoryElement(readonly=False)
    descriptors = [random_descriptor() for _ in range(3)]
    table = dict((d.uuid(), d) for d in descriptors)

    dset = MemoryDescriptorSet(writable_cache)
    self.assertTrue(writable_cache.is_empty())

    # Bulk addition should populate the writable cache element.
    dset.add_many_descriptors(descriptors)
    self.assertFalse(writable_cache.is_empty())
    self.assertEqual(pickle.loads(dset.cache_element.get_bytes()), table)

    # A single-element addition should also be reflected in the cache.
    extra = random_descriptor()
    table[extra.uuid()] = extra
    dset.add_descriptor(extra)
    self.assertEqual(pickle.loads(dset.cache_element.get_bytes()), table)

    # As should a removal.
    victim = list(table.values())[0]
    del table[victim.uuid()]
    dset.remove_descriptor(victim.uuid())
    self.assertEqual(pickle.loads(dset.cache_element.get_bytes()), table)
def test_set_bytes_when_readonly(self):
    """Writing to a read-only element should raise and not mutate it."""
    initial = 'test bytes first set'
    replacement = 'the second set of bytes'
    elem = DataMemoryElement(initial, readonly=True)
    ntools.assert_equal(elem.get_bytes(), initial)
    # The overwrite attempt must fail...
    ntools.assert_raises(ReadOnlyError, elem.set_bytes, replacement)
    # ...and leave the original content untouched.
    ntools.assert_equal(elem.get_bytes(), initial)
def test_set_bytes(self):
    """A writable element should report newly set bytes."""
    first = six.b('test bytes first set')
    second = six.b('the second set of bytes')
    elem = DataMemoryElement(first)
    self.assertEqual(elem.get_bytes(), first)
    elem.set_bytes(second)
    self.assertEqual(elem.get_bytes(), second)
def test_added_descriptor_table_caching(self):
    """Every mutation of the descriptor table should be written to cache."""
    writable_cache = DataMemoryElement(readonly=False)
    descriptors = [random_descriptor() for _ in range(3)]
    table = dict((d.uuid(), d) for d in descriptors)

    index = MemoryDescriptorIndex(writable_cache)
    self.assertTrue(writable_cache.is_empty())

    # Bulk addition should populate the writable cache element.
    index.add_many_descriptors(descriptors)
    self.assertFalse(writable_cache.is_empty())
    self.assertEqual(pickle.loads(index.cache_element.get_bytes()), table)

    # A single-element addition should also be reflected in the cache.
    extra = random_descriptor()
    table[extra.uuid()] = extra
    index.add_descriptor(extra)
    self.assertEqual(pickle.loads(index.cache_element.get_bytes()), table)

    # As should a removal.
    victim = list(table.values())[0]
    del table[victim.uuid()]
    index.remove_descriptor(victim.uuid())
    self.assertEqual(pickle.loads(index.cache_element.get_bytes()), table)
def test_get_config(self, _m_cdg_setupNetwork):
    """get_config should round-trip every constructor parameter.

    Network setup is mocked so no real caffe initialization happens.
    """
    params = {
        'network_prototxt': DataMemoryElement(),
        'network_model': DataMemoryElement(),
        'image_mean': DataMemoryElement(),
        'return_layer': 'layer name',
        'batch_size': 777,
        'use_gpu': False,
        'gpu_device_id': 8,
        'network_is_bgr': False,
        'data_layer': 'data-other',
        'load_truncated_images': True,
        'pixel_rescale': (.2, .8),
        'input_scale': 1.5,
    }
    # Guard: the dict above must cover every constructor argument.
    ctor_args = set(
        inspect.getargspec(CaffeDescriptorGenerator.__init__).args[1:])
    self.assertSetEqual(set(params.keys()), ctor_args)

    generator = CaffeDescriptorGenerator(**params)
    # DataElement parameters are reported back as nested sub-configs.
    for de_key in ('network_prototxt', 'network_model', 'image_mean'):
        params[de_key] = to_config_dict(params[de_key])
    self.assertEqual(generator.get_config(), params)
def test_get_config(self):
    """Configuration should reflect the cache element given at construction."""
    # No cache element, explicit or implicit, yields the default config.
    ntools.assert_equal(MemoryDescriptorIndex().get_config(),
                        MemoryDescriptorIndex.get_default_config())
    ntools.assert_equal(MemoryDescriptorIndex(None).get_config(),
                        MemoryDescriptorIndex.get_default_config())

    # An empty cache element only contributes the 'type' field.
    no_bytes_elem = DataMemoryElement()
    ntools.assert_equal(
        MemoryDescriptorIndex(no_bytes_elem).get_config(),
        merge_dict(MemoryDescriptorIndex.get_default_config(),
                   {'cache_element': {'type': 'DataMemoryElement'}}))

    # A cache element with content embeds its raw bytes in the nested
    # element sub-configuration.
    table_bytes = pickle.dumps({1: 1, 2: 2, 3: 3}, -1)
    full_elem = DataMemoryElement(bytes=table_bytes)
    ntools.assert_equal(
        MemoryDescriptorIndex(full_elem).get_config(),
        merge_dict(
            MemoryDescriptorIndex.get_default_config(),
            {'cache_element': {
                'DataMemoryElement': {'bytes': table_bytes},
                'type': 'DataMemoryElement'
            }}))
def test_get_config(self):
    """Configuration should reflect the cache element given at construction."""
    # No cache element, explicit or implicit, yields the default config.
    self.assertEqual(MemoryDescriptorIndex().get_config(),
                     MemoryDescriptorIndex.get_default_config())
    self.assertEqual(MemoryDescriptorIndex(None).get_config(),
                     MemoryDescriptorIndex.get_default_config())

    # An empty cache element only contributes the 'type' field.
    no_bytes_elem = DataMemoryElement()
    self.assertEqual(
        MemoryDescriptorIndex(no_bytes_elem).get_config(),
        merge_dict(MemoryDescriptorIndex.get_default_config(),
                   {'cache_element': {'type': 'DataMemoryElement'}}))

    # A cache element with content embeds its bytes, decoded, in the
    # nested element sub-configuration.
    table_bytes = pickle.dumps({1: 1, 2: 2, 3: 3}, -1)
    table_bytes_str = table_bytes.decode(BYTES_CONFIG_ENCODING)
    full_elem = DataMemoryElement(bytes=table_bytes)
    self.assertEqual(
        MemoryDescriptorIndex(full_elem).get_config(),
        merge_dict(
            MemoryDescriptorIndex.get_default_config(),
            {'cache_element': {
                'DataMemoryElement': {'bytes': table_bytes_str},
                'type': 'DataMemoryElement'
            }}))
def test_get_config(self, _m_cdg_setupNetwork):
    """get_config should round-trip every constructor parameter.

    Network setup is mocked so no real caffe initialization happens.
    """
    params = {
        'network_prototxt': DataMemoryElement(),
        'network_model': DataMemoryElement(),
        'image_mean': DataMemoryElement(),
        'return_layer': 'layer name',
        'batch_size': 777,
        'use_gpu': False,
        'gpu_device_id': 8,
        'network_is_bgr': False,
        'data_layer': 'data-other',
        'load_truncated_images': True,
        'pixel_rescale': (.2, .8),
        'input_scale': 1.5,
        'threads': 14,
    }
    # Guard: the dict above must cover every default-config key.
    assert set(CaffeDescriptorGenerator.get_default_config()) == set(params)

    generator = CaffeDescriptorGenerator(**params)
    # DataElement parameters are reported back as nested sub-configs.
    for de_key in ('network_prototxt', 'network_model', 'image_mean'):
        params[de_key] = to_config_dict(params[de_key])
    assert generator.get_config() == params
def test_pickle_save_restore(self, m_cdg_setupNetwork):
    """Pickling round-trip should re-run network setup and keep config.

    Network setup is mocked so no real caffe initialization happens.
    """
    params = {
        'network_prototxt': DataMemoryElement(),
        'network_model': DataMemoryElement(),
        'image_mean': DataMemoryElement(),
        'return_layer': 'layer name',
        'batch_size': 777,
        'use_gpu': False,
        'gpu_device_id': 8,
        'network_is_bgr': False,
        'data_layer': 'data-other',
        'load_truncated_images': True,
        'pixel_rescale': (.2, .8),
        'input_scale': 1.5,
    }
    generator = CaffeDescriptorGenerator(**params)
    # Construction itself sets up the network once.
    self.assertEqual(m_cdg_setupNetwork.call_count, 1)

    restored = pickle.loads(pickle.dumps(generator, -1))
    # Un-pickling must trigger network setup again, just like construction.
    self.assertEqual(m_cdg_setupNetwork.call_count, 2)
    self.assertIsInstance(restored, CaffeDescriptorGenerator)
    self.assertEqual(generator.get_config(), restored.get_config())
def test_add_data(self):
    """Added elements should land in the internal UUID-keyed map."""
    elem = DataMemoryElement(six.b('some bytes'), 'text/plain', True)
    dms = DataMemorySet()
    dms.add_data(elem)
    self.assertEqual(dms._element_map, {elem.uuid(): elem})
def test_set_bytes_when_readonly(self):
    """Writing to a read-only element should raise and not mutate it."""
    initial = six.b('test bytes first set')
    replacement = six.b('the second set of bytes')
    elem = DataMemoryElement(initial, readonly=True)
    self.assertEqual(elem.get_bytes(), initial)
    # The overwrite attempt must fail...
    self.assertRaises(ReadOnlyError, elem.set_bytes, replacement)
    # ...and leave the original content untouched.
    self.assertEqual(elem.get_bytes(), initial)
def test_configuration(self):
    """Every constructor parameter should survive a config round-trip."""
    original = FaissNearestNeighborsIndex(
        descriptor_set=MemoryDescriptorSet(),
        idx2uid_kvs=MemoryKeyValueStore(),
        uid2idx_kvs=MemoryKeyValueStore(),
        index_element=DataMemoryElement(),
        index_param_element=DataMemoryElement(),
        read_only=True,
        factory_string=u'some fact str',
        ivf_nprobe=88,
        use_gpu=False,
        gpu_id=99,
        random_seed=8,
    )
    # Helper yields instances rebuilt through the configuration API; each
    # one must preserve types and values of all parameters.
    for rebuilt in configuration_test_helper(original):
        assert isinstance(rebuilt._descriptor_set, MemoryDescriptorSet)
        assert isinstance(rebuilt._idx2uid_kvs, MemoryKeyValueStore)
        assert isinstance(rebuilt._uid2idx_kvs, MemoryKeyValueStore)
        assert isinstance(rebuilt._index_element, DataMemoryElement)
        assert isinstance(rebuilt._index_param_element, DataMemoryElement)
        assert rebuilt.read_only is True
        assert isinstance(rebuilt.factory_string, six.string_types)
        assert rebuilt.factory_string == 'some fact str'
        assert rebuilt._ivf_nprobe == 88
        assert rebuilt._use_gpu is False
        assert rebuilt._gpu_id == 99
        assert rebuilt.random_seed == 8
def test_build_index_with_cache(self):
    """Building an index should compute hashes and fill the cache."""
    cache = DataMemoryElement()
    idx = LinearHashIndex(cache)
    # Bit-vectors 010, 100, 011, 001 -> integers 2, 1, 3, 1? No: see
    # expected set below; index stores each vector's integer encoding.
    # noinspection PyTypeChecker
    idx.build_index([[0, 1, 0],
                     [1, 0, 0],
                     [0, 1, 1],
                     [0, 0, 1]])
    nose.tools.assert_equal(idx.index, {1, 2, 3, 4})
    nose.tools.assert_false(cache.is_empty())
def test_save_model_with_writable_caches(self):
    """save_model should serialize mean vector and rotation to writable
    cache elements."""
    mean_vec = numpy.array([1, 2, 3])
    rotation = numpy.eye(3)

    # Pre-compute the numpy serializations we expect to see cached.
    buf = six.BytesIO()
    # noinspection PyTypeChecker
    numpy.save(buf, mean_vec)
    mean_vec_bytes = buf.getvalue()

    buf = six.BytesIO()
    # noinspection PyTypeChecker
    numpy.save(buf, rotation)
    rotation_bytes = buf.getvalue()

    itq = ItqFunctor()
    itq.mean_vec = mean_vec
    itq.rotation = rotation
    itq.mean_vec_cache_elem = DataMemoryElement(readonly=False)
    itq.rotation_cache_elem = DataMemoryElement(readonly=False)
    itq.save_model()

    self.assertEqual(itq.mean_vec_cache_elem.get_bytes(), mean_vec_bytes)
    self.assertEqual(itq.rotation_cache_elem.get_bytes(), rotation_bytes)
def test_fit_with_cache(self):
    """Fitting with cache elements should also populate those caches."""
    training = []
    for i in range(5):
        d = DescriptorMemoryElement(six.b('test'), i)
        d.set_vector([-2. + i, -2. + i])
        training.append(d)

    itq = ItqFunctor(DataMemoryElement(), DataMemoryElement(),
                     bit_length=1, random_seed=0)
    itq.fit(training)

    # TODO: Explanation as to why this is the expected result.
    numpy.testing.assert_array_almost_equal(itq.mean_vec, [0, 0])
    numpy.testing.assert_array_almost_equal(
        itq.rotation, [[1 / sqrt(2)], [1 / sqrt(2)]])

    # Caches should now hold numpy-serialized copies of the fit model.
    self.assertIsNotNone(itq.mean_vec_cache_elem)
    numpy.testing.assert_array_almost_equal(
        numpy.load(six.BytesIO(itq.mean_vec_cache_elem.get_bytes())),
        [0, 0])
    self.assertIsNotNone(itq.rotation_cache_elem)
    numpy.testing.assert_array_almost_equal(
        numpy.load(six.BytesIO(itq.rotation_cache_elem.get_bytes())),
        [[1 / sqrt(2)], [1 / sqrt(2)]])
def test_load_as_matrix_empty_data(self):
    """
    Test that we catch and do not load an empty data element.
    """
    zero_byte_elem = DataMemoryElement(readonly=True,
                                       content_type='image/png')
    assert zero_byte_elem.is_empty()
    with pytest.raises(ValueError,
                       match="GdalImageReader cannot load 0-sized data"):
        GdalImageReader().load_as_matrix(zero_byte_elem)
def test_save_cache(self):
    """Building an index should write non-trivial bytes into the cache."""
    cache = DataMemoryElement()
    nose.tools.assert_true(cache.is_empty())
    idx = LinearHashIndex(cache)
    # noinspection PyTypeChecker
    idx.build_index([[0, 1, 0],
                     [1, 0, 0],
                     [0, 1, 1],
                     [0, 0, 1]])
    nose.tools.assert_false(cache.is_empty())
    nose.tools.assert_true(len(cache.get_bytes()) > 0)
def test_build_index_with_cache(self):
    """Building an index should compute hash integers and fill the cache."""
    cache = DataMemoryElement()
    idx = LinearHashIndex(cache)
    # noinspection PyTypeChecker
    idx.build_index([[0, 1, 0],
                     [1, 0, 0],
                     [0, 1, 1],
                     [0, 0, 1]])
    self.assertEqual(idx.index, {1, 2, 3, 4})
    self.assertFalse(cache.is_empty())
def test_add_with_caching(self):
    """Each added key-value pair should be reflected in the cache bytes."""
    cache = DataMemoryElement()
    store = MemoryKeyValueStore(cache)
    store.add('a', 'b')
    store.add('foo', None)
    store.add(0, 89)
    nose.tools.assert_equal(pickle.loads(cache.get_bytes()),
                            {'a': 'b', 'foo': None, 0: 89})
def test_set_bytes_when_readonly(self):
    """Writing to a read-only element should raise and not mutate it."""
    initial = six.b('test bytes first set')
    replacement = six.b('the second set of bytes')
    elem = DataMemoryElement(initial, readonly=True)
    self.assertEqual(elem.get_bytes(), initial)
    # The overwrite attempt must fail...
    self.assertRaises(ReadOnlyError, elem.set_bytes, replacement)
    # ...and leave the original content untouched.
    self.assertEqual(elem.get_bytes(), initial)
def test_load_as_matrix_invalid_bytes(self):
    """
    Test that data element with invalid data bytes fails to load.
    """
    bad_elem = DataMemoryElement(content_type='image/png')
    bad_elem.set_bytes(b"not valid bytes")
    reader = PilImageReader()
    with pytest.raises(IOError,
                       match="Failed to identify image from bytes "
                             "provided by DataMemoryElement"):
        reader.load_as_matrix(bad_elem)
def test_save_cache_build_index(self):
    """Building an index should persist the hash integers to the cache."""
    cache = DataMemoryElement()
    self.assertTrue(cache.is_empty())
    idx = LinearHashIndex(cache)
    # noinspection PyTypeChecker
    idx.build_index([[0, 1, 0],
                     [1, 0, 0],
                     [0, 1, 1],
                     [0, 0, 1]])
    self.assertFalse(cache.is_empty())
    # The cache content is a numpy-serialized collection of hash ints.
    cached_ints = set(numpy.load(BytesIO(cache.get_bytes())))
    self.assertSetEqual({1, 2, 3, 4}, cached_ints)
def test_from_config(self):
    """Building from config should restore both cache element and table."""
    table = dict(a=1, b=2, c=3)
    cache = DataMemoryElement(bytes=pickle.dumps(table))
    # Configure a cache element carrying the pickled table bytes.
    restored = MemoryDescriptorIndex.from_config({
        'cache_element': {
            'type': 'DataMemoryElement',
            'DataMemoryElement': {'bytes': cache.get_bytes()}
        }
    })
    self.assertEqual(restored.cache_element, cache)
    self.assertEqual(restored._table, table)
def dl_image(meta):
    """Download and locally cache the image referenced by an ES doc blob.

    The "stored" URL is tried first, falling back to the "original" URL.
    If the target file already exists on disk, no download occurs.

    :param meta: Metadata document with 'fields' (content_type,
        obj_stored_url, obj_original_url), 'index', 'doc_type' and 'id'.
    :return: Tuple ``(doc_id, save_path, data_uuid)``, or None when the
        content-type has no known extension or all downloads failed.
    :raises KeyError: Re-raised (after logging) when an expected field is
        missing from the metadata block.
    """
    try:
        c_type = meta['fields']['content_type'][0]
        obj_stored_url = meta['fields']['obj_stored_url'][0]
        obj_original_url = meta['fields']['obj_original_url'][0]

        c_ext = m.guess_extension(c_type, strict=False)
        if c_ext is None:
            log.warn(
                "Guessed 'None' extension for content-type '%s', "
                "skipping.", c_type)
            return None

        save_dir = os.path.abspath(
            os.path.expanduser(
                os.path.join(output_dir, meta['index'], meta['doc_type'])))
        save_file = meta['id'] + c_ext
        save_path = os.path.join(save_dir, save_file)

        # Save/write file if needed
        if not os.path.isfile(save_path):
            # First try 'stored' url, fallback on original
            # Return None if failed to download anything
            ok, r = try_download(obj_stored_url, stored_http_auth)
            if not ok:
                log.warn(
                    "Failed to download stored-data URL \"%s\" "
                    "(error=%s)", obj_stored_url, str(r))
                ok, r = try_download(obj_original_url)
                if not ok:
                    # Fixed: previously logged the stored URL here even
                    # though the *original* URL is what failed.
                    log.warn(
                        "Failed to download original URL \"%s\" "
                        "(error=%s)", obj_original_url, str(r))
                    return None

            # Assuming OK at this point
            content = r.content
            d = DataMemoryElement(content, c_type)
            safe_create_dir(save_dir)
            with open(save_path, 'wb') as out:
                log.debug("Saving to file: '%s'", save_path)
                out.write(content)
        else:
            d = DataFileElement(save_path)

        return meta['id'], save_path, d.uuid()
    # 'except X, ex' is Python-2-only syntax; 'as' works on 2.6+ and 3.
    except KeyError as ex:
        log.error("Failed to find key %s in meta block: %s", str(ex), meta)
        raise
def dl_image(meta):
    """Download and locally cache the image referenced by an ES doc blob.

    The "stored" URL is tried first, falling back to the "original" URL.
    If the target file already exists on disk, no download occurs.

    :param meta: Metadata document with 'fields' (content_type,
        obj_stored_url, obj_original_url), 'index', 'doc_type' and 'id'.
    :return: Tuple ``(doc_id, save_path, data_uuid)``, or None when the
        content-type has no known extension or all downloads failed.
    :raises KeyError: Re-raised (after logging) when an expected field is
        missing from the metadata block.
    """
    try:
        c_type = meta['fields']['content_type'][0]
        obj_stored_url = meta['fields']['obj_stored_url'][0]
        obj_original_url = meta['fields']['obj_original_url'][0]

        c_ext = m.guess_extension(c_type, strict=False)
        if c_ext is None:
            log.warn("Guessed 'None' extension for content-type '%s', "
                     "skipping.", c_type)
            return None

        save_dir = os.path.abspath(os.path.expanduser(
            os.path.join(output_dir, meta['index'], meta['doc_type'])
        ))
        save_file = meta['id'] + c_ext
        save_path = os.path.join(save_dir, save_file)

        # Save/write file if needed
        if not os.path.isfile(save_path):
            # First try 'stored' url, fallback on original
            # Return None if failed to download anything
            ok, r = try_download(obj_stored_url, stored_http_auth)
            if not ok:
                log.warn("Failed to download stored-data URL \"%s\" "
                         "(error=%s)", obj_stored_url, str(r))
                ok, r = try_download(obj_original_url)
                if not ok:
                    # Fixed: previously logged the stored URL here even
                    # though the *original* URL is what failed.
                    log.warn("Failed to download original URL \"%s\" "
                             "(error=%s)", obj_original_url, str(r))
                    return None

            # Assuming OK at this point
            content = r.content
            d = DataMemoryElement(content, c_type)
            safe_create_dir(save_dir)
            with open(save_path, 'wb') as out:
                log.debug("Saving to file: '%s'", save_path)
                out.write(content)
        else:
            d = DataFileElement(save_path)

        return meta['id'], save_path, d.uuid()
    # 'except X, ex' is Python-2-only syntax; 'as' works on 2.6+ and 3.
    except KeyError as ex:
        log.error("Failed to find key %s in meta block: %s", str(ex), meta)
        raise
def test_remove_many_with_cache(self):
    """Bulk removal should be written back into the cache element."""
    initial_table = {0: 0, 1: 1, 2: 2}
    cache = DataMemoryElement(pickle.dumps(initial_table))
    store = MemoryKeyValueStore(cache)
    # Construction should have hydrated the table from the cache.
    self.assertDictEqual(store._table, initial_table)
    store.remove_many([0, 2])
    self.assertDictEqual(pickle.loads(cache.get_bytes()), {1: 1})
def test_add_with_caching(self):
    """
    Test that we can add key-value pairs and they reflect in the cache
    element.
    """
    cache = DataMemoryElement()
    store = MemoryKeyValueStore(cache)
    store.add('a', 'b')
    store.add('foo', None)
    store.add(0, 89)
    self.assertEqual(pickle.loads(cache.get_bytes()),
                     {'a': 'b', 'foo': None, 0: 89})
def test_configuration(self):
    """Default config should round-trip through from_config/get_config."""
    config = DataMemoryElement.get_default_config()
    ntools.assert_equal(config, {'bytes': None, 'content_type': None})

    config['bytes'] = 'Hello World.'
    config['content_type'] = 'text/plain'
    first = DataMemoryElement.from_config(config)
    ntools.assert_equal(config, first.get_config())
    ntools.assert_equal(first._bytes, 'Hello World.')
    ntools.assert_equal(first._content_type, 'text/plain')

    # A second round-trip should produce an equal instance.
    second = DataMemoryElement.from_config(first.get_config())
    ntools.assert_equal(first, second)
def test_from_config(self):
    """Building from config should restore both cache element and table."""
    table = dict(a=1, b=2, c=3)
    cache = DataMemoryElement(bytes=pickle.dumps(table))
    # Configure a cache element carrying the pickled table bytes.
    restored = MemoryDescriptorIndex.from_config({
        'cache_element': {
            'type': 'DataMemoryElement',
            'DataMemoryElement': {'bytes': cache.get_bytes()}
        }
    })
    ntools.assert_equal(restored.cache_element, cache)
    ntools.assert_equal(restored._table, table)
def test_cacheing_with_map(self):
    """cache() should pickle the element map into the cache element."""
    cache = DataMemoryElement()
    element_map = {
        0: 'a',
        75: 'b',
        124769: 'c',
    }
    dms = DataMemorySet(cache)
    dms._element_map = element_map
    dms.cache()
    self.assertFalse(cache.is_empty())
    self.assertEqual(pickle.loads(cache.get_bytes()), element_map)
def test_configuration(self):
    """Default config should round-trip through from_config/get_config."""
    config = DataMemoryElement.get_default_config()
    ntools.assert_equal(config, {'bytes': None,
                                 'content_type': None,
                                 'readonly': False})

    config['bytes'] = 'Hello World.'
    config['content_type'] = 'text/plain'
    first = DataMemoryElement.from_config(config)
    ntools.assert_equal(config, first.get_config())
    ntools.assert_equal(first._bytes, 'Hello World.')
    ntools.assert_equal(first._content_type, 'text/plain')

    # A second round-trip should produce an equal instance.
    second = DataMemoryElement.from_config(first.get_config())
    ntools.assert_equal(first, second)
def test_add_many_with_caching(self):
    """add_many should update both the table and the cache element."""
    payload = {
        'a': 'b',
        'foo': None,
        0: 89,
    }
    cache = DataMemoryElement()
    store = MemoryKeyValueStore(cache)
    # Fresh store: nothing in the table, nothing cached.
    self.assertEqual(store._table, {})
    self.assertEqual(cache.get_bytes(), six.b(""))
    store.add_many(payload)
    self.assertEqual(store._table, payload)
    self.assertEqual(pickle.loads(cache.get_bytes()), payload)
def test_persistence_with_update_index(self):
    """An updated index should be reconstructable from persisted parts."""
    n_initial, n_update, dim = 100, 10, 8
    batch1 = {DescriptorMemoryElement('test', i)
              for i in range(n_initial)}
    batch2 = {DescriptorMemoryElement('test', i)
              for i in range(n_initial, n_initial + n_update)}
    for d in batch1 | batch2:
        d.set_vector(np.random.rand(dim))

    # Create index with persistent entities.
    index_element = DataMemoryElement(
        content_type='application/octet-stream')
    index_param_element = DataMemoryElement(content_type='text/plain')
    index = self._make_inst(index_element=index_element,
                            index_param_element=index_param_element)
    descriptor_set = index._descriptor_set
    idx2uid_kvs = index._idx2uid_kvs
    uid2idx_kvs = index._uid2idx_kvs

    # Build initial index.
    index.build_index(batch1)
    self.assertEqual(index.count(), len(batch1))
    for d in batch1:
        self.assertIn(d, index._descriptor_set)

    # After updating, every descriptor from both batches must be present.
    index.update_index(batch2)
    everything = batch1 | batch2
    self.assertEqual(index.count(), len(everything))
    for d in everything:
        self.assertIn(d, index._descriptor_set)

    # Reconstruct a fresh instance from the persisted entities.
    del index
    index = self._make_inst(descriptor_set=descriptor_set,
                            idx2uid_kvs=idx2uid_kvs,
                            uid2idx_kvs=uid2idx_kvs,
                            index_element=index_element,
                            index_param_element=index_param_element)

    # Check that NN can return something from the updated set: the
    # nearest element to an indexed query should be the query itself.
    for q in everything:
        neighbors, _dists = index.nn(q)
        self.assertEqual(neighbors[0], q)
def test_get_config_with_cache_elements(self):
    """Config should include nested sub-configs for set cache elements."""
    itq = ItqFunctor(bit_length=5, itq_iterations=6, normalize=7,
                     random_seed=8)
    itq.mean_vec_cache_elem = DataMemoryElement('cached vec bytes')
    itq.rotation_cache_elem = DataMemoryElement('cached rot bytes')

    config = itq.get_config()
    NT.assert_equal(config['bit_length'], 5)
    NT.assert_equal(config['itq_iterations'], 6)
    NT.assert_equal(config['normalize'], 7)
    NT.assert_equal(config['random_seed'], 8)
    # Cache elements surface as DataMemoryElement sub-configurations.
    NT.assert_equal(config['mean_vec_cache']['type'], "DataMemoryElement")
    NT.assert_equal(config['mean_vec_cache']['DataMemoryElement']['bytes'],
                    'cached vec bytes')
    NT.assert_equal(config['rotation_cache']['DataMemoryElement']['bytes'],
                    'cached rot bytes')
def test_configuration(self):
    """Default config should round-trip through from_config/get_config."""
    config = DataMemoryElement.get_default_config()
    self.assertEqual(config, {'bytes': None,
                              'content_type': None,
                              'readonly': False})

    config['bytes'] = 'Hello World.'
    config['content_type'] = 'text/plain'
    #: :type: DataMemoryElement
    first = DataMemoryElement.from_config(config)
    self.assertEqual(config, first.get_config())
    self.assertEqual(first._bytes, 'Hello World.')
    self.assertEqual(first._content_type, 'text/plain')

    # A second round-trip should produce an equal instance.
    second = DataMemoryElement.from_config(first.get_config())
    self.assertEqual(first, second)
def test_caching_readonly_cache(self):
    """Attempting to cache into a read-only element should raise."""
    readonly_cache = DataMemoryElement(readonly=True)
    dms = DataMemorySet(readonly_cache)
    self.assertRaises(ReadOnlyError, dms.cache)
def test_save_model_with_cache(self, m_savez):
    """Building with a cache element should save the model exactly once."""
    cache = DataMemoryElement()
    index = SkLearnBallTreeHashIndex(cache, random_seed=0)
    hash_matrix = np.random.randint(0, 2, 1000 * 256).reshape(1000, 256)
    index._build_bt_internal(hash_matrix)
    # numpy.savez is mocked; one save call expected for the cache write.
    self.assertTrue(m_savez.called)
    self.assertEqual(m_savez.call_count, 1)
def test_add_with_caching(self):
    """
    Test that we can add key-value pairs and they reflect in the cache
    element.
    """
    cache = DataMemoryElement()
    store = MemoryKeyValueStore(cache)
    store.add('a', 'b')
    store.add('foo', None)
    store.add(0, 89)
    self.assertEqual(pickle.loads(cache.get_bytes()),
                     {'a': 'b', 'foo': None, 0: 89})
def test_remove_with_cache(self):
    """
    Test that removal correctly updates the cache element.
    """
    initial_table = {
        0: 1,
        'a': 'b',
    }
    cache = DataMemoryElement(pickle.dumps(initial_table))
    store = MemoryKeyValueStore(cache)
    # Construction should have hydrated the table from the cache.
    self.assertDictEqual(store._table, initial_table)
    store.remove('a')
    self.assertDictEqual(store._table, {0: 1})
    self.assertDictEqual(pickle.loads(cache.get_bytes()), {0: 1})
def test_add_many_with_caching(self):
    """
    Test that adding many reflects in cache.
    """
    payload = {
        'a': 'b',
        'foo': None,
        0: 89,
    }
    cache = DataMemoryElement()
    store = MemoryKeyValueStore(cache)
    # Fresh store: nothing in the table, nothing cached.
    self.assertEqual(store._table, {})
    self.assertEqual(cache.get_bytes(), six.b(""))
    store.add_many(payload)
    self.assertEqual(store._table, payload)
    self.assertEqual(pickle.loads(cache.get_bytes()), payload)
def test_save_cache_remove_from_index(self):
    """The cache should be rewritten appropriately on a removal."""
    cache = DataMemoryElement()
    self.assertTrue(cache.is_empty())
    idx = LinearHashIndex(cache)
    # Bit-vectors encode to the integers noted in the comments.
    # noinspection PyTypeChecker
    idx.build_index([[0, 1, 0],   # 2
                     [0, 1, 1],   # 3
                     [1, 0, 0],   # 4
                     [1, 1, 0]])  # 6
    self.assertFalse(cache.is_empty())
    self.assertSetEqual(
        set(numpy.load(BytesIO(cache.get_bytes()))),
        {2, 3, 4, 6}
    )
    # Removing two entries should leave only the remaining integers
    # serialized in the cache.
    # noinspection PyTypeChecker
    idx.remove_from_index([[0, 1, 1],   # 3
                           [1, 0, 0]])  # 4
    self.assertFalse(cache.is_empty())
    self.assertSetEqual(
        set(numpy.load(BytesIO(cache.get_bytes()))),
        {2, 6}
    )
def resolve_data_element(self, uri):
    """
    Given the URI to some data, resolve it down to a DataElement
    instance.

    :raises ValueError: Issue with the given URI regarding either URI
        source resolution or data resolution.

    :param uri: URI to data
    :type uri: str

    :return: DataElement instance wrapping given URI to data.
    :rtype: smqtk.representation.DataElement
    """
    self._log.debug("Resolving URI: %s", uri)
    # Resolve URI into appropriate DataElement instance
    if uri.startswith("file://"):
        self._log.debug("Given local disk filepath")
        local_path = uri[7:]
        if not os.path.isfile(local_path):
            raise ValueError("File URI did not point to an existing file "
                             "on disk.")
        de = DataFileElement(local_path)
    elif uri.startswith("base64://"):
        self._log.debug("Given base64 string")
        # Content type must accompany raw base64 data for it to be usable.
        content_type = flask.request.args.get('content_type', None)
        self._log.debug("Content type: %s", content_type)
        if not content_type:
            raise ValueError("No content-type with given base64 data")
        de = DataMemoryElement.from_base64(uri[9:], content_type)
    else:
        self._log.debug("Given URL")
        try:
            de = DataUrlElement(uri)
        except requests.HTTPError as ex:
            raise ValueError("Failed to initialize URL element due to "
                             "HTTPError: %s" % str(ex))
    return de
def test_add_data(self):
    """Added elements should be retrievable from the backing KV store."""
    backing_kv = MemoryKeyValueStore()
    dataset = KVSDataSet(backing_kv)
    elem1 = DataMemoryElement(six.b('bytes1'))
    elem2 = DataMemoryElement(six.b('bytes2'))
    dataset.add_data(elem1, elem2)

    # Both elements should be keyed by UUID in the underlying store.
    for elem in (elem1, elem2):
        self.assertIn(elem.uuid(), backing_kv)
        self.assertEqual(backing_kv.get(elem.uuid()), elem)
def test_from_uri_data_format(self):
    """A valid data URI should yield an element with expected content."""
    elem = DataMemoryElement.from_uri(self.VALID_DATA_URI)
    self.assertIsInstance(elem, DataMemoryElement)
    self.assertEqual(elem.get_bytes(), self.EXPECTED_BYTES)
    self.assertEqual(elem.content_type(), self.EXPECTED_CT)
def test_get_bytes_empty_bytes(self):
    """An element built from an empty string should yield empty bytes."""
    elem = DataMemoryElement('')
    self.assertEqual(elem.get_bytes(), six.b(''))
def test_is_empty_nonzero_bytes(self):
    """An element holding content should not report as empty."""
    elem = DataMemoryElement('some bytes')
    self.assertFalse(elem.is_empty())
def test_writable_default(self):
    """Elements should be writable by default."""
    elem = DataMemoryElement('foo')
    self.assertTrue(elem.writable())
def test_get_bytes_some_bytes(self):
    """get_bytes should return exactly what was given at construction."""
    content = 'some bytes'
    elem = DataMemoryElement(content)
    self.assertEqual(elem.get_bytes(), content)
def test_from_base64_no_ct(self):
    """Decoding base64 without a content type should still yield bytes."""
    elem = DataMemoryElement.from_base64(self.VALID_BASE64)
    self.assertIsInstance(elem, DataMemoryElement)
    self.assertEqual(elem.get_bytes(), self.EXPECTED_BYTES)
def test_writable_when_not_readonly(self):
    """An explicitly non-readonly element should report writable."""
    elem = DataMemoryElement('', readonly=False)
    self.assertTrue(elem.writable())
def test_from_base64_with_ct(self):
    """Decoding base64 with a content type should set both fields."""
    elem = DataMemoryElement.from_base64(self.VALID_BASE64,
                                         self.EXPECTED_CT)
    self.assertIsInstance(elem, DataMemoryElement)
    self.assertEqual(elem.get_bytes(), self.EXPECTED_BYTES)
    self.assertEqual(elem.content_type(), self.EXPECTED_CT)
def test_from_base64_empty_string(self):
    """An empty base64 string should translate to an empty byte string."""
    elem = DataMemoryElement.from_base64('', None)
    self.assertIsInstance(elem, DataMemoryElement)
    self.assertEqual(elem.get_bytes(), six.b(''))
def test_from_uri_empty_string(self):
    """An empty URI should yield an element with no byte data."""
    elem = DataMemoryElement.from_uri('')
    self.assertIsInstance(elem, DataMemoryElement)
    # No base64 payload, which should decode to no bytes.
    self.assertEqual(elem.get_bytes(), six.b(''))
def test_from_uri_base64_header_empty_data(self):
    """A bare base64:// header should yield an element with no bytes."""
    elem = DataMemoryElement.from_uri('base64://')
    self.assertIsInstance(elem, DataMemoryElement)
    # No base64 payload, which should decode to no bytes.
    self.assertEqual(elem.get_bytes(), six.b(''))
def test_is_empty_zero_bytes(self):
    """An element holding no content should report as empty."""
    elem = DataMemoryElement('')
    self.assertTrue(elem.is_empty())