def test_iter_file_tree_chunk4(self) -> None:
    """
    Walk the fixture tree with ``uuid_chunk=5`` and check the yielded paths.

    The only path expected back is the element file stored four directory
    levels deep (``4/3/2/1``) under the test data root.
    """
    root = os.path.join(TEST_DATA_DIR, 'test_data_file_tree')
    file_set = DataFileSet(root, uuid_chunk=5, pickle_protocol=2)
    found = set(file_set._iter_file_tree())
    wanted = {os.path.join(root, '4/3/2/1/UUID_43210.dataElement')}
    self.assertSetEqual(found, wanted)
def test_fp_for_uuid(self) -> None:
    """
    Check the file path ``_fp_for_uuid`` produces for a UUID.

    Covers an integer UUID and a string UUID without chunking, and a string
    UUID with ``uuid_chunk=3``.
    """
    # No chunking: the file sits directly under the root directory.
    int_uuid_path = DataFileSet('/', None)._fp_for_uuid(0)
    self.assertEqual(int_uuid_path, '/UUID_0.dataElement')

    str_uuid_path = DataFileSet('/', None)._fp_for_uuid('abc')
    self.assertEqual(str_uuid_path, '/UUID_abc.dataElement')

    # Chunking by 3: the expected path gains two sub-directory levels.
    chunked_path = DataFileSet('/', 3)._fp_for_uuid('abc')
    self.assertEqual(chunked_path, '/a/b/UUID_abc.dataElement')
def test_iter(self, m_open: mock.MagicMock,
              m_pickle: mock.MagicMock) -> None:
    """
    Iterating a set should open and unpickle each path from the file tree.

    ``_iter_file_tree`` is replaced with a mock returning three fake paths,
    so nothing on disk is touched.
    """
    fake_paths = ['/a', '/b', '/d']
    file_set = DataFileSet('/')
    file_set._iter_file_tree = mock.MagicMock(  # type: ignore
        return_value=fake_paths)

    # Exhaust the iterator; only the recorded mock calls matter here.
    list(file_set)

    self.assertEqual(m_open.call_count, 3)
    self.assertEqual(m_pickle.load.call_count, 3)
    for path in ('/a', '/b', '/d'):
        m_open.assert_any_call(path, 'rb')
def test_iter_file_tree_chunk3(self) -> None:
    """
    Walk the fixture tree with ``uuid_chunk=3`` and check the yielded paths.

    Exactly the five element files stored two directory levels deep are
    expected back.
    """
    root = os.path.join(TEST_DATA_DIR, 'test_data_file_tree')
    relative_paths = (
        '0/0/UUID_000.dataElement',
        '0/0/UUID_001.dataElement',
        '0/1/UUID_012.dataElement',
        '1/8/UUID_180.dataElement',
        '3/1/UUID_317.dataElement',
    )
    file_set = DataFileSet(root, uuid_chunk=3, pickle_protocol=2)
    found = set(file_set._iter_file_tree())
    wanted = {os.path.join(root, rel) for rel in relative_paths}
    self.assertSetEqual(found, wanted)
def test_configuration(self) -> None:
    """
    Check constructor arguments on instances from the configuration helper.

    Every instance yielded by ``configuration_test_helper`` must carry the
    same root directory, UUID chunk and pickle protocol as the instance it
    was given.
    """
    inst = DataFileSet(root_directory='/some/dir', uuid_chunk=10,
                       pickle_protocol=-1)
    for i in configuration_test_helper(inst):  # type: DataFileSet
        # TestCase assertions (rather than bare ``assert``) keep the checks
        # active under ``python -O`` and match the rest of this test case.
        self.assertEqual(i._root_dir, '/some/dir')
        self.assertEqual(i._uuid_chunk, 10)
        self.assertEqual(i.pickle_protocol, -1)
def test_get_data_no_file(self, m_isfile: mock.MagicMock) -> None:
    """
    ``get_data`` should raise ``KeyError`` when no file backs the UUID.
    """
    # The generated file path is reported as not existing, meaning the UUID
    # refers to an element that is not part of this set.
    m_isfile.return_value = False
    file_set = DataFileSet(TEST_DATA_DIR, None)
    with self.assertRaisesRegex(KeyError, 'no_exist_uuid'):
        file_set.get_data('no_exist_uuid')
def test_new(self) -> None:
    """
    Construct sets with several valid parameter combinations.

    There are no assertions; the test passes if no constructor call raises.
    """
    # Absolute and relative roots, each with the default chunking and with
    # explicit ``uuid_chunk`` values.
    for root in ('/', 'relative/path'):
        DataFileSet(root)
        for chunk in (None, 1, 2346):
            DataFileSet(root, uuid_chunk=chunk)
def test_get_data_valid_filepath(self, m_isfile: mock.MagicMock,
                                 m_open: mock.MagicMock,
                                 m_pickle: mock.MagicMock) -> None:
    """
    ``get_data`` should open the UUID's file and return the unpickled value.

    The file-existence check is mocked to succeed, so the UUID is treated as
    referring to a serialized element in the set.
    """
    uuid = 'abc'
    loaded_value = 'loaded DataElement instance'
    element_path = os.path.join(TEST_DATA_DIR, 'UUID_abc.dataElement')
    m_isfile.return_value = True
    m_pickle.load.return_value = loaded_value

    file_set = DataFileSet(TEST_DATA_DIR, None)
    result = file_set.get_data(uuid)

    # Existence was checked on, and the file opened at, the expected path.
    m_isfile.assert_called_once_with(element_path)
    m_open.assert_called_once_with(element_path, 'rb')
    self.assertEqual(result, loaded_value)
def test_add_data_multiple_chunk3(self, m_isinstance: mock.MagicMock,
                                  m_scd: mock.MagicMock,
                                  m_open: mock.MagicMock,
                                  _m_pickle: mock.MagicMock) -> None:
    """
    Add three elements to a set configured with ``uuid_chunk=3``.

    Checks the directories handed to the directory-creation mock and the
    file paths handed to ``open``. The chunk value is an arbitrary choice.
    """
    # Pretend that the mocks below are DataElement instances.
    m_isinstance.return_value = True

    elements = []
    for uuid in ("abcdefg", "1234567", "4F*s93#5"):
        elem = mock.MagicMock()
        elem.uuid.return_value = uuid
        elements.append(elem)

    file_set = DataFileSet('/', uuid_chunk=3)
    # noinspection PyTypeChecker
    file_set.add_data(*elements)

    # One containing directory created per element.
    self.assertEqual(m_scd.call_count, 3)
    for directory in ('/abc/de', '/123/45', '/4F*/s93'):
        m_scd.assert_any_call(directory)

    # One file opened for writing per element.
    self.assertEqual(m_open.call_count, 3)
    for filepath in ('/abc/de/UUID_abcdefg.dataElement',
                     '/123/45/UUID_1234567.dataElement',
                     '/4F*/s93/UUID_4F*s93#5.dataElement'):
        m_open.assert_any_call(filepath, 'wb')
def test_uuids(self) -> None:
    """
    ``uuids()`` should return the UUIDs of the elements the set iterates.

    ``DataFileSet.__iter__`` is patched to yield three in-memory elements.
    """
    payloads = (b"a", b"b", b"v")
    stub_elements = [DataMemoryElement(p) for p in payloads]
    wanted_uuids = {DataMemoryElement(p).uuid() for p in payloads}

    # Stand-in for the set's own iteration, yielding the stub elements.
    def fake_iter() -> Iterator:
        yield from stub_elements

    with mock.patch('smqtk_dataprovider.impls.data_set.file.DataFileSet'
                    '.__iter__') as m_iter:
        m_iter.side_effect = fake_iter
        file_set = DataFileSet('/')
        self.assertSetEqual(file_set.uuids(), wanted_uuids)
def test_containing_dir_not_str_uuid(self) -> None:
    """
    Check ``_containing_dir`` for an integer UUID, unchunked and chunked.
    """
    int_uuid = 4123458

    flat_dir = DataFileSet('/', None)._containing_dir(int_uuid)
    self.assertEqual(flat_dir, "/")

    chunked_dir = DataFileSet('/', 3)._containing_dir(int_uuid)
    self.assertEqual(chunked_dir, "/412/34")
def test_containing_dir_str_uuid(self) -> None:
    """
    Check ``_containing_dir`` for string UUIDs with chunk None, 1 and 3.
    """
    # Chunk == None: everything lives directly under the root.
    unchunked = DataFileSet('/', uuid_chunk=None)
    for uuid in ('0000', '346'):
        self.assertEqual(unchunked._containing_dir(uuid), '/')

    # Chunk == 1: same expectation as no chunking.
    single_chunk = DataFileSet('/', uuid_chunk=1)
    for uuid in ('0000', '346'):
        self.assertEqual(single_chunk._containing_dir(uuid), '/')

    # Chunk == 3: two directory levels are expected.
    three_chunks = DataFileSet('/', uuid_chunk=3)
    expected_dirs = {
        '123456': '/12/34',
        '685225624578': '/6852/2562',
        '1234567': '/123/45',
    }
    for uuid, directory in expected_dirs.items():
        self.assertEqual(three_chunks._containing_dir(uuid), directory)
def test_add_data_single(self, m_isinstance: mock.MagicMock,
                         m_scd: mock.MagicMock,
                         m_open: mock.MagicMock,
                         _m_pickle: mock.MagicMock) -> None:
    """
    Add one element to sets configured with ``uuid_chunk`` None, 1 and 2.

    For each configuration, checks the directory handed to the
    directory-creation mock and the file path handed to ``open``.
    """
    # Pretend that we are giving DataElement instances
    m_isinstance.return_value = True

    expected_uuid = 'abcd'
    mock_elem = mock.MagicMock()
    mock_elem.uuid.return_value = expected_uuid

    cases = (
        (None, '/', '/UUID_abcd.dataElement'),
        (1, '/', '/UUID_abcd.dataElement'),
        (2, '/ab', '/ab/UUID_abcd.dataElement'),
    )
    for uuid_chunk, expected_dir, expected_filepath in cases:
        # Clear recorded calls so each phase is judged on its own calls.
        # Otherwise a stale call from an earlier phase with the same
        # expected arguments would satisfy ``assert_called_with``.
        m_scd.reset_mock()
        m_open.reset_mock()

        dfs = DataFileSet('/', uuid_chunk=uuid_chunk)
        # noinspection PyTypeChecker
        dfs.add_data(mock_elem)
        m_scd.assert_called_with(expected_dir)
        m_open.assert_called_with(expected_filepath, 'wb')
def test_add_data_not_dataelement(self) -> None:
    """
    ``add_data`` should raise ``AssertionError`` for a non-DataElement.
    """
    file_set = DataFileSet('/')
    bad_input = 'not a dataElement'
    message_pattern = "^Not given a DataElement for addition:"
    # Callable form keeps the deliberately wrong argument type out of a
    # direct, statically checked call.
    self.assertRaisesRegex(AssertionError, message_pattern,
                           file_set.add_data, bad_input)
def test_count(self) -> None:
    """
    ``count()`` should equal the number of paths the file tree yields.

    ``_iter_file_tree`` is replaced with a mock returning three fake paths.
    """
    fake_paths = ['/a', '/b', '/d']
    file_set = DataFileSet('/')
    file_set._iter_file_tree = mock.MagicMock(  # type: ignore
        return_value=fake_paths)
    self.assertEqual(file_set.count(), 3)