def test_constructor_only_name(self):
    # Constructing MongoDataSetIO with only a name should query the
    # (mocked) collection by that name and expose the found name and id.
    data_set_name = 'test_data_set'
    object_id_hex = '5c9a3457e3306c41f7ae1f3e'

    mock_collection = MagicMock()
    fake_client = {'qilib': {'data_sets': mock_collection}}

    with patch('qilib.data_set.mongo_data_set_io.MongoClient',
               return_value=fake_client) as mongo_client:
        found_document = {
            '_id': ObjectId(object_id_hex),
            'name': data_set_name,
        }
        mock_collection.find_one.return_value = found_document
        mock_collection.insert_one.return_value.inserted_id = ObjectId(
            object_id_hex)

        mongo_data_set_io = MongoDataSetIO(name=data_set_name)

        # The client is created once and the lookup is done by name only.
        mongo_client.assert_called_once()
        mock_collection.find_one.assert_called_once_with(
            {'name': data_set_name})

        self.assertEqual(mongo_data_set_io.name, data_set_name)
        self.assertEqual(mongo_data_set_io.id, object_id_hex)
def test_sync_data_array_to_storage(self):
    # Adding a DataArray through the writer must reach the (mocked)
    # MongoDataSetIO as one 'data_arrays.<name>' document update.
    io_target = 'qilib.data_set.mongo_data_set_io_writer.MongoDataSetIO'
    with patch(io_target) as mongo_data_set_io:
        # Keep the real encoder on the mock so the payload is comparable.
        mongo_data_set_io.encode_numpy_array = \
            MongoDataSetIO.encode_numpy_array

        name, document_id = 'test', '0x2A'
        writer = MongoDataSetIOWriter(name=name, document_id=document_id)
        mongo_data_set_io.assert_called_once_with(
            name, document_id, collection='data_sets', database='qilib')

        set_array = DataArray(name='set_array',
                              label='for_testing',
                              is_setpoint=True,
                              shape=(2, 2))
        data_array = DataArray(name='the_array',
                               label='unit_test',
                               unit='T',
                               is_setpoint=False,
                               preset_data=None,
                               set_arrays=[set_array],
                               shape=(2, 2))

        writer.sync_add_data_array_to_storage(data_array)

        array_document = {
            'name': 'the_array',
            'label': "unit_test",
            'unit': 'T',
            'is_setpoint': False,
            'set_arrays': ['set_array'],
            'preset_data': MongoDataSetIO.encode_numpy_array(data_array),
        }
        expected = {'data_arrays.the_array': array_document}

        expected_calls = [
            call('test', '0x2A', collection='data_sets', database='qilib'),
            call().update_document(expected),
        ]
        mongo_data_set_io.assert_has_calls(expected_calls)
def test_finalize(self):
    # finalize() is expected to call close() on the client instance that
    # MongoDataSetIO obtained from the (patched) MongoClient.
    client_target = 'qilib.data_set.mongo_data_set_io.MongoClient'
    with patch(client_target) as mock_client:
        data_set_io = MongoDataSetIO(name='test_data_set')
        data_set_io.finalize()

        expected_calls = [call().close()]
        mock_client.assert_has_calls(expected_calls)
class MongoDataSetIOReader(DataSetIOReader):
    """ Allows a DataSet to subscribe to changes, and updates, in a mongodb."""

    # Key of the marker document the watcher thread puts on the queue when it
    # stops because of an error; the value is the exception that stopped it.
    THREAD_ERROR = 'thread_error'

    def __init__(
            self,
            name: Optional[str] = None,
            document_id: Optional[str] = None,
            database: str = MongoDataSetIO.DEFAULT_DATABASE_NAME,
            collection: str = MongoDataSetIO.DEFAULT_COLLECTION_NAME) -> None:
        """ DataSetIOReader implementation for a mongodb.

        Note:
            The finalize method has to be called to close database connection
            and join the watcher thread.

        Args:
            name: Name of data set in the underlying mongodb.
            document_id: _id of the data set in the underlying mongodb.
            database: Name of the database.
            collection: Name of the collections.

        Raises:
            DocumentNotFoundError: If no data set with document_id or name
                found in database.
        """
        super().__init__()
        self._name = name
        self._id = document_id
        self._mongo_data_set_io = MongoDataSetIO(name,
                                                 document_id,
                                                 create_if_not_found=False,
                                                 database=database,
                                                 collection=collection)
        # Set arrays seen so far, by name, so that data arrays arriving later
        # can reference them.
        self._set_arrays: Dict[str, DataArray] = {}

        self._watcher = self._mongo_data_set_io.watch()
        watchers.append(self._watcher)

        # The worker thread moves change events from the watcher onto this
        # queue; sync_from_storage consumes them.
        self._update_queue = Queue()  # type: ignore
        self._update_thread = Thread(target=self._update_worker,
                                     args=(self._update_queue,
                                           self._watcher))
        self._update_thread.daemon = True
        self._update_thread.start()

        self._data_set: Any = None

    def __del__(self) -> None:
        """ Close the watcher, unregister it and give the worker thread one
        second to stop.

        The attributes are looked up defensively: when __init__ raised before
        they were assigned (e.g. the document was not found) __del__ is still
        invoked on the partially constructed object.
        """
        watcher = getattr(self, '_watcher', None)
        if watcher is not None:
            watcher.close()
            # Only unregister when still registered, so a missing entry does
            # not prevent the thread from being joined below.
            if watcher in watchers:
                watchers.remove(watcher)

        update_thread = getattr(self, '_update_thread', None)
        if update_thread is not None:
            update_thread.join(1)

    def sync_from_storage(self, timeout: float) -> None:
        """ Poll the Mongo database for changes and apply any to the bound
        data_set.

        Args:
            timeout: Stop syncing if collecting an item takes longer than the
                timeout time. The timeout can be -1 (blocking),
                0 (non-blocking), or >0 (wait at most that many seconds).

        Raises:
            TimeoutError: If timeout is reached while the storage queue is
                still empty.
            ThreadError: If the watcher thread reported that it has stopped.
        """
        blocking = timeout != 0
        get_timeout = timeout if timeout > 0 else None
        # In non-blocking mode nothing is fetched from an empty queue; in the
        # other modes at least one item is awaited.
        empty_queue = self._update_queue.empty() if timeout == 0 else False
        while not empty_queue:
            try:
                document = self._update_queue.get(blocking, get_timeout)
            except Empty as e:
                raise TimeoutError from e

            if MongoDataSetIOReader.THREAD_ERROR in document:
                raise ThreadError(
                    'Watcher thread has stopped unexpectedly.'
                ) from document[MongoDataSetIOReader.THREAD_ERROR]

            updated_fields = document['updateDescription']['updatedFields']
            adjusted_updates = self._convert_dot_notation_to_dict(
                updated_fields)
            self._update_data_set(adjusted_updates)
            empty_queue = self._update_queue.empty()

    def _convert_dot_notation_to_dict(
            self, updated_fields: Dict[str, Any]) -> Dict[str, Any]:
        """ If a nested field is updated in the mongo database, the nested
        document is replaced with a dot notation that does not match the data
        set that is to be updated.

        For example:
            {data_arrays.array_name: {<data>}}
        Needs to be converted to:
            {data_arrays: {array_name: {<data>}}}

        Several dotted keys below the same top-level field are merged into
        one entry instead of overwriting each other.

        Args:
            updated_fields: An update event from the mongo database change
                stream.

        Returns:
            An updated dictionary without the dot notation.
        """
        adjusted_updates: Dict[str, Any] = {}
        # Containers created here, per top-level field. They are tracked
        # separately so that values taken verbatim from the event are never
        # mutated when merging.
        indexed_updates: Dict[str, List[Any]] = {}
        nested_updates: Dict[str, Dict[str, Any]] = {}

        for field, value in updated_fields.items():
            root, *rest = field.split('.')
            if rest and rest[0].isnumeric():
                # {array_updates.index: [<index>, <data>]}
                #   -> {array_updates: [[<index>, <data>], ...]}
                indexed_updates.setdefault(root, []).append(value)
                adjusted_updates[root] = indexed_updates[root]
            elif rest and root in (self.DATA_ARRAYS, self.METADATA):
                # {data_arrays.array_name: {<data>}}
                #   -> {data_arrays: {array_name: {<data>}, ...}}
                nested_updates.setdefault(root, {})[rest[0]] = value
                adjusted_updates[root] = nested_updates[root]
            else:
                adjusted_updates[root] = value

        return adjusted_updates

    def bind_data_set(self, data_set: DataSet) -> None:
        """ Binds the DataSet to the DataSetIOReader.

        The current document is fetched from the database and applied to the
        data set right away.

        Args:
            data_set: The object that encompasses DataArrays.
        """
        self._data_set = data_set
        document = self._mongo_data_set_io.get_document()
        self._data_set.name = document.get('name')
        self._update_data_set(document)

    def _update_data_set(self, document: Any) -> None:
        """ Apply metadata, data arrays and array updates found in document
        to the bound data set.

        Args:
            document: A (partial) data set document without dot notation.
        """
        if self.METADATA in document:
            for field, value in document.get(self.METADATA).items():
                setattr(self._data_set, field, value)

        if self.DATA_ARRAYS in document:
            arrays = list(document.get(self.DATA_ARRAYS).values())
            # Set arrays go first: data arrays look them up by name.
            self._update_set_arrays(
                [array for array in arrays if array['is_setpoint']])
            for array in arrays:
                if array['is_setpoint']:
                    continue
                if hasattr(self._data_set, array['name']):
                    self._update_data_array(array)
                else:
                    self._data_set.add_array(
                        self._construct_data_array(array))

        if self.ARRAY_UPDATES in document:
            for array_update in document.get(self.ARRAY_UPDATES):
                index_or_slice = array_update[0]
                # A list index is converted to a tuple before being used.
                if isinstance(index_or_slice, list):
                    index_or_slice = tuple(index_or_slice)
                self._data_set.add_data(index_or_slice, array_update[1])

    @staticmethod
    def load(
        name: Optional[str] = None,
        document_id: Optional[str] = None,
        database: str = MongoDataSetIO.DEFAULT_DATABASE_NAME,
        collection: str = MongoDataSetIO.DEFAULT_COLLECTION_NAME
    ) -> DataSet:
        """ Load an existing data set from the mongodb.

        Args:
            name: Name of the data set.
            document_id: _id of the data set.
            database: Name of the database.
            collection: Name of the collections.

        Returns:
            A new instance of the underlying data set.

        Raises:
            DocumentNotFoundError: If document_id or name do not match any
                data set in database.
        """
        reader = MongoDataSetIOReader(name,
                                      document_id,
                                      database=database,
                                      collection=collection)
        return DataSet(storage_reader=reader)

    def _update_set_arrays(self, arrays: List[Dict[str, Any]]) -> None:
        """ Construct and remember every set array that is not known yet.

        Args:
            arrays: Documents describing set arrays.
        """
        for array in arrays:
            if array['name'] not in self._set_arrays:
                self._set_arrays[array['name']] = self._construct_data_array(
                    array)

    def _construct_data_array(self, array: Dict[str, Any]) -> DataArray:
        """ Build a DataArray from its document.

        Args:
            array: Document describing the array; the set arrays it names
                have to be known to this reader already.

        Returns:
            The newly constructed DataArray.
        """
        set_arrays = [self._set_arrays[name] for name in array['set_arrays']]
        return DataArray(name=array['name'],
                         label=array['label'],
                         unit=array['unit'],
                         is_setpoint=array['is_setpoint'],
                         preset_data=MongoDataSetIO.decode_numpy_array(
                             array['preset_data']),
                         set_arrays=set_arrays)

    def _update_data_array(self, array: Dict[str, Any]) -> None:
        """ Overwrite label, unit and data of an array the data set already
        has.

        Args:
            array: Document describing the array.
        """
        np_array = MongoDataSetIO.decode_numpy_array(array['preset_data'])
        target = self._data_set.data_arrays[array['name']]
        target.label = array['label']
        target.unit = array['unit']
        for index, values in enumerate(np_array):
            target[index] = values

    @staticmethod
    def _update_worker(queue: Any, watcher: CollectionChangeStream) -> None:
        """ Thread target: forward every change event from watcher to queue.

        When the watcher stops with one of the handled exceptions, a marker
        document holding that exception under THREAD_ERROR is queued instead.

        Args:
            queue: Queue the change events are put on.
            watcher: Change stream that is read with next().
        """
        try:
            while True:
                document = watcher.next()
                queue.put(document)
        except (StopIteration, InvalidOperation, OperationFailure) as e:
            queue.put({MongoDataSetIOReader.THREAD_ERROR: e})
def test_sync_from_storage_array(self):
    # Feeds three change events through a mocked queue: a set array, a data
    # array that uses it, and a changed version of that data array.
    def as_document(array):
        # Document form of a DataArray as it appears in an update event.
        return {
            "name": array.name,
            "label": array.label,
            "unit": array.unit,
            "is_setpoint": array.is_setpoint,
            "set_arrays": [member.name for member in array.set_arrays],
            "preset_data": MongoDataSetIO.encode_numpy_array(array),
        }

    def change_event(fields):
        # Minimal change stream event carrying the given updated fields.
        return {'updateDescription': {'updatedFields': fields}}

    mock_queue = MagicMock()
    reader_module = 'qilib.data_set.mongo_data_set_io_reader'
    with patch(reader_module + '.MongoDataSetIO') as mock_io, \
            patch(reader_module + '.Thread') as thread, \
            patch(reader_module + '.Queue', return_value=mock_queue):
        # The reader decodes through the patched class, so give the mock
        # the real decoder.
        mock_io.decode_numpy_array = MongoDataSetIO.decode_numpy_array

        reader = MongoDataSetIOReader(name='test')
        thread.assert_called_once()
        mock_io.assert_called_once_with('test',
                                        None,
                                        create_if_not_found=False,
                                        collection='data_sets',
                                        database='qilib')
        data_set = DataSet(storage_reader=reader)

        set_array = DataArray(name='setter',
                              label='for_testing',
                              is_setpoint=True,
                              preset_data=np.array(range(0, 2)))
        set_array[0] = 42
        set_array[1] = 25
        data_array = DataArray(name='test_array',
                               label='lab',
                               unit='V',
                               is_setpoint=False,
                               set_arrays=[set_array],
                               shape=(2, ))

        # First event: the set array, in nested (non-dotted) form.
        update_data = {"data_arrays": {"setter": as_document(set_array)}}
        mock_queue.get.return_value = change_event(update_data)
        data_set.sync_from_storage(-1)

        # Second event: the data array, in dot notation.
        update_data = {"data_arrays.test_array": as_document(data_array)}
        mock_queue.get.return_value = change_event(update_data)
        data_set.sync_from_storage(-1)

        synced = data_set.test_array
        self.assertEqual('test_array', synced.name)
        self.assertEqual('lab', data_set.test_array.label)
        self.assertEqual('V', data_set.test_array.unit)
        self.assertFalse(data_set.test_array.is_setpoint)
        # NOTE(review): the next two assertions inspect the local fixture,
        # not the array held by data_set - confirm this is intended.
        self.assertEqual('setter', data_array.set_arrays[0].name)
        self.assertListEqual([42, 25], list(data_array.set_arrays[0]))

        # Third event: same array with changed data; the queued event still
        # references update_data, so mutating it in place is sufficient.
        data_array[0] = 255
        update_data["data_arrays.test_array"]["preset_data"] = \
            MongoDataSetIO.encode_numpy_array(data_array)
        data_set.sync_from_storage(-1)
        self.assertEqual(255, data_set.test_array[0])
class MongoDataSetIOWriter(DataSetIOWriter):
    """ Allow a DataSet to store changes, and complete DataSet, to a mongodb."""

    def __init__(
            self,
            name: Optional[str] = None,
            document_id: Optional[str] = None,
            database: str = MongoDataSetIO.DEFAULT_DATABASE_NAME,
            collection: str = MongoDataSetIO.DEFAULT_COLLECTION_NAME) -> None:
        """ Construct a new instance of MongoDataSetIOWriter.

        If name is provided, but not found in the database a new document is
        created with that name.

        Args:
            name: DataSet name.
            document_id: _id of the DataSet in the database.
            database: Name of the database.
            collection: Name of the collections.

        Raises:
            DocumentNotFoundError: If document_id is provided but not found in
                the database.
        """
        super().__init__()
        self._mongo_data_set_io = MongoDataSetIO(name,
                                                 document_id,
                                                 database=database,
                                                 collection=collection)

    def sync_metadata_to_storage(self, field_name: str, value: Any) -> None:
        """ Update or add metadata field to database.

        Args:
            field_name: Field that changed.
            value: The new value.
        """
        # Defined outside this class; presumably rejects use after
        # finalize() - TODO confirm against the base class.
        self._is_finalized()
        metadata_key = f"{DataSetIOReader.METADATA}.{field_name}"
        self._mongo_data_set_io.update_document({metadata_key: value})

    def sync_data_to_storage(self, index_or_slice: Union[Tuple[int, ...],
                                                         int],
                             data: Dict[str, Any]) -> None:
        """ Registers a DataArray update to the database.

        The (index_or_slice, data) pair is appended to the document under the
        array updates field.

        Args:
            index_or_slice: The indices of the DataArray to update.
            data: Name of the DataArray to be updated and the new value.
        """
        self._is_finalized()
        change = (index_or_slice, data)
        self._mongo_data_set_io.append_to_document(
            {DataSetIOReader.ARRAY_UPDATES: change})

    def sync_add_data_array_to_storage(self, data_array: DataArray) -> None:
        """ Add or update a DataArray in the database.

        Args:
            data_array: The DataArray to be updated or added.
        """
        self._is_finalized()
        array_key = f"{DataSetIOReader.DATA_ARRAYS}.{data_array.name}"
        array_document = {
            "name": data_array.name,
            "label": data_array.label,
            "unit": data_array.unit,
            "is_setpoint": data_array.is_setpoint,
            # Set arrays are stored by name only.
            "set_arrays": [member.name for member in data_array.set_arrays],
            "preset_data": MongoDataSetIO.encode_numpy_array(data_array),
        }
        self._mongo_data_set_io.update_document({array_key: array_document})

    def finalize(self) -> None:
        """ Reset the stored array updates to an empty list, finalize the
        underlying MongoDataSetIO and mark this writer as finalized."""
        cleared_updates = {DataSetIOReader.ARRAY_UPDATES: []}
        self._mongo_data_set_io.update_document(cleared_updates)
        self._mongo_data_set_io.finalize()
        self._finalized = True