def test_metadata(experiment, request):
    metadata1 = {"number": 1, "string": "Once upon a time..."}
    metadata2 = {"more": "meta"}

    ds1 = DataSet(metadata=metadata1)
    request.addfinalizer(ds1.conn.close)
    ds2 = DataSet(metadata=metadata2)
    request.addfinalizer(ds2.conn.close)

    assert ds1.run_id == 1
    assert ds1.metadata == metadata1
    assert ds2.run_id == 2
    assert ds2.metadata == metadata2

    loaded_ds1 = DataSet(run_id=1)
    request.addfinalizer(loaded_ds1.conn.close)
    assert loaded_ds1.metadata == metadata1

    loaded_ds2 = DataSet(run_id=2)
    request.addfinalizer(loaded_ds2.conn.close)
    assert loaded_ds2.metadata == metadata2

    bad_tag = "lex luthor"
    bad_tag_msg = (f"Tag {bad_tag} is not a valid tag. "
                   "Use only alphanumeric characters and underscores!")
    with pytest.raises(RuntimeError,
                       match="Rolling back due to unhandled exception") as e1:
        ds1.add_metadata(bad_tag, "value")
    assert error_caused_by(e1, bad_tag_msg)

    good_tag = "tag"
    none_value_msg = (f"Tag {good_tag} has value None. "
                      "That is not a valid metadata value!")
    with pytest.raises(RuntimeError,
                       match="Rolling back due to unhandled exception") as e2:
        ds1.add_metadata(good_tag, None)
    assert error_caused_by(e2, none_value_msg)
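
# The two failure modes exercised above follow from the tag rule the error
# messages describe: tags must use only alphanumerics and underscores, and
# values must not be None. Below is a minimal, hedged sketch of such a
# validator; `_validate_tag` is a hypothetical name, not the function
# QCoDeS actually calls internally.
import re


def _validate_tag(tag, value):
    # \w matches alphanumerics and underscore, mirroring the error message
    if re.fullmatch(r"\w+", tag) is None:
        raise ValueError(f"Tag {tag} is not a valid tag. "
                         "Use only alphanumeric characters and underscores!")
    if value is None:
        raise ValueError(f"Tag {tag} has value None. "
                         "That is not a valid metadata value!")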
@pytest.fixture
def experiment_partially_labelled(empty_temp_db, tmp_path):
    e = new_experiment("test_experiment", sample_name="test_sample")
    for did in range(len(test_data_labels)):
        ds = DataSet(os.path.join(tmp_path, "temp.db"))
        nt_metadata, current_label = generate_default_metadata()
        stage = test_data_labels[did]
        if stage is not None:
            current_label[stage] = 1
        for label, value in current_label.items():
            ds.add_metadata(label, value)
        ds.add_metadata(nt.meta_tag, json.dumps(nt_metadata))
        ds.add_metadata("snapshot", json.dumps({}))
    try:
        yield e
    finally:
        e.conn.close()
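
# A hedged sketch of how the fixture above might be consumed; the test
# name and assertions are illustrative rather than taken from the real
# suite.
def test_partially_labelled_experiment(experiment_partially_labelled):
    exp = experiment_partially_labelled
    assert exp.name == "test_experiment"
    assert exp.sample_name == "test_sample"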
@pytest.fixture
def second_third_experiment_labelled_data(second_empty_temp_db, tmp_path):
    e1 = new_experiment("test_experiment2", sample_name="test_sample")
    e2 = new_experiment("test_experiment3", sample_name="test_sample")
    for did in range(len(test_data_labels2)):
        ds = DataSet(os.path.join(tmp_path, "temp2.db"), exp_id=e2._exp_id)
        nt_metadata, current_label = generate_default_metadata()
        stage = test_data_labels2[did]
        if stage is not None:
            current_label[stage] = 1
        ds.add_metadata(nt.meta_tag, json.dumps(nt_metadata))
        ds.add_metadata("snapshot", json.dumps({}))
        for label, value in current_label.items():
            ds.add_metadata(label, value)
    try:
        yield e2
    finally:
        e1.conn.close()
        e2.conn.close()
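
# For reference, a minimal sketch of the exp_id mechanism the fixture
# above relies on: passing exp_id when constructing a DataSet attaches
# the new run to that experiment rather than to the most recent one.
# The experiment name below is illustrative.
def _sketch_exp_id_binding():
    e_extra = new_experiment("scratch_experiment", sample_name="scratch")
    ds_extra = DataSet(exp_id=e_extra.exp_id)
    assert ds_extra.exp_id == e_extra.exp_id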
def test_basic_extraction(two_empty_temp_db_connections, some_paramspecs):
    source_conn, target_conn = two_empty_temp_db_connections

    source_path = path_to_dbfile(source_conn)
    target_path = path_to_dbfile(target_conn)

    type_casters = {'numeric': float,
                    'array': (lambda x: np.array(x) if hasattr(x, '__iter__')
                              else np.array([x])),
                    'text': str}

    source_exp = Experiment(conn=source_conn)
    source_dataset = DataSet(conn=source_conn, name="basic_copy_paste_name")

    with pytest.raises(RuntimeError) as excinfo:
        extract_runs_into_db(source_path, target_path,
                             source_dataset.run_id)

    assert error_caused_by(excinfo, ('Dataset not completed. An incomplete '
                                     'dataset can not be copied. The '
                                     'incomplete dataset has GUID: '
                                     f'{source_dataset.guid} and run_id: '
                                     f'{source_dataset.run_id}'))

    for ps in some_paramspecs[1].values():
        source_dataset.add_parameter(ps)

    for value in range(10):
        result = {ps.name: type_casters[ps.type](value)
                  for ps in some_paramspecs[1].values()}
        source_dataset.add_result(result)

    source_dataset.add_metadata('goodness', 'fair')
    source_dataset.add_metadata('test', True)

    source_dataset.mark_complete()

    extract_runs_into_db(source_path, target_path, source_dataset.run_id)

    target_exp = Experiment(conn=target_conn, exp_id=1)

    length1 = len(target_exp)
    assert length1 == 1

    # trying to insert the same run again should be a NOOP
    with raise_if_file_changed(target_path):
        extract_runs_into_db(source_path, target_path, source_dataset.run_id)

    assert len(target_exp) == length1

    target_dataset = DataSet(conn=target_conn, run_id=1)

    # Now make the interesting comparisons: are the target objects the
    # same as the source objects?
    assert source_dataset.the_same_dataset_as(target_dataset)

    source_data = source_dataset.get_data(
        *source_dataset.parameters.split(','))
    target_data = target_dataset.get_data(
        *target_dataset.parameters.split(','))

    assert source_data == target_data

    exp_attrs = ['name', 'sample_name', 'format_string', 'started_at',
                 'finished_at']
    for exp_attr in exp_attrs:
        assert getattr(source_exp, exp_attr) == getattr(target_exp, exp_attr)
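
# raise_if_file_changed, used above to verify the NOOP guarantee, can be
# thought of as a checksum guard around a block of code. This is a hedged
# re-implementation sketch, not the helper the test actually imports.
import hashlib
from contextlib import contextmanager


@contextmanager
def _raise_if_file_changed_sketch(path):
    def _digest():
        with open(path, "rb") as f:
            return hashlib.sha256(f.read()).hexdigest()
    before = _digest()
    yield
    # If the file's content hash changed inside the block, fail loudly
    if _digest() != before:
        raise RuntimeError(f"File {path} was modified")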
class Runner:
    """
    Context manager for the measurement.

    Lives inside a :class:`Measurement` and should never be instantiated
    outside a Measurement.

    This context manager handles all the dirty business of writing data
    to the database. Additionally, it may perform experiment bootstrapping
    and clean-up after a measurement.
    """

    def __init__(
        self,
        enteractions: Sequence[ActionType],
        exitactions: Sequence[ActionType],
        experiment: Optional[Experiment] = None,
        station: Optional[Station] = None,
        write_period: Optional[float] = None,
        interdeps: InterDependencies_ = InterDependencies_(),
        name: str = "",
        subscribers: Optional[Sequence[SubscriberType]] = None,
        parent_datasets: Sequence[Mapping[Any, Any]] = (),
        extra_log_info: str = "",
        write_in_background: bool = False,
        shapes: Optional[Shapes] = None,
        in_memory_cache: bool = True,
        dataset_class: DataSetType = DataSetType.DataSet,
    ) -> None:
        self._dataset_class = dataset_class
        self.write_period = self._calculate_write_period(
            write_in_background, write_period)

        self.enteractions = enteractions
        self.exitactions = exitactions
        self.subscribers: Sequence[SubscriberType]
        if subscribers is None:
            self.subscribers = []
        else:
            self.subscribers = subscribers
        self.experiment = experiment
        self.station = station
        self._interdependencies = interdeps
        self._shapes: Shapes = shapes
        self.name = name if name else 'results'
        self._parent_datasets = parent_datasets
        self._extra_log_info = extra_log_info
        self._write_in_background = write_in_background
        self._in_memory_cache = in_memory_cache
        self.ds: DataSetProtocol

    @staticmethod
    def _calculate_write_period(write_in_background: bool,
                                write_period: Optional[float]) -> float:
        write_period_changed_from_default = (
            write_period is not None
            and write_period != qc.config.defaults.dataset.write_period
        )
        if write_in_background and write_period_changed_from_default:
            warnings.warn(f"The specified write period of {write_period} s "
                          "will be ignored, since write_in_background==True")
        if write_in_background:
            return 0.0
        if write_period is None:
            write_period = qc.config.dataset.write_period
        return float(write_period)

    def __enter__(self) -> DataSaver:
        # TODO: should user actions really precede the dataset?
        # first do whatever bootstrapping the user specified
        for func, args in self.enteractions:
            func(*args)

        dataset_class: Type[DataSetProtocol]

        # next set up the "datasaver"
        if self.experiment is not None:
            exp_id: Optional[int] = self.experiment.exp_id
            path_to_db: Optional[str] = self.experiment.path_to_db
            conn: Optional["ConnectionPlus"] = self.experiment.conn
        else:
            exp_id = None
            path_to_db = None
            conn = None

        if self._dataset_class is DataSetType.DataSet:
            self.ds = DataSet(
                name=self.name,
                exp_id=exp_id,
                conn=conn,
                in_memory_cache=self._in_memory_cache,
            )
        elif self._dataset_class is DataSetType.DataSetInMem:
            if self._in_memory_cache is False:
                raise RuntimeError("Cannot disable the in memory cache for a "
                                   "dataset that is only in memory.")
            self.ds = DataSetInMem._create_new_run(
                name=self.name,
                exp_id=exp_id,
                path_to_db=path_to_db,
            )
        else:
            raise RuntimeError("Does not support any other dataset classes")
        # .. and give the dataset a snapshot as metadata
        if self.station is None:
            station = Station.default
        else:
            station = self.station

        if station is not None:
            snapshot = station.snapshot()
        else:
            snapshot = {}

        self.ds.prepare(
            snapshot=snapshot,
            interdeps=self._interdependencies,
            write_in_background=self._write_in_background,
            shapes=self._shapes,
            parent_datasets=self._parent_datasets,
        )

        # register all subscribers
        if isinstance(self.ds, DataSet):
            for (callble, state) in self.subscribers:
                # We register with minimal waiting time.
                # That should make all subscribers be called when data is
                # flushed to the database
                log.debug(f"Subscribing callable {callble} with state "
                          f"{state}")
                self.ds.subscribe(callble, min_wait=0, min_count=1,
                                  state=state)

        print(f"Starting experimental run with id: "
              f"{self.ds.captured_run_id}. {self._extra_log_info}")
        log.info(f"Starting measurement with guid: {self.ds.guid}, "
                 f'sample_name: "{self.ds.sample_name}", '
                 f'exp_name: "{self.ds.exp_name}", '
                 f'ds_name: "{self.ds.name}". '
                 f"{self._extra_log_info}")
        log.info(f"Using background writing: {self._write_in_background}")

        self.datasaver = DataSaver(dataset=self.ds,
                                   write_period=self.write_period,
                                   interdeps=self._interdependencies)

        return self.datasaver

    def __exit__(self,
                 exception_type: Optional[Type[BaseException]],
                 exception_value: Optional[BaseException],
                 traceback: Optional[TracebackType]) -> None:
        with DelayedKeyboardInterrupt():
            self.datasaver.flush_data_to_database(block=True)

            # perform the "teardown" events
            for func, args in self.exitactions:
                func(*args)

            if exception_type:
                # if an exception happened during the measurement,
                # log the exception
                stream = io.StringIO()
                tb_module.print_exception(exception_type,
                                          exception_value,
                                          traceback,
                                          file=stream)
                exception_string = stream.getvalue()
                log.warning('An exception occurred in measurement with '
                            f'guid: {self.ds.guid};\n'
                            f'Traceback:\n{exception_string}')
                self.ds.add_metadata("measurement_exception",
                                     exception_string)

            # and finally mark the dataset as closed, thus
            # finishing the measurement
            # Note that the completion of a dataset entails waiting for the
            # write thread to terminate (iff the write thread has been
            # started)
            self.ds.mark_completed()
            if get_data_export_automatic():
                self.datasaver.export_data()
            log.info(f'Finished measurement with guid: {self.ds.guid}. '
                     f'{self._extra_log_info}')
            if isinstance(self.ds, DataSet):
                self.ds.unsubscribe_all()
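
# Runner is not meant to be constructed directly; users reach it through
# Measurement.run(), which builds a Runner whose __enter__ yields the
# DataSaver. A hedged usage sketch, with parameter names chosen for
# illustration only:
from qcodes.dataset import Measurement


def _sketch_measurement_run(experiment):
    meas = Measurement(exp=experiment, name="results")
    meas.register_custom_parameter("x")
    meas.register_custom_parameter("y", setpoints=("x",))

    # meas.run() returns a Runner; entering it creates the dataset,
    # snapshots the station, and hands back the DataSaver
    with meas.run() as datasaver:
        for x in range(10):
            datasaver.add_result(("x", x), ("y", x ** 2))
    # on exit the dataset is flushed and marked completed
    return datasaver.dataset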