Example 1
def test_metadata(experiment, request):

    metadata1 = {'number': 1, "string": "Once upon a time..."}
    metadata2 = {'more': 'meta'}

    ds1 = DataSet(metadata=metadata1)
    request.addfinalizer(ds1.conn.close)
    ds2 = DataSet(metadata=metadata2)
    request.addfinalizer(ds2.conn.close)

    assert ds1.run_id == 1
    assert ds1.metadata == metadata1
    assert ds2.run_id == 2
    assert ds2.metadata == metadata2

    loaded_ds1 = DataSet(run_id=1)
    request.addfinalizer(loaded_ds1.conn.close)
    assert loaded_ds1.metadata == metadata1
    loaded_ds2 = DataSet(run_id=2)
    request.addfinalizer(loaded_ds2.conn.close)
    assert loaded_ds2.metadata == metadata2

    badtag = 'lex luthor'
    sorry_metadata = {'superman': 1, badtag: None, 'spiderman': 'two'}

    bad_tag_msg = (f'Tag {badtag} has value None. '
                   'That is not a valid metadata value!')

    with pytest.raises(RuntimeError,
                       match='Rolling back due to unhandled exception') as e:
        for tag, value in sorry_metadata.items():
            ds1.add_metadata(tag, value)

    assert error_caused_by(e, bad_tag_msg)
Example 2
def test_metadata(experiment, request):

    metadata1 = {'number': 1, "string": "Once upon a time..."}
    metadata2 = {'more': 'meta'}

    ds1 = DataSet(metadata=metadata1)
    request.addfinalizer(ds1.conn.close)
    ds2 = DataSet(metadata=metadata2)
    request.addfinalizer(ds2.conn.close)

    assert ds1.run_id == 1
    assert ds1.metadata == metadata1
    assert ds2.run_id == 2
    assert ds2.metadata == metadata2

    loaded_ds1 = DataSet(run_id=1)
    request.addfinalizer(loaded_ds1.conn.close)
    assert loaded_ds1.metadata == metadata1
    loaded_ds2 = DataSet(run_id=2)
    request.addfinalizer(loaded_ds2.conn.close)
    assert loaded_ds2.metadata == metadata2

    bad_tag = "lex luthor"
    bad_tag_msg = (f"Tag {bad_tag} is not a valid tag. "
                   "Use only alphanumeric characters and underscores!")
    with pytest.raises(RuntimeError,
                       match="Rolling back due to unhandled exception") as e1:
        ds1.add_metadata(bad_tag, "value")
    assert error_caused_by(e1, bad_tag_msg)

    good_tag = "tag"
    none_value_msg = (f"Tag {good_tag} has value None. "
                      "That is not a valid metadata value!")
    with pytest.raises(RuntimeError,
                       match="Rolling back due to unhandled exception") as e2:
        ds1.add_metadata(good_tag, None)
    assert error_caused_by(e2, none_value_msg)
Example 3
def experiment_partially_labelled(empty_temp_db, tmp_path):
    e = new_experiment("test_experiment", sample_name="test_sample")
    for did in range(len(test_data_labels)):
        ds = DataSet(os.path.join(tmp_path, "temp.db"))

        nt_metadata, current_label = generate_default_metadata()
        stage = test_data_labels[did]
        if stage is not None:
            current_label[stage] = 1
            for label, value in current_label.items():
                ds.add_metadata(label, value)

        ds.add_metadata(nt.meta_tag, json.dumps(nt_metadata))
        ds.add_metadata("snapshot", json.dumps({}))
    try:
        yield e
    finally:
        e.conn.close()
Example 4
def second_third_experiment_labelled_data(second_empty_temp_db, tmp_path):

    e1 = new_experiment("test_experiment2", sample_name="test_sample")
    e2 = new_experiment("test_experiment3", sample_name="test_sample")

    for did in range(len(test_data_labels2)):
        ds = DataSet(os.path.join(tmp_path, "temp2.db"), exp_id=e2._exp_id)

        nt_metadata, current_label = generate_default_metadata()
        stage = test_data_labels2[did]
        if stage is not None:
            current_label[stage] = 1

        ds.add_metadata(nt.meta_tag, json.dumps(nt_metadata))
        ds.add_metadata("snapshot", json.dumps({}))
        for label, value in current_label.items():
            ds.add_metadata(label, value)
    try:
        yield e2
    finally:
        e1.conn.close()
        e2.conn.close()
Example 5
def test_basic_extraction(two_empty_temp_db_connections, some_paramspecs):
    source_conn, target_conn = two_empty_temp_db_connections

    source_path = path_to_dbfile(source_conn)
    target_path = path_to_dbfile(target_conn)

    type_casters = {'numeric': float,
                    'array': (lambda x: np.array(x) if hasattr(x, '__iter__')
                              else np.array([x])),
                    'text': str}

    source_exp = Experiment(conn=source_conn)
    source_dataset = DataSet(conn=source_conn, name="basic_copy_paste_name")

    with pytest.raises(RuntimeError) as excinfo:
        extract_runs_into_db(source_path, target_path, source_dataset.run_id)

    assert error_caused_by(excinfo, ('Dataset not completed. An incomplete '
                                     'dataset can not be copied. The '
                                     'incomplete dataset has GUID: '
                                     f'{source_dataset.guid} and run_id: '
                                     f'{source_dataset.run_id}'))

    for ps in some_paramspecs[1].values():
        source_dataset.add_parameter(ps)

    for value in range(10):
        result = {ps.name: type_casters[ps.type](value)
                  for ps in some_paramspecs[1].values()}
        source_dataset.add_result(result)

    source_dataset.add_metadata('goodness', 'fair')
    source_dataset.add_metadata('test', True)

    source_dataset.mark_complete()

    extract_runs_into_db(source_path, target_path, source_dataset.run_id)

    target_exp = Experiment(conn=target_conn, exp_id=1)

    length1 = len(target_exp)
    assert length1 == 1

    # trying to insert the same run again should be a NOOP
    with raise_if_file_changed(target_path):
        extract_runs_into_db(source_path, target_path, source_dataset.run_id)

    assert len(target_exp) == length1

    target_dataset = DataSet(conn=target_conn, run_id=1)

    # Now make the interesting comparisons: are the target objects the same as
    # the source objects?

    assert source_dataset.the_same_dataset_as(target_dataset)

    source_data = source_dataset.get_data(*source_dataset.parameters.split(','))
    target_data = target_dataset.get_data(*target_dataset.parameters.split(','))

    assert source_data == target_data

    exp_attrs = ['name', 'sample_name', 'format_string', 'started_at',
                 'finished_at']

    for exp_attr in exp_attrs:
        assert getattr(source_exp, exp_attr) == getattr(target_exp, exp_attr)

    # trying to insert the same run again should be a NOOP
    with raise_if_file_changed(target_path):
        extract_runs_into_db(source_path, target_path, source_dataset.run_id)
Example 6
class Runner:
    """
    Context manager for the measurement.

    Lives inside a :class:`Measurement` and should never be instantiated
    outside a Measurement.

    This context manager handles all the dirty business of writing data
    to the database. Additionally, it may perform experiment bootstrapping
    and clean-up after a measurement. A minimal usage sketch follows the
    class definition below.
    """
    def __init__(
        self,
        enteractions: Sequence[ActionType],
        exitactions: Sequence[ActionType],
        experiment: Optional[Experiment] = None,
        station: Optional[Station] = None,
        write_period: Optional[float] = None,
        interdeps: InterDependencies_ = InterDependencies_(),
        name: str = "",
        subscribers: Optional[Sequence[SubscriberType]] = None,
        parent_datasets: Sequence[Mapping[Any, Any]] = (),
        extra_log_info: str = "",
        write_in_background: bool = False,
        shapes: Optional[Shapes] = None,
        in_memory_cache: bool = True,
        dataset_class: DataSetType = DataSetType.DataSet,
    ) -> None:

        self._dataset_class = dataset_class
        self.write_period = self._calculate_write_period(
            write_in_background, write_period)

        self.enteractions = enteractions
        self.exitactions = exitactions
        self.subscribers: Sequence[SubscriberType]
        if subscribers is None:
            self.subscribers = []
        else:
            self.subscribers = subscribers
        self.experiment = experiment
        self.station = station
        self._interdependencies = interdeps
        self._shapes: Shapes = shapes
        self.name = name if name else 'results'
        self._parent_datasets = parent_datasets
        self._extra_log_info = extra_log_info
        self._write_in_background = write_in_background
        self._in_memory_cache = in_memory_cache
        self.ds: DataSetProtocol

    @staticmethod
    def _calculate_write_period(write_in_background: bool,
                                write_period: Optional[float]) -> float:
        write_period_changed_from_default = (
            write_period is not None
            and write_period != qc.config.defaults.dataset.write_period)
        if write_in_background and write_period_changed_from_default:
            warnings.warn(f"The specified write period of {write_period} s "
                          "will be ignored, since write_in_background==True")
        if write_in_background:
            return 0.0
        if write_period is None:
            write_period = qc.config.dataset.write_period
        return float(write_period)

    def __enter__(self) -> DataSaver:
        # TODO: should user actions really precede the dataset?
        # first do whatever bootstrapping the user specified

        for func, args in self.enteractions:
            func(*args)

        dataset_class: Type[DataSetProtocol]

        # next set up the "datasaver"
        if self.experiment is not None:
            exp_id: Optional[int] = self.experiment.exp_id
            path_to_db: Optional[str] = self.experiment.path_to_db
            conn: Optional["ConnectionPlus"] = self.experiment.conn
        else:
            exp_id = None
            path_to_db = None
            conn = None

        if self._dataset_class is DataSetType.DataSet:
            self.ds = DataSet(
                name=self.name,
                exp_id=exp_id,
                conn=conn,
                in_memory_cache=self._in_memory_cache,
            )
        elif self._dataset_class is DataSetType.DataSetInMem:
            if self._in_memory_cache is False:
                raise RuntimeError("Cannot disable the in memory cache for a "
                                   "dataset that is only in memory.")
            self.ds = DataSetInMem._create_new_run(
                name=self.name,
                exp_id=exp_id,
                path_to_db=path_to_db,
            )
        else:
            raise RuntimeError("Does not support any other dataset classes")

        # .. and give the dataset a snapshot as metadata
        if self.station is None:
            station = Station.default
        else:
            station = self.station

        if station is not None:
            snapshot = station.snapshot()
        else:
            snapshot = {}

        self.ds.prepare(
            snapshot=snapshot,
            interdeps=self._interdependencies,
            write_in_background=self._write_in_background,
            shapes=self._shapes,
            parent_datasets=self._parent_datasets,
        )

        # register all subscribers
        if isinstance(self.ds, DataSet):
            for (callble, state) in self.subscribers:
                # We register with minimal waiting time.
                # That should make all subscribers be called when data is flushed
                # to the database
                log.debug(f"Subscribing callable {callble} with state {state}")
                self.ds.subscribe(callble,
                                  min_wait=0,
                                  min_count=1,
                                  state=state)

        print(f"Starting experimental run with id: {self.ds.captured_run_id}."
              f" {self._extra_log_info}")
        log.info(f"Starting measurement with guid: {self.ds.guid}, "
                 f'sample_name: "{self.ds.sample_name}", '
                 f'exp_name: "{self.ds.exp_name}", '
                 f'ds_name: "{self.ds.name}". '
                 f"{self._extra_log_info}")
        log.info(f"Using background writing: {self._write_in_background}")

        self.datasaver = DataSaver(dataset=self.ds,
                                   write_period=self.write_period,
                                   interdeps=self._interdependencies)

        return self.datasaver

    def __exit__(self, exception_type: Optional[Type[BaseException]],
                 exception_value: Optional[BaseException],
                 traceback: Optional[TracebackType]) -> None:
        with DelayedKeyboardInterrupt():
            self.datasaver.flush_data_to_database(block=True)

            # perform the "teardown" events
            for func, args in self.exitactions:
                func(*args)

            if exception_type:
                # if an exception happened during the measurement,
                # log the exception
                stream = io.StringIO()
                tb_module.print_exception(exception_type,
                                          exception_value,
                                          traceback,
                                          file=stream)
                exception_string = stream.getvalue()
                log.warning('An exception occurred in measurement with guid: '
                            f'{self.ds.guid};\nTraceback:\n{exception_string}')
                self.ds.add_metadata("measurement_exception", exception_string)

            # and finally mark the dataset as closed, thus
            # finishing the measurement
            # Note that the completion of a dataset entails waiting for the
            # write thread to terminate (iff the write thread has been started)
            self.ds.mark_completed()
            if get_data_export_automatic():
                self.datasaver.export_data()
            log.info(f'Finished measurement with guid: {self.ds.guid}. '
                     f'{self._extra_log_info}')
            if isinstance(self.ds, DataSet):
                self.ds.unsubscribe_all()
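
Usage note: in QCoDeS the Runner above is not instantiated directly; it is what Measurement.run() returns. Entering the context creates the dataset and hands back the DataSaver used inside the with block, and leaving it flushes the data and completes the run. The snippet below is a minimal, hedged sketch of that flow against the public Measurement API; the database path, experiment name, and parameter names are placeholders chosen purely for illustration, and it assumes a reasonably recent QCoDeS installation.

import numpy as np

from qcodes import Parameter, initialise_or_create_database_at, new_experiment
from qcodes.dataset.measurements import Measurement

# A throwaway database and experiment so the sketch is self-contained.
initialise_or_create_database_at("./runner_sketch.db")
exp = new_experiment("runner_sketch_experiment", sample_name="no_sample")

# Simple software-only parameters standing in for real instrument parameters.
x = Parameter("x", set_cmd=None, get_cmd=None)
y = Parameter("y", set_cmd=None, get_cmd=None)

meas = Measurement(exp=exp, name="runner_usage_sketch")
meas.register_parameter(x)
meas.register_parameter(y, setpoints=(x,))

# meas.run() builds a Runner; __enter__ creates the DataSet, attaches the
# station snapshot, registers subscribers and returns a DataSaver.
with meas.run() as datasaver:
    for xv in np.linspace(0, 1, 11):
        x(xv)
        y(float(np.sin(xv)))
        datasaver.add_result((x, xv), (y, y()))

# __exit__ has flushed the data, run the exit actions and marked the run
# complete, so the dataset can now be inspected or exported.
dataset = datasaver.dataset
print(dataset.run_id, dataset.guid)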