Example #1
def test_dataset_states():
    """
    Test the interplay between pristine, started, running, and completed
    """

    ds = DataSet()

    assert ds.pristine is True
    assert ds.running is False
    assert ds.started is False
    assert ds.completed is False

    with pytest.raises(RuntimeError,
                       match='Can not mark DataSet as complete before it '
                             'has been marked as started.'):
        ds.mark_completed()

    match = ('This DataSet has not been marked as started. '
             'Please mark the DataSet as started before '
             'adding results to it.')
    with pytest.raises(RuntimeError, match=match):
        ds.add_results([{'x': 1}])

    parameter = ParamSpecBase(name='single',
                              paramtype='numeric',
                              label='',
                              unit='N/A')
    idps = InterDependencies_(standalones=(parameter, ))
    ds.set_interdependencies(idps)

    ds.mark_started()

    assert ds.pristine is False
    assert ds.running is True
    assert ds.started is True
    assert ds.completed is False

    match = ('Can not set interdependencies on a DataSet that has '
             'been started.')

    with pytest.raises(RuntimeError, match=match):
        ds.set_interdependencies(idps)

    ds.add_results([{parameter.name: 1}])

    ds.mark_completed()

    assert ds.pristine is False
    assert ds.running is False
    assert ds.started is True
    assert ds.completed is True

    match = ('Can not set interdependencies on a DataSet that has '
             'been started.')

    with pytest.raises(RuntimeError, match=match):
        ds.set_interdependencies(idps)

    match = ('This DataSet is complete, no further '
             'results can be added to it.')

    with pytest.raises(CompletedError, match=match):
        ds.add_results([{parameter.name: 1}])
Example #2
class DataSet(Sized):

    # the "persistent traits" are the attributes/properties of the DataSet
    # that are NOT tied to the representation of the DataSet in any particular
    # database
    persistent_traits = ('name', 'guid', 'number_of_results', 'parameters',
                         'paramspecs', 'exp_name', 'sample_name', 'completed',
                         'snapshot', 'run_timestamp_raw', 'description',
                         'completed_timestamp_raw', 'metadata')

    def __init__(self,
                 path_to_db: Optional[str] = None,
                 run_id: Optional[int] = None,
                 conn: Optional[ConnectionPlus] = None,
                 exp_id: Optional[int] = None,
                 name: Optional[str] = None,
                 specs: Optional[SpecsOrInterDeps] = None,
                 values=None,
                 metadata=None) -> None:
        """
        Create a new DataSet object. The object can either hold a new run or
        an already existing run. If a run_id is provided, then an old run is
        looked up, else a new run is created.

        Args:
            path_to_db: path to the sqlite file on disk. If not provided, the
              path will be read from the config.
            run_id: provide this when loading an existing run, leave it
              as None when creating a new run
            conn: connection to the DB; if provided and `path_to_db` is
              provided as well, then a ValueError is raised (this is to
              prevent the possibility of providing a connection to a DB
              file that is different from `path_to_db`)
            exp_id: the id of the experiment in which to create a new run.
              Ignored if run_id is provided.
            name: the name of the dataset. Ignored if run_id is provided.
            specs: paramspecs belonging to the dataset. Ignored if run_id is
              provided.
            values: values to insert into the dataset. Ignored if run_id is
              provided.
            metadata: metadata to insert into the dataset. Ignored if run_id
              is provided.
        """
        if path_to_db is not None and conn is not None:
            raise ValueError("Both `path_to_db` and `conn` arguments have "
                             "been passed together with non-None values. "
                             "This is not allowed.")
        self._path_to_db = path_to_db or get_DB_location()

        self.conn = make_connection_plus_from(conn) if conn is not None else \
            connect(self.path_to_db)

        self._run_id = run_id
        self._debug = False
        self.subscribers: Dict[str, _Subscriber] = {}
        self._interdeps: InterDependencies_

        if run_id is not None:
            if not run_exists(self.conn, run_id):
                raise ValueError(f"Run with run_id {run_id} does not exist in "
                                 f"the database")
            self._completed = completed(self.conn, self.run_id)
            run_desc = self._get_run_description_from_db()
            self._interdeps = run_desc.interdeps
            self._metadata = get_metadata_from_run_id(self.conn, run_id)
            self._started = self.run_timestamp_raw is not None

        else:
            # Actually perform all the side effects needed for the creation
            # of a new dataset. Note that a dataset is created (in the DB)
            # with no parameters; they are written to disk when the dataset
            # is marked as started
            if exp_id is None:
                if len(get_experiments(self.conn)) > 0:
                    exp_id = get_last_experiment(self.conn)
                else:
                    raise ValueError("No experiments found."
                                     "You can start a new one with:"
                                     " new_experiment(name, sample_name)")
            name = name or "dataset"
            _, run_id, __ = create_run(self.conn,
                                       exp_id,
                                       name,
                                       generate_guid(),
                                       parameters=None,
                                       values=values,
                                       metadata=metadata)
            # note: the run_id is an ever-increasing integer count in the
            # db, not a UUID (the GUID is generated separately above)
            self._run_id = run_id
            self._completed = False
            self._started = False
            if isinstance(specs, InterDependencies_):
                self._interdeps = specs
            elif specs is not None:
                self._interdeps = old_to_new(InterDependencies(*specs))
            else:
                self._interdeps = InterDependencies_()
            self._metadata = get_metadata_from_run_id(self.conn, self.run_id)

    @property
    def run_id(self):
        return self._run_id

    @property
    def path_to_db(self):
        return self._path_to_db

    @property
    def name(self):
        return select_one_where(self.conn, "runs", "name", "run_id",
                                self.run_id)

    @property
    def table_name(self):
        return select_one_where(self.conn, "runs", "result_table_name",
                                "run_id", self.run_id)

    @property
    def guid(self):
        return get_guid_from_run_id(self.conn, self.run_id)

    @property
    def snapshot(self) -> Optional[dict]:
        """Snapshot of the run as dictionary (or None)"""
        snapshot_json = self.snapshot_raw
        if snapshot_json is not None:
            return json.loads(snapshot_json)
        else:
            return None

    @property
    def snapshot_raw(self) -> Optional[str]:
        """Snapshot of the run as a JSON-formatted string (or None)"""
        return select_one_where(self.conn, "runs", "snapshot", "run_id",
                                self.run_id)

    @property
    def number_of_results(self):
        sql = f'SELECT COUNT(*) FROM "{self.table_name}"'
        cursor = atomic_transaction(self.conn, sql)
        return one(cursor, 'COUNT(*)')

    @property
    def counter(self):
        return select_one_where(self.conn, "runs", "result_counter", "run_id",
                                self.run_id)

    @property
    def parameters(self) -> str:
        if self.pristine:
            psnames = [ps.name for ps in self.description.interdeps.paramspecs]
            return ','.join(psnames)
        else:
            return select_one_where(self.conn, "runs", "parameters", "run_id",
                                    self.run_id)

    @property
    def paramspecs(self) -> Dict[str, Union[ParamSpec, ParamSpecBase]]:
        params: Sequence
        if self.pristine:
            params = self.description.interdeps.paramspecs
        else:
            params = self.get_parameters()
        return {ps.name: ps for ps in params}

    @property
    def exp_id(self) -> int:
        return select_one_where(self.conn, "runs", "exp_id", "run_id",
                                self.run_id)

    @property
    def exp_name(self) -> str:
        return get_experiment_name_from_experiment_id(self.conn, self.exp_id)

    @property
    def sample_name(self) -> str:
        return get_sample_name_from_experiment_id(self.conn, self.exp_id)

    @property
    def run_timestamp_raw(self) -> Optional[float]:
        """
        Returns run timestamp as number of seconds since the Epoch

        The run timestamp is the moment when the measurement for this run
        started.
        """
        return get_run_timestamp_from_run_id(self.conn, self.run_id)

    @property
    def description(self) -> RunDescriber:
        return RunDescriber(interdeps=self._interdeps)

    @property
    def metadata(self) -> Dict:
        return self._metadata

    def the_same_dataset_as(self, other: 'DataSet') -> bool:
        """
        Check if two datasets correspond to the same run by comparing
        all their persistent traits. Note that this method
        does not compare the data itself.

        This function raises if the GUIDs match but anything else doesn't

        Args:
            other: the dataset to compare self to
        """

        if not isinstance(other, DataSet):
            return False

        guids_match = self.guid == other.guid

        for attr in DataSet.persistent_traits:
            if getattr(self, attr) != getattr(other, attr):
                if guids_match:
                    raise RuntimeError('Critical inconsistency detected! '
                                       'The two datasets have the same GUID,'
                                       f' but their "{attr}" differ.')
                else:
                    return False

        return True

    def run_timestamp(self, fmt: str = "%Y-%m-%d %H:%M:%S") -> Optional[str]:
        """
        Returns run timestamp in a human-readable format

        The run timestamp is the moment when the measurement for this run
        started. If the run has not yet been started, this function returns
        None.

        See `time.strftime` for information about the format.
        """
        if self.run_timestamp_raw is None:
            return None
        else:
            return time.strftime(fmt, time.localtime(self.run_timestamp_raw))

    @property
    def completed_timestamp_raw(self) -> Union[float, None]:
        """
        Returns timestamp when measurement run was completed
        as number of seconds since the Epoch

        If the run (or the dataset) is not completed, then returns None.
        """
        return get_completed_timestamp_from_run_id(self.conn, self.run_id)

    def completed_timestamp(self,
                            fmt: str = "%Y-%m-%d %H:%M:%S") -> Optional[str]:
        """
        Returns timestamp when measurement run was completed
        in a human-readable format

        If the run (or the dataset) is not completed, then returns None.

        See `time.strftime` for information about the format.
        """
        completed_timestamp_raw = self.completed_timestamp_raw

        if completed_timestamp_raw is not None:
            completed_timestamp: Optional[str] = time.strftime(
                fmt, time.localtime(completed_timestamp_raw))
        else:
            completed_timestamp = None

        return completed_timestamp

    def _get_run_description_from_db(self) -> RunDescriber:
        """
        Look up the run_description from the database
        """
        desc_str = get_run_description(self.conn, self.run_id)
        return serial.from_json_to_current(desc_str)

    def toggle_debug(self):
        """
        Toggle debug mode, if debug mode is on all the queries made are
        echoed back.
        """
        self._debug = not self._debug
        self.conn.close()
        self.conn = connect(self.path_to_db, self._debug)

    def add_parameter(self, spec: ParamSpec):
        """
        Old method; don't use it.
        """
        raise NotImplementedError('This method has been removed. '
                                  'Please use DataSet.set_interdependencies '
                                  'instead.')

    def set_interdependencies(self, interdeps: InterDependencies_) -> None:
        """
        Overwrite the interdependencies object (which holds all added
        parameters and their relationships) of this dataset
        """
        if not isinstance(interdeps, InterDependencies_):
            raise TypeError('Wrong input type. Expected InterDependencies_, '
                            f'got {type(interdeps)}')

        if not self.pristine:
            mssg = ('Can not set interdependencies on a DataSet that has '
                    'been started.')
            raise RuntimeError(mssg)

        self._interdeps = interdeps

    def get_parameters(self) -> SPECS:
        return get_parameters(self.conn, self.run_id)

    def add_metadata(self, tag: str, metadata: Any):
        """
        Adds metadata to the DataSet. The metadata is stored under the
        provided tag. Note that None is not allowed as a metadata value.

        Args:
            tag: represents the key in the metadata dictionary
            metadata: actual metadata
        """

        self._metadata[tag] = metadata
        # `add_meta_data` is not atomic by itself, hence using `atomic`
        with atomic(self.conn) as conn:
            add_meta_data(conn, self.run_id, {tag: metadata})

    def add_snapshot(self, snapshot: str, overwrite: bool = False) -> None:
        """
        Adds a snapshot to this run

        Args:
            snapshot: the raw JSON dump of the snapshot
            overwrite: force overwrite an existing snapshot
        """
        if self.snapshot is None or overwrite:
            add_meta_data(self.conn, self.run_id, {'snapshot': snapshot})
        else:
            log.warning('This dataset already has a snapshot. Use '
                        'overwrite=True to overwrite it.')

    @property
    def pristine(self) -> bool:
        """
        Is this DataSet pristine? A pristine DataSet has not yet been started,
        meaning that parameters can still be added and removed, but results
        can not be added.
        """
        return not (self._started or self._completed)

    @property
    def running(self) -> bool:
        """
        Is this DataSet currently running? A running DataSet has been started,
        but not yet completed.
        """
        return self._started and not (self._completed)

    @property
    def started(self) -> bool:
        """
        Has this DataSet been started? A DataSet that has not been started
        can not have any results added to it.
        """
        return self._started

    @property
    def completed(self) -> bool:
        """
        Is this DataSet completed? A completed DataSet may not be modified in
        any way.
        """
        return self._completed

    @completed.setter
    def completed(self, value):
        self._completed = value
        if value:
            mark_run_complete(self.conn, self.run_id)

    def mark_started(self) -> None:
        """
        Mark this dataset as started. A dataset that has been started can not
        have its parameters modified.

        Calling this on an already started DataSet is a NOOP.
        """
        if not self._started:
            self._perform_start_actions()
            self._started = True

    def _perform_start_actions(self) -> None:
        """
        Perform the actions that must take place once the run has been started
        """
        paramspecs = new_to_old(self._interdeps).paramspecs

        for spec in paramspecs:
            add_parameter(self.conn, self.table_name, spec)

        desc_str = serial.to_json_for_storage(self.description)

        update_run_description(self.conn, self.run_id, desc_str)

        set_run_timestamp(self.conn, self.run_id)

    def mark_completed(self) -> None:
        """
        Mark the dataset as complete (and thus read-only) and notify the
        subscribers.
        """
        if self.pristine:
            raise RuntimeError('Can not mark DataSet as complete before it '
                               'has been marked as started.')
        self.completed = True
        for sub in self.subscribers.values():
            sub.done_callback()

    @deprecate(alternative='mark_completed')
    def mark_complete(self):
        self.mark_completed()

    def add_result(self, results: Dict[str, VALUE]) -> int:
        """
        Add a logically single result to existing parameters

        Args:
            results: dictionary with name of a parameter as the key and the
                value to associate as the value.

        Returns:
            index in the DataSet that the result was stored at

        If a parameter exists in the dataset but is missing from the results
        dictionary, a NULL value is inserted for it.

        It is an error to provide a value for a key or keyword that is not
        the name of a parameter in this DataSet.

        It is an error to add results to a completed DataSet.
        """

        if self.pristine:
            raise RuntimeError('This DataSet has not been marked as started. '
                               'Please mark the DataSet as started before '
                               'adding results to it.')

        if self.completed:
            raise CompletedError('This DataSet is complete, no further '
                                 'results can be added to it.')
        try:
            parameters = [
                self._interdeps._id_to_paramspec[name] for name in results
            ]
            self._interdeps.validate_subset(parameters)
        except DependencyError as de:
            raise ValueError(
                'Can not add result, missing setpoint values') from de

        index = insert_values(self.conn, self.table_name, list(results.keys()),
                              list(results.values()))
        return index

    def add_results(self, results: List[Dict[str, VALUE]]) -> int:
        """
        Adds a sequence of results to the DataSet.

        Args:
            results: list of name-value dictionaries where each dictionary
                provides the values for the parameters in that result. If some
                parameters are missing the corresponding values are assumed
                to be None

        Returns:
            the index in the DataSet that the **first** result was stored at

        It is an error to provide a value for a key or keyword that is not
        the name of a parameter in this DataSet.

        It is an error to add results to a completed DataSet.
        """

        if self.pristine:
            raise RuntimeError('This DataSet has not been marked as started. '
                               'Please mark the DataSet as started before '
                               'adding results to it.')

        if self.completed:
            raise CompletedError('This DataSet is complete, no further '
                                 'results can be added to it.')

        expected_keys = frozenset.union(*[frozenset(d) for d in results])
        values = [[d.get(k, None) for k in expected_keys] for d in results]

        len_before_add = length(self.conn, self.table_name)

        insert_many_values(self.conn, self.table_name, list(expected_keys),
                           values)
        return len_before_add

    @staticmethod
    def _validate_parameters(
            *params: Union[str, ParamSpec, _BaseParameter]) -> List[str]:
        """
        Validate that the provided parameters have a name and return those
        names as a list.
        The Parameters may be a mix of strings, ParamSpecs or ordinary
        QCoDeS parameters.
        """

        valid_param_names = []
        for maybeParam in params:
            if isinstance(maybeParam, str):
                valid_param_names.append(maybeParam)
                continue
            else:
                try:
                    maybeParam = maybeParam.name
                except Exception as e:
                    raise ValueError(
                        "This parameter does not have a name") from e
                valid_param_names.append(maybeParam)
        return valid_param_names

    def get_data(self,
                 *params: Union[str, ParamSpec, _BaseParameter],
                 start: Optional[int] = None,
                 end: Optional[int] = None) -> List[List[Any]]:
        """
        Returns the values stored in the DataSet for the specified parameters.
        The values are returned as a list of lists, SQL rows by SQL columns,
        i.e. datapoints by parameters. The data type of each element is based
        on the datatype provided when the DataSet was created. The parameter
        list may contain a mix of string parameter names, QCoDeS Parameter
        objects, and ParamSpec objects (as long as they have a `name` field).

        If provided, the start and end arguments select a range of results
        by result count (index). If the range is empty (that is, if end is
        less than or equal to start, or if start is after the current end of
        the DataSet), then a list of empty arrays is returned.

        For a more type-independent and easier-to-work-with view of the data,
        consider using :py:meth:`.get_parameter_data` instead.

        Args:
            *params: string parameter names, QCoDeS Parameter objects, and
                ParamSpec objects
            start: start value of selection range (by result count); ignored
                if None
            end: end value of selection range (by results count); ignored if
                None

        Returns:
            list of lists, SQL rows of data by SQL columns. Each SQL row is
            a datapoint and each SQL column is a parameter. Each element
            will be of the datatype stored in the database (numeric, array
            or string)
        """
        valid_param_names = self._validate_parameters(*params)
        return get_data(self.conn, self.table_name, valid_param_names, start,
                        end)

    def get_parameter_data(
            self,
            *params: Union[str, ParamSpec, _BaseParameter],
            start: Optional[int] = None,
            end: Optional[int] = None) -> Dict[str, Dict[str, numpy.ndarray]]:
        """
        Returns the values stored in the DataSet for the specified parameters
        and their dependencies. If no parameters are supplied, the values are
        returned for all parameters that are not themselves dependencies.

        The values are returned as a dictionary with the names of the
        requested parameters as keys. Each value is itself a dictionary,
        mapping the names of the parameter and its dependencies to numpy
        arrays of the data. If some of the parameters are stored as arrays,
        the remaining parameters are expanded to the same shape. Apart from
        this expansion, the data returned by this method is the transpose of
        the data returned by `get_data`.

        If provided, the start and end arguments select a range of results
        by result count (index). If the range is empty (that is, if end is
        less than or equal to start, or if start is after the current end of
        the DataSet), then empty arrays are returned.

        Args:
            *params: string parameter names, QCoDeS Parameter objects, and
                ParamSpec objects. If no parameters are supplied data for
                all parameters that are not a dependency of another
                parameter will be returned.
            start: start value of selection range (by result count); ignored
                if None
            end: end value of selection range (by results count); ignored if
                None

        Returns:
            Dictionary from requested parameters to Dict of parameter names
            to numpy arrays containing the data points of type numeric,
            array or string.
        """
        if len(params) == 0:
            valid_param_names = get_non_dependencies(self.conn, self.run_id)
        else:
            valid_param_names = self._validate_parameters(*params)
        return get_parameter_data(self.conn, self.table_name,
                                  valid_param_names, start, end)

    def get_data_as_pandas_dataframe(self,
                                     *params: Union[str,
                                                    ParamSpec,
                                                    _BaseParameter],
                                     start: Optional[int] = None,
                                     end: Optional[int] = None) -> \
            Dict[str, pd.DataFrame]:
        """
        Returns the values stored in the DataSet for the specified parameters
        and their dependencies as a dict of :py:class:`pandas.DataFrame` s.
        Each element in the dict is indexed by the name of the requested
        parameter.

        Each DataFrame contains a column for the data and is indexed by a
        :py:class:`pandas.MultiIndex` formed from all the setpoints
        of the parameter.

        If no parameters are supplied, data will be returned for all
        parameters in the dataset that are not themselves dependencies of
        other parameters.

        If provided, the start and end arguments select a range of results
        by result count (index). If the range is empty (that is, if end is
        less than or equal to start, or if start is after the current end of
        the DataSet), then a dict of empty :py:class:`pandas.DataFrame` s is
        returned.

        Args:
            *params: string parameter names, QCoDeS Parameter objects, and
                ParamSpec objects. If no parameters are supplied data for
                all parameters that are not a dependency of another
                parameter will be returned.
            start: start value of selection range (by result count); ignored
                if None
            end: end value of selection range (by results count); ignored if
                None

        Returns:
            Dictionary from requested parameter names to
            :py:class:`pandas.DataFrame` s with the requested parameter as
            a column, indexed by a :py:class:`pandas.MultiIndex` formed
            from the dependencies.
        """
        dfs = {}
        datadict = self.get_parameter_data(*params, start=start, end=end)
        for name, subdict in datadict.items():
            keys = list(subdict.keys())
            if len(keys) == 0:
                dfs[name] = pd.DataFrame()
                continue
            if len(keys) == 1:
                index = None
            elif len(keys) == 2:
                index = pd.Index(subdict[keys[1]].ravel(), name=keys[1])
            else:
                indexdata = tuple(
                    numpy.concatenate(subdict[key]) if subdict[key].dtype ==
                    numpy.dtype('O') else subdict[key].ravel()
                    for key in keys[1:])
                index = pd.MultiIndex.from_arrays(indexdata, names=keys[1:])

            if subdict[keys[0]].dtype == numpy.dtype('O'):
                # ravel will not fully unpack a numpy array of arrays
                # which are of "object" dtype. This can happen if a variable
                # length array is stored in the db. We use concatenate to
                # flatten these
                mydata = numpy.concatenate(subdict[keys[0]])
            else:
                mydata = subdict[keys[0]].ravel()
            df = pd.DataFrame(mydata, index=index, columns=[keys[0]])
            dfs[name] = df
        return dfs

    def get_values(self, param_name: str) -> List[List[Any]]:
        """
        Get the values (i.e. not NULLs) of the specified parameter
        """
        if param_name not in self.parameters.split(','):
            raise ValueError('Unknown parameter, not in this DataSet')

        values = get_values(self.conn, self.table_name, param_name)

        return values

    def get_setpoints(self, param_name: str) -> Dict[str, List[List[Any]]]:
        """
        Get the setpoints for the specified parameter

        Args:
            param_name: The name of the parameter for which to get the
                setpoints
        """

        paramspec: ParamSpecBase = self._interdeps._id_to_paramspec[param_name]

        if param_name not in self.parameters.split(','):
            raise ValueError('Unknown parameter, not in this DataSet')

        if paramspec not in self._interdeps.dependencies.keys():
            raise ValueError(f'Parameter {param_name} has no setpoints.')

        setpoints = get_setpoints(self.conn, self.table_name, param_name)

        return setpoints

    def subscribe(self,
                  callback: Callable[[Any, int, Optional[Any]], None],
                  min_wait: int = 0,
                  min_count: int = 1,
                  state: Optional[Any] = None,
                  callback_kwargs: Optional[Dict[str, Any]] = None) -> str:
        subscriber_id = uuid.uuid4().hex
        subscriber = _Subscriber(self, subscriber_id, callback, state,
                                 min_wait, min_count, callback_kwargs)
        self.subscribers[subscriber_id] = subscriber
        subscriber.start()
        return subscriber_id

    def subscribe_from_config(self, name: str) -> str:
        """
        Subscribe a subscriber defined in the `qcodesrc.json` config file to
        the data of this `DataSet`. The definition can be found at
        `subscription.subscribers`.

        Args:
            name: identifier of the subscriber. Equal to the key of the entry
                in 'qcodesrc.json::subscription.subscribers'.
        """
        subscribers = qcodes.config.subscription.subscribers
        try:
            subscriber_info = getattr(subscribers, name)
        # the dot dict behind the config does not convert the error and
        # actually raises a `KeyError`
        except (AttributeError, KeyError):
            keys = ','.join(subscribers.keys())
            raise RuntimeError(
                f'subscribe_from_config: failed to subscribe "{name}" to '
                f'DataSet from list of subscribers in `qcodesrc.json` '
                f'(subscription.subscribers). Choose one of: {keys}')
        # get callback from string
        parts = subscriber_info.factory.split('.')
        import_path, type_name = '.'.join(parts[:-1]), parts[-1]
        module = importlib.import_module(import_path)
        factory = getattr(module, type_name)

        kwargs = {k: v for k, v in subscriber_info.subscription_kwargs.items()}
        kwargs['callback'] = factory(self, **subscriber_info.factory_kwargs)
        kwargs['state'] = {}
        return self.subscribe(**kwargs)

    def unsubscribe(self, uuid: str) -> None:
        """
        Remove subscriber with the provided uuid
        """
        with atomic(self.conn) as conn:
            sub = self.subscribers[uuid]
            remove_trigger(conn, sub.trigger_id)
            sub.schedule_stop()
            sub.join()
            del self.subscribers[uuid]

    def unsubscribe_all(self):
        """
        Remove all subscribers
        """
        sql = "select * from sqlite_master where type = 'trigger';"
        triggers = atomic_transaction(self.conn, sql).fetchall()
        with atomic(self.conn) as conn:
            for trigger in triggers:
                remove_trigger(conn, trigger['name'])
            for sub in self.subscribers.values():
                sub.schedule_stop()
                sub.join()
            self.subscribers.clear()

    def get_metadata(self, tag):
        return get_metadata(self.conn, tag, self.table_name)

    def __len__(self) -> int:
        return length(self.conn, self.table_name)

    def __repr__(self) -> str:
        out = []
        heading = f"{self.name} #{self.run_id}@{self.path_to_db}"
        out.append(heading)
        out.append("-" * len(heading))
        ps = self.get_parameters()
        if len(ps) > 0:
            for p in ps:
                out.append(f"{p.name} - {p.type}")

        return "\n".join(out)
Example #3
def test_extend(some_paramspecbases):

    ps1, ps2, ps3, _ = some_paramspecbases

    idps = InterDependencies_(standalones=(ps1, ps2))

    idps_ext = idps.extend(dependencies={ps1: (ps3, )})
    idps_expected = InterDependencies_(standalones=(ps2, ),
                                       dependencies={ps1: (ps3, )})
    assert idps_ext == idps_expected

    # lazily check that we get brand new objects
    idps._id_to_paramspec[ps1.name].label = "Something new and awful"
    idps._id_to_paramspec[ps2.name].unit = "Ghastly unit"
    assert idps_ext._id_to_paramspec[ps1.name].label == 'blah'
    assert idps_ext._id_to_paramspec[ps2.name].unit == 'V'
    # reset the objects that are never supposed to be mutated
    idps._id_to_paramspec[ps1.name].label = "blah"
    idps._id_to_paramspec[ps2.name].unit = "V"

    idps = InterDependencies_(standalones=(ps2, ))
    idps_ext = idps.extend(dependencies={ps1: (ps2, )})
    idps_expected = InterDependencies_(dependencies={ps1: (ps2, )})
    assert idps_ext == idps_expected

    idps = InterDependencies_(dependencies={ps1: (ps2, )})
    idps_ext = idps.extend(dependencies={ps1: (ps2, ps3)})
    idps_expected = InterDependencies_(dependencies={ps1: (ps2, ps3)})
    assert idps_ext == idps_expected

    idps = InterDependencies_()
    idps_ext = idps.extend(standalones=(ps1, ps2))
    idps_expected = InterDependencies_(standalones=(ps2, ps1))
    assert idps_ext == idps_expected

    ps_nu = deepcopy(ps1)
    ps_nu.unit += '/s'
    idps = InterDependencies_(standalones=(ps1, ))
    idps_ext = idps.extend(standalones=(ps_nu, ))
    idps_expected = InterDependencies_(standalones=(ps_nu, ps1))
    assert idps_ext == idps_expected

    idps = InterDependencies_(dependencies={ps1: (ps2, )})
    match = re.escape("Invalid dependencies/inferences")
    with pytest.raises(ValueError, match=match):
        idps_ext = idps.extend(inferences={ps2: (ps1, )})
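
The tests in this and the following examples rely on a some_paramspecbases fixture that is not shown on this page. From the assertions above (ps1's label is 'blah', ps2's unit is 'V') and the names 'psb1' through 'psb4' asserted in test_validate_subset further down, a plausible reconstruction looks like this (a sketch, not the actual fixture; the import path is an assumption):

import pytest

from qcodes.dataset.descriptions.param_spec import ParamSpecBase


@pytest.fixture
def some_paramspecbases():
    psb1 = ParamSpecBase('psb1', paramtype='numeric', label='blah', unit='V')
    psb2 = ParamSpecBase('psb2', paramtype='numeric', label='blah', unit='V')
    psb3 = ParamSpecBase('psb3', paramtype='numeric', label='blah', unit='V')
    psb4 = ParamSpecBase('psb4', paramtype='numeric', label='blah', unit='V')
    return (psb1, psb2, psb3, psb4)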
Example #4
def test_remove(some_paramspecbases):
    ps1, ps2, ps3, ps4 = some_paramspecbases

    idps = InterDependencies_(dependencies={ps1: (ps2, ps3)},
                              inferences={ps2: (ps4, )})
    idps_rem = idps.remove(ps1)
    idps_expected = InterDependencies_(inferences={ps2: (ps4, )},
                                       standalones=(ps3, ))
    assert idps_rem == idps_expected

    for p in [ps4, ps2, ps3]:
        match = re.escape(f'Cannot remove {p.name}, other parameters')
        with pytest.raises(ValueError, match=match):
            idps_rem = idps.remove(p)

    idps = InterDependencies_(dependencies={ps1: (ps3, )},
                              inferences={ps2: (ps4, )})
    idps_rem = idps.remove(ps2)
    idps_expected = InterDependencies_(dependencies={ps1: (ps3, )},
                                       standalones=(ps4, ))

    assert idps_rem == idps_expected

    idps = InterDependencies_(dependencies={ps1: (ps2, ps3)},
                              standalones=(ps4, ))
    idps_rem = idps.remove(ps4)
    idps_expected = InterDependencies_(dependencies={ps1: (ps2, ps3)})
    assert idps_rem == idps_expected

    idps = InterDependencies_(dependencies={ps1: (ps2, ps3)},
                              standalones=(ps4, ))
    idps_rem = idps.remove(ps1)
    idps_expected = InterDependencies_(standalones=(ps2, ps3, ps4))
    assert idps_rem == idps_expected
Example #5
def test_init(some_paramspecbases):
    """
    Assert that the init function correctly sets up the object.
    Assert via the public-facing methods.
    """

    (ps1, ps2, ps3, ps4) = some_paramspecbases

    idps1 = InterDependencies_(dependencies={ps1: (ps2, )})
    idps2 = InterDependencies_(dependencies={ps1: (ps2, ps2, ps2)})

    assert idps1 == idps2
    assert idps1.what_depends_on(ps2) == (ps1, )
    assert idps1.what_is_inferred_from(ps2) == ()
    assert idps1.non_dependencies == (ps1, )

    idps1 = InterDependencies_(dependencies={ps1: (ps2, ps3)})
    idps2 = InterDependencies_(dependencies={ps1: (ps3, ps2)})

    assert idps1.what_depends_on(ps2) == (ps1, )
    assert idps1.what_depends_on(ps3) == (ps1, )
    assert idps1.non_dependencies == (ps1, )
    assert idps2.non_dependencies == (ps1, )

    idps = InterDependencies_(dependencies={ps1: (ps3, ps2), ps4: (ps3, )})
    assert set(idps.what_depends_on(ps3)) == set((ps1, ps4))
    assert idps.non_dependencies == (ps1, ps4)
Example #6
def test_validate_subset(some_paramspecbases):

    ps1, ps2, ps3, ps4 = some_paramspecbases

    idps = InterDependencies_(dependencies={ps1: (ps2, ps3)},
                              inferences={
                                  ps2: (ps4, ),
                                  ps3: (ps4, )
                              })

    idps.validate_subset((ps4, ))
    idps.validate_subset((ps2, ps4))
    idps.validate_subset((ps2, ps3, ps4))
    idps.validate_subset(())
    idps.validate_subset([])

    with pytest.raises(DependencyError) as exc_info:
        idps.validate_subset((ps1, ))
    assert exc_info.value._param_name == 'psb1'
    assert exc_info.value._missing_params == {'psb2', 'psb3'}

    with pytest.raises(DependencyError) as exc_info:
        idps.validate_subset((ps1, ps2, ps4))
    assert exc_info.value._param_name == 'psb1'
    assert exc_info.value._missing_params == {'psb3'}

    with pytest.raises(InferenceError) as exc_info:
        idps.validate_subset((ps3, ))
    assert exc_info.value._param_name == 'psb3'
    assert exc_info.value._missing_params == {'psb4'}

    with pytest.raises(InferenceError) as exc_info:
        idps2 = InterDependencies_(dependencies={ps1: (ps2, ps3)},
                                   inferences={ps3: (ps4, )})
        idps2.validate_subset((ps1, ps2, ps3))
    assert exc_info.value._param_name == 'psb3'
    assert exc_info.value._missing_params == {'psb4'}

    with pytest.raises(ValueError, match='ps42'):
        ps42 = ParamSpecBase('ps42', paramtype='text', label='', unit='it')
        idps.validate_subset((ps2, ps42, ps4))
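
validate_subset is the same check DataSet.add_result performs before inserting a row (see the try/except in the class above); a minimal sketch of that pattern, with the DependencyError import path assumed:

from qcodes.dataset.descriptions.dependencies import DependencyError

try:
    idps.validate_subset((ps1, ))  # a dependent parameter without its setpoints
except DependencyError as de:
    raise ValueError('Can not add result, missing setpoint values') from de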
Example #7
def test_extend_with_paramspec(some_paramspecs):
    ps1 = some_paramspecs[1]['ps1']
    ps2 = some_paramspecs[1]['ps2']
    ps3 = some_paramspecs[1]['ps3']
    ps4 = some_paramspecs[1]['ps4']
    ps5 = some_paramspecs[1]['ps5']
    ps6 = some_paramspecs[1]['ps6']

    ps1_base = ps1.base_version()
    ps2_base = ps2.base_version()
    ps3_base = ps3.base_version()
    ps4_base = ps4.base_version()
    ps5_base = ps5.base_version()
    ps6_base = ps6.base_version()

    idps_bare = InterDependencies_(standalones=(ps1_base, ))
    idps_extended = InterDependencies_(inferences={ps3_base: (ps1_base, )})

    assert idps_bare._extend_with_paramspec(ps3) == idps_extended

    idps_bare = InterDependencies_(standalones=(ps2_base, ),
                                   inferences={ps3_base: (ps1_base, )})
    idps_extended = InterDependencies_(inferences={
        ps3_base: (ps1_base, ),
        ps4_base: (ps2_base, )
    })

    assert idps_bare._extend_with_paramspec(ps4) == idps_extended

    idps_bare = InterDependencies_(standalones=(ps1_base, ps2_base))
    idps_extended = InterDependencies_(
        inferences={ps3_base: (ps1_base, ),
                    ps4_base: (ps2_base, )},
        dependencies={ps5_base: (ps3_base, ps4_base),
                      ps6_base: (ps3_base, ps4_base)})
    assert (idps_bare
            ._extend_with_paramspec(ps3)
            ._extend_with_paramspec(ps4)
            ._extend_with_paramspec(ps5)
            ._extend_with_paramspec(ps6)) == idps_extended
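
base_version() bridges the older ParamSpec objects (which carry their own depends_on/inferred_from information) to the plain ParamSpecBase objects that InterDependencies_ stores. A sketch of the relationship, with the ParamSpec constructor arguments assumed:

ps = ParamSpec('v', paramtype='numeric', label='voltage', unit='V',
               depends_on=['t'])
psb = ps.base_version()  # ParamSpecBase keeping name/paramtype/label/unit,
                         # with the dependency information dropped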
Example #8
def tester(idps):
    ser = idps._to_dict()
    json.dumps(ser)  # the dict must be JSON-serializable; this raises if not
    idps_deser = InterDependencies_._from_dict(ser)
    assert idps == idps_deser
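
A round-trip helper like this would typically be invoked once per graph shape, for instance (a usage sketch, reusing ParamSpecBase objects ps1, ps2 and ps3 from a fixture):

tester(InterDependencies_())
tester(InterDependencies_(standalones=(ps1, ps2)))
tester(InterDependencies_(dependencies={ps1: (ps2, )},
                          inferences={ps2: (ps3, )}))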
Example #9
def test_subscription_from_config(dataset, basic_subscriber):
    """
    This test is similar to `test_basic_subscription`; the only difference
    is that an additional subscriber, defined in a config file, is added.
    """
    # This string represents the config file in the home directory:
    config = """
    {
        "subscription":{
            "subscribers":{
                "test_subscriber":{
                    "factory": "qcodes.tests.dataset.test_subscribing.MockSubscriber",
                    "factory_kwargs":{
                        "lg": false
                    },
                    "subscription_kwargs":{
                        "min_wait": 0,
                        "min_count": 1,
                        "callback_kwargs": {}
                    }
                }
            }
        }
    }
    """
    # This little dance around the db_location is due to the fact that the
    # dataset fixture creates a dataset in a db in a temporary directory.
    # Therefore we need to 'backup' the path to the db when using the
    # default configuration.
    db_location = qcodes.config.core.db_location
    with default_config(user_config=config):
        qcodes.config.core.db_location = db_location

        assert 'test_subscriber' in qcodes.config.subscription.subscribers

        xparam = ParamSpecBase(name='x',
                               paramtype='numeric',
                               label='x parameter',
                               unit='V')
        yparam = ParamSpecBase(name='y',
                               paramtype='numeric',
                               label='y parameter',
                               unit='Hz')
        idps = InterDependencies_(dependencies={yparam: (xparam, )})
        dataset.set_interdependencies(idps)

        dataset.mark_started()

        sub_id = dataset.subscribe(basic_subscriber,
                                   min_wait=0,
                                   min_count=1,
                                   state={})
        sub_id_c = dataset.subscribe_from_config('test_subscriber')
        assert len(dataset.subscribers) == 2
        assert list(dataset.subscribers.keys()) == [sub_id, sub_id_c]

        expected_state = {}

        # Here we are only testing 2 to reduce the CI time
        for x in range(2):
            y = -x**2
            dataset.add_result({'x': x, 'y': y})
            expected_state[x + 1] = [(x, y)]

            @retry_until_does_not_throw(
                exception_class_to_expect=AssertionError, delay=0, tries=10)
            def assert_expected_state():
                assert dataset.subscribers[sub_id].state == expected_state
                assert dataset.subscribers[sub_id_c].state == expected_state

            assert_expected_state()
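
The basic_subscriber fixture is not shown on this page. From the subscribe signature in the DataSet class above (Callable[[Any, int, Optional[Any]], None]) and the expected_state bookkeeping in the loop, a plausible reconstruction is (a sketch, not the actual fixture):

import pytest


@pytest.fixture
def basic_subscriber():
    def subscriber(results, length, state):
        # record each delivered batch of result tuples under the current
        # length of the DataSet
        state[length] = results
    return subscriber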