def test_validate_subset(some_paramspecbases):

    ps1, ps2, ps3, ps4 = some_paramspecbases

    idps = InterDependencies_(
        dependencies={ps1: (ps2, ps3)},
        inferences={ps2: (ps4,), ps3: (ps4,)})

    idps.validate_subset((ps4,))
    idps.validate_subset((ps2, ps4))
    idps.validate_subset((ps2, ps3, ps4))
    idps.validate_subset(())
    idps.validate_subset([])

    with pytest.raises(DependencyError) as exc_info:
        idps.validate_subset((ps1,))
    assert exc_info.value._param_name == 'psb1'
    assert exc_info.value._missing_params == {'psb2', 'psb3'}

    with pytest.raises(DependencyError) as exc_info:
        idps.validate_subset((ps1, ps2, ps4))
    assert exc_info.value._param_name == 'psb1'
    assert exc_info.value._missing_params == {'psb3'}

    with pytest.raises(InferenceError) as exc_info:
        idps.validate_subset((ps3,))
    assert exc_info.value._param_name == 'psb3'
    assert exc_info.value._missing_params == {'psb4'}

    with pytest.raises(InferenceError) as exc_info:
        idps2 = InterDependencies_(
            dependencies={ps1: (ps2, ps3)},
            inferences={ps3: (ps4,)})
        idps2.validate_subset((ps1, ps2, ps3))
    assert exc_info.value._param_name == 'psb3'
    assert exc_info.value._missing_params == {'psb4'}

    with pytest.raises(ValueError, match='ps42'):
        ps42 = ParamSpecBase('ps42', paramtype='text', label='', unit='it')
        idps.validate_subset((ps2, ps42, ps4))
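
# A minimal sketch (not part of the test suite) of how ``validate_subset``
# behaves in client code: a dependent parameter is only a valid member of a
# subset if all of its setpoints (and inference sources) are present as well.
# The paramspec names below are hypothetical.
def _example_validate_subset() -> None:
    x = ParamSpecBase('x', paramtype='numeric', label='', unit='V')
    y = ParamSpecBase('y', paramtype='numeric', label='', unit='A')
    idps = InterDependencies_(dependencies={y: (x,)})

    idps.validate_subset((x,))    # fine: a setpoint on its own
    idps.validate_subset((x, y))  # fine: dependent plus its setpoint
    try:
        idps.validate_subset((y,))  # raises: ``y`` without its setpoint
    except DependencyError as e:
        print(e)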

class DataSet(Sized):

    # the "persistent traits" are the attributes/properties of the DataSet
    # that are NOT tied to the representation of the DataSet in any particular
    # database
    persistent_traits = ('name', 'guid', 'number_of_results',
                         'parameters', 'paramspecs', 'exp_name',
                         'sample_name', 'completed', 'snapshot',
                         'run_timestamp_raw', 'description',
                         'completed_timestamp_raw', 'metadata',
                         'dependent_parameters')

    def __init__(self, path_to_db: Optional[str] = None,
                 run_id: Optional[int] = None,
                 conn: Optional[ConnectionPlus] = None,
                 exp_id: Optional[int] = None,
                 name: Optional[str] = None,
                 specs: Optional[SpecsOrInterDeps] = None,
                 values=None,
                 metadata=None) -> None:
        """
        Create a new DataSet object. The object can either hold a new run or
        an already existing run. If a run_id is provided, then an old run is
        looked up, else a new run is created.

        Args:
            path_to_db: path to the sqlite file on disk. If not provided, the
                path will be read from the config.
            run_id: provide this when loading an existing run, leave it as
                None when creating a new run
            conn: connection to the DB; if provided and path_to_db is
                provided as well, then a ValueError is raised (this is to
                prevent the possibility of providing a connection to a DB
                file that is different from path_to_db)
            exp_id: the id of the experiment in which to create a new run.
                Ignored if run_id is provided.
            name: the name of the dataset. Ignored if run_id is provided.
            specs: paramspecs belonging to the dataset. Ignored if run_id is
                provided.
            values: values to insert into the dataset. Ignored if run_id is
                provided.
            metadata: metadata to insert into the dataset. Ignored if run_id
                is provided.
        """
        self.conn = conn_from_dbpath_or_conn(conn, path_to_db)

        self._run_id = run_id
        self._debug = False
        self.subscribers: Dict[str, _Subscriber] = {}
        self._interdeps: InterDependencies_

        if run_id is not None:
            if not run_exists(self.conn, run_id):
                raise ValueError(f"Run with run_id {run_id} does not exist "
                                 f"in the database")
            self._completed = completed(self.conn, self.run_id)
            run_desc = self._get_run_description_from_db()
            self._interdeps = run_desc.interdeps
            self._metadata = get_metadata_from_run_id(self.conn, run_id)
            self._started = self.run_timestamp_raw is not None
        else:
            # Actually perform all the side effects needed for the creation
            # of a new dataset. Note that a dataset is created (in the DB)
            # with no parameters; they are written to disk when the dataset
            # is marked as started
            if exp_id is None:
                if len(get_experiments(self.conn)) > 0:
                    exp_id = get_last_experiment(self.conn)
                else:
                    raise ValueError(
                        "No experiments found. "
"You can start a new one with:" " new_experiment(name, sample_name)") name = name or "dataset" _, run_id, __ = create_run(self.conn, exp_id, name, generate_guid(), parameters=None, values=values, metadata=metadata) # this is really the UUID (an ever increasing count in the db) self._run_id = run_id self._completed = False self._started = False if isinstance(specs, InterDependencies_): self._interdeps = specs elif specs is not None: self._interdeps = old_to_new(InterDependencies(*specs)) else: self._interdeps = InterDependencies_() self._metadata = get_metadata_from_run_id(self.conn, self.run_id) @property def run_id(self): return self._run_id @property def path_to_db(self): return self.conn.path_to_dbfile @property def name(self): return select_one_where(self.conn, "runs", "name", "run_id", self.run_id) @property def table_name(self): return select_one_where(self.conn, "runs", "result_table_name", "run_id", self.run_id) @property def guid(self): return get_guid_from_run_id(self.conn, self.run_id) @property def snapshot(self) -> Optional[dict]: """Snapshot of the run as dictionary (or None)""" snapshot_json = self.snapshot_raw if snapshot_json is not None: return json.loads(snapshot_json) else: return None @property def snapshot_raw(self) -> Optional[str]: """Snapshot of the run as a JSON-formatted string (or None)""" return select_one_where(self.conn, "runs", "snapshot", "run_id", self.run_id) @property def number_of_results(self): sql = f'SELECT COUNT(*) FROM "{self.table_name}"' cursor = atomic_transaction(self.conn, sql) return one(cursor, 'COUNT(*)') @property def counter(self): return select_one_where(self.conn, "runs", "result_counter", "run_id", self.run_id) @property def parameters(self) -> str: if self.pristine: psnames = [ps.name for ps in self.description.interdeps.paramspecs] return ','.join(psnames) else: return select_one_where(self.conn, "runs", "parameters", "run_id", self.run_id) @property def paramspecs(self) -> Dict[str, Union[ParamSpec, ParamSpecBase]]: params: Sequence if self.pristine: params = self.description.interdeps.paramspecs else: params = self.get_parameters() return {ps.name: ps for ps in params} @property def dependent_parameters(self) -> Tuple[ParamSpecBase, ...]: """ Return all the parameters that explicitly depend on other parameters """ return tuple(self._interdeps.dependencies.keys()) @property def exp_id(self) -> int: return select_one_where(self.conn, "runs", "exp_id", "run_id", self.run_id) @property def exp_name(self) -> str: return get_experiment_name_from_experiment_id(self.conn, self.exp_id) @property def sample_name(self) -> str: return get_sample_name_from_experiment_id(self.conn, self.exp_id) @property def run_timestamp_raw(self) -> Optional[float]: """ Returns run timestamp as number of seconds since the Epoch The run timestamp is the moment when the measurement for this run started. """ return get_run_timestamp_from_run_id(self.conn, self.run_id) @property def description(self) -> RunDescriber: return RunDescriber(interdeps=self._interdeps) @property def metadata(self) -> Dict: return self._metadata def the_same_dataset_as(self, other: 'DataSet') -> bool: """ Check if two datasets correspond to the same run by comparing all their persistent traits. Note that this method does not compare the data itself. 

        This function raises if the GUIDs match but anything else doesn't.

        Args:
            other: the dataset to compare self to
        """
        if not isinstance(other, DataSet):
            return False

        guids_match = self.guid == other.guid

        for attr in DataSet.persistent_traits:
            if getattr(self, attr) != getattr(other, attr):
                if guids_match:
                    raise RuntimeError('Critical inconsistency detected! '
                                       'The two datasets have the same GUID, '
                                       f'but their "{attr}" differ.')
                else:
                    return False

        return True

    def run_timestamp(self, fmt: str = "%Y-%m-%d %H:%M:%S") -> Optional[str]:
        """
        Returns run timestamp in a human-readable format

        The run timestamp is the moment when the measurement for this run
        started. If the run has not yet been started, this function returns
        None.

        Consult `time.strftime` for information about the format.
        """
        if self.run_timestamp_raw is None:
            return None
        else:
            return time.strftime(fmt, time.localtime(self.run_timestamp_raw))

    @property
    def completed_timestamp_raw(self) -> Optional[float]:
        """
        Returns timestamp when measurement run was completed
        as number of seconds since the Epoch

        If the run (or the dataset) is not completed, then returns None.
        """
        return get_completed_timestamp_from_run_id(self.conn, self.run_id)

    def completed_timestamp(self,
                            fmt: str = "%Y-%m-%d %H:%M:%S") -> Optional[str]:
        """
        Returns timestamp when measurement run was completed
        in a human-readable format

        If the run (or the dataset) is not completed, then returns None.

        Consult `time.strftime` for information about the format.
        """
        completed_timestamp_raw = self.completed_timestamp_raw

        if completed_timestamp_raw:
            completed_timestamp: Optional[str] = time.strftime(
                fmt, time.localtime(completed_timestamp_raw))
        else:
            completed_timestamp = None

        return completed_timestamp

    def _get_run_description_from_db(self) -> RunDescriber:
        """
        Look up the run_description from the database
        """
        desc_str = get_run_description(self.conn, self.run_id)
        return serial.from_json_to_current(desc_str)

    def toggle_debug(self):
        """
        Toggle debug mode. If debug mode is on, all the queries made are
        echoed back.
        """
        self._debug = not self._debug
        self.conn.close()
        self.conn = connect(self.path_to_db, self._debug)

    def add_parameter(self, spec: ParamSpec):
        """
        Old method; don't use it.
        """
        raise NotImplementedError('This method has been removed. '
                                  'Please use DataSet.set_interdependencies '
                                  'instead.')

    def set_interdependencies(self, interdeps: InterDependencies_) -> None:
        """
        Overwrite the interdependencies object (which holds all added
        parameters and their relationships) of this dataset
        """
        if not isinstance(interdeps, InterDependencies_):
            raise TypeError('Wrong input type. Expected InterDependencies_, '
                            f'got {type(interdeps)}')

        if not self.pristine:
            msg = ('Can not set interdependencies on a DataSet that has '
                   'been started.')
            raise RuntimeError(msg)

        self._interdeps = interdeps

    def get_parameters(self) -> SPECS:
        rd_v0 = v1_to_v0(self.description)
        old_interdeps = rd_v0.interdeps
        return list(old_interdeps.paramspecs)

    def add_metadata(self, tag: str, metadata: Any):
        """
        Adds metadata to the DataSet. The metadata is stored under the
        provided tag. Note that None is not allowed as a metadata value.

        Args:
            tag: represents the key in the metadata dictionary
            metadata: actual metadata
        """
        self._metadata[tag] = metadata

        # `add_meta_data` is not atomic by itself, hence using `atomic`
        with atomic(self.conn) as conn:
            add_meta_data(conn, self.run_id, {tag: metadata})

    def add_snapshot(self, snapshot: str, overwrite: bool = False) -> None:
        """
        Adds a snapshot to this run

        Args:
            snapshot: the raw JSON dump of the snapshot
            overwrite: force overwrite an existing snapshot
        """
        if self.snapshot is None or overwrite:
            add_meta_data(self.conn, self.run_id, {'snapshot': snapshot})
        elif self.snapshot is not None and not overwrite:
            log.warning('This dataset already has a snapshot. Use '
                        'overwrite=True to overwrite that')

    @property
    def pristine(self) -> bool:
        """
        Is this DataSet pristine? A pristine DataSet has not yet been started,
        meaning that parameters can still be added and removed, but results
        can not be added.
        """
        return not (self._started or self._completed)

    @property
    def running(self) -> bool:
        """
        Is this DataSet currently running? A running DataSet has been started,
        but not yet completed.
        """
        return self._started and not self._completed

    @property
    def started(self) -> bool:
        """
        Has this DataSet been started? A DataSet that has not been started
        can not have any results added to it.
        """
        return self._started

    @property
    def completed(self) -> bool:
        """
        Is this DataSet completed? A completed DataSet may not be modified in
        any way.
        """
        return self._completed

    @completed.setter
    def completed(self, value):
        self._completed = value
        if value:
            mark_run_complete(self.conn, self.run_id)

    def mark_started(self) -> None:
        """
        Mark this dataset as started. A dataset that has been started can not
        have its parameters modified.

        Calling this on an already started DataSet is a NOOP.
        """
        if not self._started:
            self._perform_start_actions()
            self._started = True

    def _perform_start_actions(self) -> None:
        """
        Perform the actions that must take place once the run has been started
        """
        paramspecs = new_to_old(self._interdeps).paramspecs

        for spec in paramspecs:
            add_parameter(self.conn, self.table_name, spec)

        desc_str = serial.to_json_for_storage(self.description)

        update_run_description(self.conn, self.run_id, desc_str)

        set_run_timestamp(self.conn, self.run_id)

    def mark_completed(self) -> None:
        """
        Mark dataset as complete and thus read only and notify the subscribers
        """
        if self.pristine:
            raise RuntimeError('Can not mark DataSet as complete before it '
                               'has been marked as started.')
        self.completed = True
        for sub in self.subscribers.values():
            sub.done_callback()

    @deprecate(alternative='mark_completed')
    def mark_complete(self):
        self.mark_completed()
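
    # An illustrative sketch (kept as a comment so that nothing executes at
    # class-definition time) of the lifecycle enforced by the properties and
    # methods above; the parameter names are hypothetical:
    #
    #     ds = DataSet()                       # ds.pristine is True
    #     ds.set_interdependencies(idps)       # allowed only while pristine
    #     ds.mark_started()                    # ds.running is True
    #     ds.add_result({'x': 0.0, 'y': 1.0})  # results may now be added
    #     ds.mark_completed()                  # ds.completed is True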

    def add_result(self, results: Dict[str, VALUE]) -> int:
        """
        Add a logically single result to existing parameters

        Args:
            results: dictionary with name of a parameter as the key and the
                value to associate as the value.

        Returns:
            index in the DataSet that the result was stored at

        If a parameter exists in the dataset and is not present in the
        results dictionary, "Null" values are inserted.

        It is an error to provide a value for a key or keyword that is not
        the name of a parameter in this DataSet.

        It is an error to add results to a completed DataSet.
        """
        if self.pristine:
            raise RuntimeError('This DataSet has not been marked as started. '
                               'Please mark the DataSet as started before '
                               'adding results to it.')

        if self.completed:
            raise CompletedError('This DataSet is complete, no further '
                                 'results can be added to it.')

        try:
            parameters = [self._interdeps._id_to_paramspec[name]
                          for name in results]
            self._interdeps.validate_subset(parameters)
        except DependencyError as de:
            raise ValueError(
                'Can not add result, missing setpoint values') from de

        index = insert_values(self.conn, self.table_name,
                              list(results.keys()),
                              list(results.values()))
        return index

    def add_results(self, results: List[Dict[str, VALUE]]) -> int:
        """
        Adds a sequence of results to the DataSet.

        Args:
            results: list of name-value dictionaries where each dictionary
                provides the values for the parameters in that result. If
                some parameters are missing the corresponding values are
                assumed to be None

        Returns:
            the index in the DataSet that the **first** result was stored at

        It is an error to provide a value for a key or keyword that is not
        the name of a parameter in this DataSet.

        It is an error to add results to a completed DataSet.
        """
        if self.pristine:
            raise RuntimeError('This DataSet has not been marked as started. '
                               'Please mark the DataSet as started before '
                               'adding results to it.')

        if self.completed:
            raise CompletedError('This DataSet is complete, no further '
                                 'results can be added to it.')

        expected_keys = frozenset.union(*[frozenset(d) for d in results])
        values = [[d.get(k, None) for k in expected_keys] for d in results]

        len_before_add = length(self.conn, self.table_name)

        insert_many_values(self.conn, self.table_name, list(expected_keys),
                           values)
        return len_before_add

    @staticmethod
    def _validate_parameters(*params: Union[str, ParamSpec, _BaseParameter]
                             ) -> List[str]:
        """
        Validate that the provided parameters have a name and return those
        names as a list. The Parameters may be a mix of strings, ParamSpecs
        or ordinary QCoDeS parameters.
        """
        valid_param_names = []
        for maybe_param in params:
            if isinstance(maybe_param, str):
                valid_param_names.append(maybe_param)
                continue
            else:
                try:
                    maybe_param = maybe_param.name
                except Exception as e:
                    raise ValueError(
                        "This parameter does not have a name") from e
                valid_param_names.append(maybe_param)
        return valid_param_names

    def get_data(self,
                 *params: Union[str, ParamSpec, _BaseParameter],
                 start: Optional[int] = None,
                 end: Optional[int] = None) -> List[List[Any]]:
        """
        Returns the values stored in the DataSet for the specified parameters.
        The values are returned as a list of lists, SQL rows by SQL columns,
        e.g. datapoints by parameters. The data type of each element is based
        on the datatype provided when the DataSet was created. The parameter
        list may contain a mix of string parameter names, QCoDeS Parameter
        objects, and ParamSpec objects (as long as they have a `name` field).

        If provided, the start and end arguments select a range of results
        by result count (index). If the range is empty - that is, if the end
        is less than or equal to the start, or if start is after the current
        end of the DataSet - then a list of empty arrays is returned.

        For a type-independent and easier-to-work-with view of the data you
        may want to consider using :py:meth:`.get_parameter_data`

        Args:
            *params: string parameter names, QCoDeS Parameter objects, and
                ParamSpec objects
            start: start value of selection range (by result count); ignored
                if None
            end: end value of selection range (by results count); ignored if
                None

        Returns:
            list of lists SQL rows of data by SQL columns. Each SQL row is a
            datapoint and each SQL column is a parameter. Each element will
            be of the datatypes stored in the database (numeric, array or
            string)
        """
        valid_param_names = self._validate_parameters(*params)
        return get_data(self.conn, self.table_name, valid_param_names,
                        start, end)

    def get_parameter_data(
            self,
            *params: Union[str, ParamSpec, _BaseParameter],
            start: Optional[int] = None,
            end: Optional[int] = None) -> Dict[str, Dict[str, numpy.ndarray]]:
        """
        Returns the values stored in the DataSet for the specified parameters
        and their dependencies. If no parameters are supplied, the values
        will be returned for all parameters that are not themselves
        dependencies.

        The values are returned as a dictionary with the names of the
        requested parameters as keys and, as values, dictionaries that map
        the names of the parameters and their dependencies to numpy arrays
        of the data. If some of the parameters are stored as arrays, the
        remaining parameters are expanded to the same shape as these. Apart
        from this expansion, the data returned by this method is the
        transpose of the data returned by `get_data`.

        If provided, the start and end arguments select a range of results
        by result count (index). If the range is empty - that is, if the end
        is less than or equal to the start, or if start is after the current
        end of the DataSet - then a list of empty arrays is returned.

        Args:
            *params: string parameter names, QCoDeS Parameter objects, and
                ParamSpec objects. If no parameters are supplied, data for
                all parameters that are not a dependency of another
                parameter will be returned.
            start: start value of selection range (by result count); ignored
                if None
            end: end value of selection range (by results count); ignored if
                None

        Returns:
            Dictionary from requested parameters to Dict of parameter names
            to numpy arrays containing the data points of type numeric,
            array or string.
        """
        if len(params) == 0:
            valid_param_names = [ps.name
                                 for ps in self._interdeps.non_dependencies]
        else:
            valid_param_names = self._validate_parameters(*params)
        return get_parameter_data(self.conn, self.table_name,
                                  valid_param_names, start, end)

    def get_data_as_pandas_dataframe(self,
                                     *params: Union[str,
                                                    ParamSpec,
                                                    _BaseParameter],
                                     start: Optional[int] = None,
                                     end: Optional[int] = None) -> \
            Dict[str, pd.DataFrame]:
        """
        Returns the values stored in the DataSet for the specified parameters
        and their dependencies as a dict of :py:class:`pandas.DataFrame` s.
        Each element in the dict is indexed by the names of the requested
        parameters. Each DataFrame contains a column for the data and is
        indexed by a :py:class:`pandas.MultiIndex` formed from all the
        setpoints of the parameter.

        If no parameters are supplied, data will be returned for all
        parameters in the dataset that are not themselves dependencies of
        other parameters.

        If provided, the start and end arguments select a range of results
        by result count (index). If the range is empty - that is, if the end
        is less than or equal to the start, or if start is after the current
        end of the DataSet - then a dict of empty
        :py:class:`pandas.DataFrame` s is returned.

        Args:
            *params: string parameter names, QCoDeS Parameter objects, and
                ParamSpec objects. If no parameters are supplied, data for
                all parameters that are not a dependency of another
                parameter will be returned.
            start: start value of selection range (by result count); ignored
                if None
            end: end value of selection range (by results count); ignored if
                None

        Returns:
            Dictionary from requested parameter names to
            :py:class:`pandas.DataFrame` s with the requested parameter as
            a column and indexed by a :py:class:`pandas.MultiIndex` formed
            by the dependencies.
        """
        dfs = {}
        datadict = self.get_parameter_data(*params, start=start, end=end)

        for name, subdict in datadict.items():
            keys = list(subdict.keys())
            if len(keys) == 0:
                dfs[name] = pd.DataFrame()
                continue
            if len(keys) == 1:
                index = None
            elif len(keys) == 2:
                index = pd.Index(subdict[keys[1]].ravel(), name=keys[1])
            else:
                indexdata = tuple(numpy.concatenate(subdict[key])
                                  if subdict[key].dtype == numpy.dtype('O')
                                  else subdict[key].ravel()
                                  for key in keys[1:])
                index = pd.MultiIndex.from_arrays(
                    indexdata,
                    names=keys[1:])

            if subdict[keys[0]].dtype == numpy.dtype('O'):
                # ravel will not fully unpack a numpy array of arrays
                # which are of "object" dtype. This can happen if a variable
                # length array is stored in the db. We use concatenate to
                # flatten these
                mydata = numpy.concatenate(subdict[keys[0]])
            else:
                mydata = subdict[keys[0]].ravel()
            df = pd.DataFrame(mydata, index=index,
                              columns=[keys[0]])
            dfs[name] = df
        return dfs

    def get_values(self, param_name: str) -> List[List[Any]]:
        """
        Get the values (i.e. not NULLs) of the specified parameter
        """
        if param_name not in self.parameters:
            raise ValueError('Unknown parameter, not in this DataSet')

        values = get_values(self.conn, self.table_name, param_name)

        return values

    def get_setpoints(self, param_name: str) -> Dict[str, List[List[Any]]]:
        """
        Get the setpoints for the specified parameter

        Args:
            param_name: The name of the parameter for which to get the
                setpoints
        """
        paramspec: ParamSpecBase = self._interdeps._id_to_paramspec[
            param_name]

        if param_name not in self.parameters:
            raise ValueError('Unknown parameter, not in this DataSet')

        if paramspec not in self._interdeps.dependencies.keys():
            raise ValueError(f'Parameter {param_name} has no setpoints.')

        setpoints = get_setpoints(self.conn, self.table_name, param_name)

        return setpoints

    def subscribe(self,
                  callback: Callable[[Any, int, Optional[Any]], None],
                  min_wait: int = 0,
                  min_count: int = 1,
                  state: Optional[Any] = None,
                  callback_kwargs: Optional[Dict[str, Any]] = None
                  ) -> str:
        subscriber_id = uuid.uuid4().hex
        subscriber = _Subscriber(self, subscriber_id, callback, state,
                                 min_wait, min_count, callback_kwargs)
        self.subscribers[subscriber_id] = subscriber
        subscriber.start()
        return subscriber_id
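
    # A hypothetical sketch of a subscriber entry in ``qcodesrc.json`` as
    # read by ``subscribe_from_config`` below. The names and kwargs are
    # illustrative (derived from the attributes the method reads), not a
    # documented schema:
    #
    #     "subscription": {
    #         "subscribers": {
    #             "my_subscriber": {
    #                 "factory": "mypkg.subscribers.make_callback",
    #                 "factory_kwargs": {},
    #                 "subscription_kwargs": {"min_wait": 0,
    #                                         "min_count": 1,
    #                                         "callback_kwargs": {}}
    #             }
    #         }
    #     }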

    def subscribe_from_config(self, name: str) -> str:
        """
        Subscribe a subscriber defined in the `qcodesrc.json` config file to
        the data of this `DataSet`. The definition can be found at
        `subscription.subscribers`.

        Args:
            name: identifier of the subscriber. Equal to the key of the entry
                in 'qcodesrc.json::subscription.subscribers'.
        """
        subscribers = qcodes.config.subscription.subscribers
        try:
            subscriber_info = getattr(subscribers, name)
            # the dot dict behind the config does not convert the error and
            # actually raises a `KeyError`
        except (AttributeError, KeyError):
            keys = ','.join(subscribers.keys())
            raise RuntimeError(
                f'subscribe_from_config: failed to subscribe "{name}" to '
                f'DataSet from list of subscribers in `qcodesrc.json` '
                f'(subscription.subscribers). Choose one of: {keys}')
        # get callback from string
        parts = subscriber_info.factory.split('.')
        import_path, type_name = '.'.join(parts[:-1]), parts[-1]
        module = importlib.import_module(import_path)
        factory = getattr(module, type_name)

        kwargs = {k: v
                  for k, v in subscriber_info.subscription_kwargs.items()}
        kwargs['callback'] = factory(self, **subscriber_info.factory_kwargs)
        kwargs['state'] = {}
        return self.subscribe(**kwargs)

    def unsubscribe(self, uuid: str) -> None:
        """
        Remove subscriber with the provided uuid
        """
        with atomic(self.conn) as conn:
            sub = self.subscribers[uuid]
            remove_trigger(conn, sub.trigger_id)
            sub.schedule_stop()
            sub.join()
            del self.subscribers[uuid]

    def unsubscribe_all(self):
        """
        Remove all subscribers
        """
        sql = "select * from sqlite_master where type = 'trigger';"
        triggers = atomic_transaction(self.conn, sql).fetchall()
        with atomic(self.conn) as conn:
            for trigger in triggers:
                remove_trigger(conn, trigger['name'])
            for sub in self.subscribers.values():
                sub.schedule_stop()
                sub.join()
            self.subscribers.clear()

    def get_metadata(self, tag):
        return get_metadata(self.conn, tag, self.table_name)

    def __len__(self) -> int:
        return length(self.conn, self.table_name)

    def __repr__(self) -> str:
        out = []
        heading = f"{self.name} #{self.run_id}@{self.path_to_db}"
        out.append(heading)
        out.append("-" * len(heading))
        ps = self.get_parameters()
        if len(ps) > 0:
            for p in ps:
                out.append(f"{p.name} - {p.type}")

        return "\n".join(out)
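
# A minimal, end-to-end sketch of the DataSet lifecycle, guarded so that it
# only runs when this module is executed directly. It assumes a configured
# QCoDeS database with at least one existing experiment; the parameter names
# 'x' and 'y' are hypothetical.
if __name__ == '__main__':
    x = ParamSpecBase('x', paramtype='numeric', label='x', unit='V')
    y = ParamSpecBase('y', paramtype='numeric', label='y', unit='A')
    idps_demo = InterDependencies_(dependencies={y: (x,)})

    ds = DataSet(name='demo')
    ds.set_interdependencies(idps_demo)  # only allowed while pristine
    ds.mark_started()                    # parameters are now frozen
    for i in range(10):
        ds.add_result({'x': float(i), 'y': float(i) ** 2})
    ds.mark_completed()                  # dataset is now read-only
    print(ds.get_parameter_data('y'))    # {'y': {'y': ..., 'x': ...}}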