def test_keys_of_result_of_to_dict(some_interdeps):
    """A serialized RunDescriber exposes exactly the expected keys."""
    for interdeps in some_interdeps:
        describer = RunDescriber(interdeps=interdeps)
        serialized = describer._to_dict()
        assert list(serialized.keys()) == ['version', 'interdependencies']
def test_get_description(experiment, some_interdeps):
    # A fresh DataSet starts out with an empty run description.
    ds = DataSet()

    assert ds.run_id == 1

    desc = ds.description
    assert desc == RunDescriber(InterDependencies_())

    # Setting interdependencies is reflected immediately on the object.
    ds.set_interdependencies(some_interdeps[1])

    assert ds.description.interdeps == some_interdeps[1]

    # the run description gets written as the dataset is marked as started,
    # so now no description should be stored in the database
    prematurely_loaded_ds = DataSet(run_id=1)
    assert prematurely_loaded_ds.description == RunDescriber(
        InterDependencies_())

    ds.mark_started()

    # After mark_started the description is persisted and can be re-loaded.
    loaded_ds = DataSet(run_id=1)

    expected_desc = RunDescriber(some_interdeps[1])

    assert loaded_ds.description == expected_desc
def test_dictization_of_version_1(some_interdeps):
    """A version-1 RunDescriber serializes to exactly two entries."""
    for interdeps in some_interdeps:
        serialized = RunDescriber(interdeps)._to_dict()

        assert serialized['version'] == 1
        assert serialized['interdependencies'] == interdeps._to_dict()
        assert len(serialized.keys()) == 2
def test_to_and_from_dict_roundtrip(some_interdeps):
    """Serializing then deserializing a RunDescriber must be lossless."""
    for interdeps in some_interdeps:
        original = RunDescriber(interdeps=interdeps)
        restored = RunDescriber._from_dict(original._to_dict())

        assert isinstance(restored, RunDescriber)
        assert original == restored
def set_shapes(self, shapes: Shapes) -> None:
    """
    Set the shapes of the data to be recorded in this measurement.

    Args:
        shapes: Dictionary from names of dependent parameters to a tuple
            of integers describing the shape of the measurement.
    """
    # Validate the shapes against the registered interdependencies
    # before accepting them (presumably raises on mismatch — see
    # RunDescriber._verify_interdeps_shape).
    RunDescriber._verify_interdeps_shape(interdeps=self._interdeps,
                                         shapes=shapes)
    self._shapes = shapes
def test_dictization_of_current_version(some_interdeps):
    """A RunDescriber serializes to the current (v3) dictionary layout."""
    for interdeps in some_interdeps:
        describer = RunDescriber(interdeps)
        legacy_interdeps = new_to_old(describer.interdeps)
        serialized = describer._to_dict()

        assert serialized['version'] == 3
        assert serialized['interdependencies'] == legacy_interdeps._to_dict()
        assert serialized['interdependencies_'] == interdeps._to_dict()
        assert serialized['shapes'] is None
        assert len(serialized.keys()) == 4
def test_equality(some_paramspecbases):
    """RunDescriber equality is decided by the interdependencies."""
    psb1, psb2, psb3, _ = some_paramspecbases

    deps_a = InterDependencies_(dependencies={psb1: (psb2, psb3)})
    infs = InterDependencies_(inferences={psb1: (psb2, psb3)})
    deps_b = InterDependencies_(dependencies={psb1: (psb2, psb3)})

    first = RunDescriber(interdeps=deps_a)
    second = RunDescriber(interdeps=infs)
    third = RunDescriber(interdeps=deps_b)

    assert first == third
    assert first != second
    assert third != second
def __init__(
    self,
    run_id: int,
    captured_run_id: int,
    counter: int,
    captured_counter: int,
    name: str,
    exp_id: int,
    exp_name: str,
    sample_name: str,
    guid: str,
    path_to_db: Optional[str],
    run_timestamp_raw: Optional[float],
    completed_timestamp_raw: Optional[float],
    snapshot: Optional[str] = None,
    metadata: Optional[Mapping[str, Any]] = None,
    rundescriber: Optional[RunDescriber] = None,
    parent_dataset_links: Optional[Sequence[Link]] = None,
    export_info: Optional[ExportInfo] = None,
) -> None:
    """Note that the constructor is considered private.

    A ``DataSetInMem``
    should be constructed either using one of the load functions
    (``load_by_run_spec``, ``load_from_netcdf`` ...)
    or using the measurement context manager.
    """
    self._run_id = run_id
    self._captured_run_id = captured_run_id
    self._counter = counter
    self._captured_counter = captured_counter
    self._name = name
    self._exp_id = exp_id
    self._exp_name = exp_name
    self._sample_name = sample_name
    self._guid = guid
    self._cache = DataSetCacheInMem(self)
    self._run_timestamp_raw = run_timestamp_raw
    self._completed_timestamp_raw = completed_timestamp_raw
    self._path_to_db = path_to_db
    # Copy the metadata mapping so later mutation of the caller's
    # object cannot affect this dataset.
    if metadata is None:
        self._metadata = {}
    else:
        self._metadata = dict(metadata)
    # Fall back to an empty run description when none is supplied.
    if rundescriber is None:
        interdeps = InterDependencies_()
        rundescriber = RunDescriber(interdeps, shapes=None)
    self._rundescriber = rundescriber
    # Defensive copy of the parent links, if any.
    if parent_dataset_links is not None:
        self._parent_dataset_links = list(parent_dataset_links)
    else:
        self._parent_dataset_links = []
    if export_info is not None:
        self._export_info = export_info
    else:
        self._export_info = ExportInfo({})
    # Mirror the export info into the metadata dict as a string
    # (must happen after self._metadata has been set above).
    self._metadata["export_info"] = self._export_info.to_str()
    self._snapshot_raw_data = snapshot
def test_get_dependents(experiment):
    # more parameters, more complicated dependencies
    x = ParamSpecBase("x", "numeric")
    t = ParamSpecBase("t", "numeric")
    y = ParamSpecBase("y", "numeric")
    x_raw = ParamSpecBase("x_raw", "numeric")
    x_cooked = ParamSpecBase("x_cooked", "numeric")
    z = ParamSpecBase("z", "numeric")
    # y depends on (x, t); z depends on x_cooked, which in turn is
    # inferred from x_raw.
    deps_param_tree = {y: (x, t), z: (x_cooked, )}
    inferred_param_tree = {x_cooked: (x_raw, )}
    interdeps = InterDependencies_(dependencies=deps_param_tree,
                                   inferences=inferred_param_tree)
    description = RunDescriber(interdeps=interdeps)
    (_, run_id, _) = mut_queries.create_run(
        experiment.conn,
        experiment.exp_id,
        name="testrun",
        guid=generate_guid(),
        description=description,
    )

    deps = mut_queries._get_dependents(experiment.conn, run_id)

    # only the dependent parameters (y and z) should be reported
    expected_deps = [
        mut_queries._get_layout_id(experiment.conn, 'y', run_id),
        mut_queries._get_layout_id(experiment.conn, 'z', run_id)
    ]

    assert deps == expected_deps
def _make_simple_run_describer():
    """Yield a RunDescriber holding a single dependency tree y(x, t)."""
    x = ParamSpecBase("x", "numeric")
    t = ParamSpecBase("t", "numeric")
    y = ParamSpecBase("y", "numeric")

    interdeps = InterDependencies_(dependencies={y: (x, t)})
    yield RunDescriber(interdeps)
def test_update_runs_description(dataset):
    """Invalid description strings are rejected; a valid one is accepted."""
    for bad_desc in ('{}', 'description'):
        with pytest.raises(ValueError):
            mut_queries.update_run_description(
                dataset.conn, dataset.run_id, bad_desc)

    good_desc = serial.to_json_for_storage(
        RunDescriber(InterDependencies_()))
    mut_queries.update_run_description(
        dataset.conn, dataset.run_id, good_desc)
def test_fix_wrong_run_descriptions():
    v3fixpath = os.path.join(fixturepath, 'db_files', 'version3')

    dbname_old = os.path.join(v3fixpath,
                              'some_runs_without_run_description.db')

    # The fixture database is generated separately; skip if absent.
    if not os.path.exists(dbname_old):
        pytest.skip("No db-file fixtures found. You can generate test db-files"
                    " using the scripts in the legacy_DB_generation folder")

    def make_ps(n):
        # Helper: a numeric ParamSpec named p<n>.
        ps = ParamSpec(f'p{n}', label=f'Parameter {n}',
                       unit=f'unit {n}', paramtype='numeric')
        return ps

    paramspecs = [make_ps(n) for n in range(6)]
    # Set the inference/dependency relations directly on the private
    # attributes to mirror what the fixture runs are expected to contain.
    paramspecs[2]._inferred_from = ['p0']
    paramspecs[3]._inferred_from = ['p1', 'p0']
    paramspecs[4]._depends_on = ['p2', 'p3']
    paramspecs[5]._inferred_from = ['p0']

    with temporarily_copied_DB(dbname_old, debug=False, version=3) as conn:
        assert get_user_version(conn) == 3

        expected_description = RunDescriber(
            old_to_new(v0.InterDependencies(*paramspecs)))

        empty_description = RunDescriber(old_to_new(v0.InterDependencies()))

        fix_wrong_run_descriptions(conn, [1, 2, 3, 4])

        # Runs 1-3 should now carry the full reconstructed description.
        for run_id in [1, 2, 3]:
            desc_str = get_run_description(conn, run_id)
            desc = serial.from_json_to_current(desc_str)
            assert desc == expected_description

        # Run 4 should end up with the empty description.
        desc_str = get_run_description(conn, run_id=4)
        desc = serial.from_json_to_current(desc_str)
        assert desc == empty_description
def test_construct_currect_rundesciber_from_v2(some_interdeps):
    """A v2 dict round-trips unchanged through both upgrade paths."""
    new_style = some_interdeps[0]
    old_style = new_to_old(new_style)
    v2 = RunDescriberV2Dict(interdependencies=old_style._to_dict(),
                            interdependencies_=new_style._to_dict(),
                            version=2)

    from_classmethod = RunDescriber._from_dict(v2)
    from_function = from_dict_to_current(v2)

    assert from_classmethod._to_dict() == v2
    assert from_function._to_dict() == v2
def test_construct_current_rundescriber_from_v3(some_interdeps):
    """A v3 dict round-trips unchanged through both upgrade paths."""
    new_style = some_interdeps[0]
    old_style = new_to_old(new_style)
    v3 = RunDescriberV3Dict(interdependencies=old_style._to_dict(),
                            interdependencies_=new_style._to_dict(),
                            version=3,
                            shapes=None)

    from_classmethod = RunDescriber._from_dict(v3)
    upgraded = from_dict_to_current(v3)

    assert from_classmethod._to_dict() == v3
    assert upgraded._to_dict() == v3
def test_jsonization_as_v0_for_storage(some_interdeps):
    """
    Test that a RunDescriber can be json-dumped as version 0
    """
    new_idps = some_interdeps[0]
    old_idps = new_to_old(new_idps)
    describer = RunDescriber(new_idps)

    expected_json = json.dumps(
        {'version': 0, 'interdependencies': old_idps._to_dict()})

    assert serial.to_json_as_version(describer, 0) == expected_json
def test_default_dictization_as_v0_for_storage(some_interdeps):
    """
    A RunDescriber built from new-style interdeps must still dictize to
    the old-style (version 0) storage representation.
    """
    new_idps = some_interdeps[0]
    old_idps = new_to_old(new_idps)
    describer = RunDescriber(new_idps)

    expected = {'version': 0, 'interdependencies': old_idps._to_dict()}

    assert serial.to_dict_for_storage(describer) == expected
def test_default_jsonization_for_storage(some_interdeps):
    """
    Test that a RunDescriber is json-dumped as version 2
    """
    new_idps = some_interdeps[0]
    old_idps = new_to_old(new_idps)
    describer = RunDescriber(new_idps)

    expected_json = json.dumps({'version': 2,
                                'interdependencies': old_idps._to_dict(),
                                'interdependencies_': new_idps._to_dict()})

    assert serial.to_json_for_storage(describer) == expected_json
def test_construct_currect_rundesciber_from_v1(some_interdeps):
    """Upgrading a v1 dict must yield the equivalent v2 representation."""
    new_style = some_interdeps[0]
    old_style = new_to_old(new_style)
    v1 = RunDescriberV1Dict(interdependencies=new_style._to_dict(),
                            version=1)

    from_classmethod = RunDescriber._from_dict(v1)
    from_function = from_dict_to_current(v1)

    expected = RunDescriberV2Dict(
        interdependencies=old_style._to_dict(),
        interdependencies_=new_style._to_dict(),
        version=2)

    assert from_classmethod._to_dict() == expected
    assert from_function._to_dict() == expected
def test_construct_currect_rundesciber_from_fake_v3(some_interdeps):
    """Unknown keys plus a too-new version number fall back to v2."""
    new_style = some_interdeps[0]
    old_style = new_to_old(new_style)
    fake_v3 = RunDescriberV2Dict(interdependencies=old_style._to_dict(),
                                 interdependencies_=new_style._to_dict(),
                                 version=3)
    fake_v3['foobar'] = {"foo": ["bar"]}

    from_classmethod = RunDescriber._from_dict(fake_v3)
    from_function = from_dict_to_current(fake_v3)

    expected = fake_v3.copy()
    expected.pop('foobar')
    expected['version'] = 2

    assert from_classmethod._to_dict() == expected
    assert from_function._to_dict() == expected
def test_yaml_creation_and_loading(some_interdeps):
    """RunDescribers must survive a YAML round trip."""
    yaml = YAML()

    for interdeps in some_interdeps:
        describer = RunDescriber(interdeps=interdeps)
        dumped = serial.to_yaml_for_storage(describer)
        assert isinstance(dumped, str)

        loaded = dict(yaml.load(dumped))
        assert list(loaded.keys()) == ['version', 'interdependencies']
        assert loaded['version'] == serial.STORAGE_VERSION

        reloaded = serial.from_yaml_to_current(dumped)
        assert reloaded == describer
def test_construct_current_rundescriber_from_v1(some_interdeps):
    """Upgrading a v1 dict must yield the equivalent v3 representation."""
    new_style = some_interdeps[0]
    old_style = new_to_old(new_style)
    v1 = RunDescriberV1Dict(interdependencies=new_style._to_dict(),
                            version=1)

    from_classmethod = RunDescriber._from_dict(v1)
    upgraded = from_dict_to_current(v1)

    expected = RunDescriberV3Dict(
        interdependencies=old_style._to_dict(),
        interdependencies_=new_style._to_dict(),
        version=3,
        shapes=None,
    )
    assert from_classmethod._to_dict() == expected
    assert upgraded._to_dict() == expected
def test_construct_current_rundescriber_from_fake_v4(some_interdeps):
    """Unknown keys plus a too-new version number fall back to v3."""
    new_style = some_interdeps[0]
    old_style = new_to_old(new_style)
    fake_v4 = RunDescriberV3Dict(interdependencies=old_style._to_dict(),
                                 interdependencies_=new_style._to_dict(),
                                 version=4,
                                 shapes=None)
    fake_v4['foobar'] = {"foo": ["bar"]}

    from_classmethod = RunDescriber._from_dict(fake_v4)
    upgraded = from_dict_to_current(fake_v4)

    expected = fake_v4.copy()
    expected.pop('foobar')
    expected['version'] = 3

    assert from_classmethod._to_dict() == expected
    assert upgraded._to_dict() == expected
def _set_interdependencies(self, interdeps: InterDependencies_,
                           shapes: Shapes = None) -> None:
    """
    Set the interdependencies object (which holds all added
    parameters and their relationships) of this dataset and
    optionally the shapes object that holds information about
    the shape of the data to be measured.

    Args:
        interdeps: The interdependencies between the parameters
            of this dataset.
        shapes: Optional mapping from names of dependent parameters
            to the expected shape of the measured data.

    Raises:
        TypeError: If ``interdeps`` is not an ``InterDependencies_``.
        RuntimeError: If the dataset has already been started.
    """
    if not isinstance(interdeps, InterDependencies_):
        # Fixed typo in the error message ("InterDepencies_").
        raise TypeError("Wrong input type. Expected InterDependencies_, "
                        f"got {type(interdeps)}")

    if not self.pristine:
        mssg = ("Can not set interdependencies on a DataSet that has "
                "been started.")
        raise RuntimeError(mssg)
    self._rundescriber = RunDescriber(interdeps, shapes=shapes)
def test_construct_currect_rundesciber_from_v0(some_paramspecs):
    """A v0 dict upgrades to the v2 layout (order-insensitive compare)."""
    group = some_paramspecs[1]
    interdeps = InterDependencies(group['ps1'], group['ps2'],
                                  group['ps3'], group['ps4'],
                                  group['ps6'])
    v0 = RunDescriberV0Dict(interdependencies=interdeps._to_dict(),
                            version=0)

    from_classmethod = RunDescriber._from_dict(v0)
    from_function = from_dict_to_current(v0)

    expected = RunDescriberV2Dict(
        interdependencies=interdeps._to_dict(),
        interdependencies_=old_to_new(interdeps)._to_dict(),
        version=2)

    assert DeepDiff(from_classmethod._to_dict(), expected,
                    ignore_order=True) == {}
    assert DeepDiff(from_function._to_dict(), expected,
                    ignore_order=True) == {}
def fix_wrong_run_descriptions(conn: ConnectionPlus,
                               run_ids: Sequence[int]) -> None:
    """
    NB: This is a FIX function. Do not use it unless your database has been
    diagnosed with the problem that this function fixes.

    Overwrite faulty run_descriptions by using information from the layouts
    and dependencies tables. If a correct description is found for a run,
    that run is left untouched.

    Args:
        conn: The connection to the database
        run_ids: The runs to (potentially) fix

    Raises:
        RuntimeError: If the database is not at schema version 3.
    """
    user_version = get_user_version(conn)

    if user_version != 3:
        # The f-prefix was missing here, so the actual version was
        # never interpolated into the message.
        raise RuntimeError('Database of wrong version. Will not apply fix. '
                           f'Expected version 3, found version {user_version}')

    log.info('[*] Fixing run descriptions...')
    for run_id in run_ids:
        # Rebuild the description from the trusted layouts/dependencies
        # tables and compare it to what is stored for this run.
        trusted_paramspecs = _get_parameters(conn, run_id)
        interdeps = v0.InterDependencies(*trusted_paramspecs)
        interdeps_ = old_to_new(interdeps)
        trusted_desc = RunDescriber(interdeps_)

        actual_desc_str = select_one_where(conn, "runs",
                                           "run_description",
                                           "run_id", run_id)

        trusted_json = serial.to_json_as_version(trusted_desc, 0)

        if actual_desc_str == trusted_json:
            log.info(f'[+] Run id: {run_id} had an OK description')
        else:
            log.info(f'[-] Run id: {run_id} had a broken description. '
                     f'Description found: {actual_desc_str}')
            update_run_description(conn, run_id, trusted_json)
            log.info(f' Run id: {run_id} has been updated.')
def test_wrong_input_type_raises():
    """Inputs that are not InterDependencies_ are rejected."""
    bad_inputs = ('interdeps', ['p1', 'p2'], 0)
    for bad in bad_inputs:
        with pytest.raises(ValueError):
            RunDescriber(interdeps=bad)
def description(self) -> RunDescriber:
    # Wrap the current interdependencies in a fresh RunDescriber on
    # every access (the describer is not cached here).
    return RunDescriber(interdeps=self._interdeps)
def _insert_run(
    conn: ConnectionPlus,
    exp_id: int,
    name: str,
    guid: str,
    parameters: Optional[List[ParamSpec]] = None,
):
    """
    Insert a new row into the runs table for the given experiment and
    return ``(run_counter, formatted_name, run_id)``.
    """
    # get run counter and formatter from experiments
    run_counter, format_string = select_many_where(conn,
                                                   "experiments",
                                                   "run_counter",
                                                   "format_string",
                                                   where_column="exp_id",
                                                   where_value=exp_id)
    run_counter += 1
    formatted_name = format_table_name(format_string, name, exp_id,
                                       run_counter)
    table = "runs"

    parameters = parameters or []
    # Build the run description (converted to the new-style interdeps)
    # so it can be stored alongside the run row.
    run_desc = RunDescriber(old_to_new(InterDependencies(*parameters)))
    desc_str = serial.to_json_for_storage(run_desc)

    # Both the insert and the layout/dependency bookkeeping happen in
    # a single atomic transaction.
    with atomic(conn) as conn:
        if parameters:
            query = f"""
            INSERT INTO {table}
                (name,
                 exp_id,
                 guid,
                 result_table_name,
                 result_counter,
                 run_timestamp,
                 parameters,
                 is_completed,
                 run_description)
            VALUES
                (?,?,?,?,?,?,?,?,?)
            """
            curr = transaction(conn, query,
                               name,
                               exp_id,
                               guid,
                               formatted_name,
                               run_counter,
                               None,
                               ",".join([p.name for p in parameters]),
                               False,
                               desc_str)
            _add_parameters_to_layout_and_deps(conn, formatted_name,
                                               *parameters)
        else:
            # No parameters: the run row is inserted without the
            # ``parameters`` column and no layout entries are made.
            query = f"""
            INSERT INTO {table}
                (name,
                 exp_id,
                 guid,
                 result_table_name,
                 result_counter,
                 run_timestamp,
                 is_completed,
                 run_description)
            VALUES
                (?,?,?,?,?,?,?,?)
            """
            curr = transaction(conn, query,
                               name,
                               exp_id,
                               guid,
                               formatted_name,
                               run_counter,
                               None,
                               False,
                               desc_str)
        run_id = curr.lastrowid
    return run_counter, formatted_name, run_id