def test_get_description(experiment, some_interdeps):
    """
    Check that the description of a dataset follows its interdependencies
    and that it only reaches the database once the dataset is marked as
    started.
    """
    interdeps = some_interdeps[1]
    empty_desc = RunDescriber(InterDependencies_())

    ds = DataSet()
    assert ds.run_id == 1
    assert ds.description == empty_desc

    ds.set_interdependencies(interdeps)
    assert ds._interdeps == interdeps

    # the run description gets written as the dataset is marked as started,
    # so a dataset loaded before that point must still see an empty one
    loaded_too_early = DataSet(run_id=1)
    assert loaded_too_early.description == empty_desc

    ds.mark_started()

    loaded_after_start = DataSet(run_id=1)
    assert loaded_after_start.description == RunDescriber(interdeps)
def test_get_description(some_paramspecs):
    """
    Check that the description grows as parameters are added and that it
    only reaches the database once the first data point is added.
    """
    specs = some_paramspecs[2]
    ps1 = specs['ps1']
    ps2 = specs['ps2']

    ds = DataSet()
    assert ds.run_id == 1
    assert ds.description == RunDescriber(InterDependencies())

    ds.add_parameter(ps1)
    assert ds.description == RunDescriber(InterDependencies(ps1))

    ds.add_parameter(ps2)
    full_desc = ds.description
    assert full_desc == RunDescriber(InterDependencies(ps1, ps2))

    # the run description gets written as the first data point is added,
    # so a dataset loaded before that point must still see an empty one
    loaded_too_early = DataSet(run_id=1)
    assert loaded_too_early.description == RunDescriber(InterDependencies())

    ds.add_result({'ps1': 1, 'ps2': 2})

    loaded_after_result = DataSet(run_id=1)
    assert loaded_after_result.description == full_desc
def test_get_description(experiment, some_paramspecs):
    """
    Check that the description grows as parameters are added and that it
    only reaches the database once the dataset is marked as started.
    """
    specs = some_paramspecs[2]
    ps1 = specs['ps1']
    ps2 = specs['ps2']

    ds = DataSet()
    assert ds.run_id == 1
    assert ds.description == RunDescriber(InterDependencies())

    ds.add_parameter(ps1)
    assert ds.description == RunDescriber(InterDependencies(ps1))

    ds.add_parameter(ps2)
    full_desc = ds.description
    assert full_desc == RunDescriber(InterDependencies(ps1, ps2))

    # the run description gets written as the dataset is marked as started,
    # so a dataset loaded before that point must still see an empty one
    loaded_too_early = DataSet(run_id=1)
    assert loaded_too_early.description == RunDescriber(InterDependencies())

    ds.mark_started()

    loaded_after_start = DataSet(run_id=1)
    assert loaded_after_start.description == full_desc
def test_serialization_and_back(some_paramspecs):
    """
    Round-trip a RunDescriber through serialize/deserialize and check that
    an equal RunDescriber comes back.
    """
    original = RunDescriber(
        interdeps=InterDependencies(*some_paramspecs[1].values()))

    restored = RunDescriber.deserialize(original.serialize())

    assert isinstance(restored, RunDescriber)
    assert original == restored
def test_equality(some_paramspecs):
    """
    RunDescribers built from the same paramspec group compare equal; those
    built from different groups do not.
    """
    def describe(group_key):
        # build a fresh RunDescriber from the given paramspec group
        specs = some_paramspecs[group_key].values()
        return RunDescriber(interdeps=InterDependencies(*specs))

    desc_1 = describe(1)
    desc_2 = describe(2)
    desc_3 = describe(1)

    assert desc_1 == desc_3
    assert desc_1 != desc_2
    assert desc_3 != desc_2
def fix_version_4a_run_description_bug(conn: ConnectionPlus) -> Dict[str, int]:
    """
    Fix function to fix a bug where the RunDescriber accidentally wrote itself
    to string using the (new) InterDependencies_ object instead of the (old)
    InterDependencies object. After the first run, this function should be
    idempotent.

    Args:
        conn: the connection to the database

    Returns:
        A dict with the fix results ('runs_inspected', 'runs_fixed')

    Raises:
        RuntimeError: if the database is not of version 4
    """
    user_version = get_user_version(conn)

    if user_version != 4:
        # the second literal must be an f-string, otherwise the placeholder
        # is emitted verbatim instead of the version that was found
        raise RuntimeError('Database of wrong version. Will not apply fix. '
                           f'Expected version 4, found version {user_version}')

    no_of_runs_query = "SELECT max(run_id) FROM runs"
    no_of_runs = one(atomic_transaction(conn, no_of_runs_query), 'max(run_id)')
    # max(run_id) is falsy for a runs table without rows
    no_of_runs = no_of_runs or 0

    # collect some metrics
    runs_inspected = 0
    runs_fixed = 0

    # all rewrites happen inside a single atomic block
    with atomic(conn) as conn:
        pbar = tqdm(range(1, no_of_runs + 1))
        pbar.set_description("Fixing database")

        for run_id in pbar:
            desc_str = get_run_description(conn, run_id)
            desc_ser = json.loads(desc_str)
            idps_ser = desc_ser['interdependencies']

            # old-style descriptions are already fine; only the others are
            # loaded and written back through to_json
            if not RunDescriber._is_description_old_style(idps_ser):
                new_desc = RunDescriber.from_json(desc_str)
                update_run_description(conn, run_id, new_desc.to_json())
                runs_fixed += 1

            runs_inspected += 1

    return {'runs_inspected': runs_inspected, 'runs_fixed': runs_fixed}
def test_yaml_creation_and_loading(some_paramspecs):
    """
    For every paramspec group, dump a RunDescriber to YAML, check the dumped
    document and load it back into an equal RunDescriber.
    """
    yaml_parser = YAML()

    for spec_group in some_paramspecs.values():
        original = RunDescriber(
            interdeps=InterDependencies(*spec_group.values()))

        dumped = original.to_yaml()
        assert isinstance(dumped, str)

        top_level = dict(yaml_parser.load(dumped))
        assert list(top_level.keys()) == ['interdependencies']

        assert RunDescriber.from_yaml(dumped) == original
def _get_run_description_from_db(self) -> RunDescriber:
    """
    Read the run_description column of this run from the runs table and
    turn the stored JSON string into a RunDescriber
    """
    serialized = select_one_where(
        self.conn,
        "runs",
        "run_description",
        "run_id",
        self.run_id,
    )
    return RunDescriber.from_json(serialized)
def test_fix_wrong_run_descriptions():
    """
    Apply the run description fix to a version 3 fixture database and check
    that runs 2 and 3 end up with the description of run 1, while run 4
    ends up with an empty description.
    """
    dbname_old = os.path.join(fixturepath, 'db_files', 'version3',
                              'some_runs_without_run_description.db')

    if not os.path.exists(dbname_old):
        pytest.skip("No db-file fixtures found. You can generate test db-files"
                    " using the scripts in the legacy_DB_generation folder")

    with temporarily_copied_DB(dbname_old, debug=False, version=3) as conn:

        assert get_user_version(conn) == 3

        # run 1 is read before the fix is applied and serves as reference
        ds1 = DataSet(conn=conn, run_id=1)
        expected_description = ds1.description

        empty_description = RunDescriber(InterDependencies_())

        _fix_wrong_run_descriptions(conn, [1, 2, 3, 4])

        for fixed_run_id in (2, 3):
            fixed_ds = DataSet(conn=conn, run_id=fixed_run_id)
            assert expected_description == fixed_ds.description

        ds4 = DataSet(conn=conn, run_id=4)
        assert empty_description == ds4.description
def test_update_runs_description(dataset):
    """
    Invalid description strings must be rejected with a ValueError, whereas
    the JSON of a RunDescriber is accepted.
    """
    for bad_description in ('{}', 'description'):
        with pytest.raises(ValueError):
            mut.update_run_description(
                dataset.conn, dataset.run_id, bad_description)

    valid_description = RunDescriber(InterDependencies()).to_json()
    mut.update_run_description(
        dataset.conn, dataset.run_id, valid_description)
def update_run_description(conn: ConnectionPlus, run_id: int,
                           description: str) -> None:
    """
    Overwrite the run_description field of the run with the given run_id.

    The description is validated first: it must be a JSON string that
    RunDescriber.from_json can load, otherwise a ValueError is raised and
    nothing is written.
    """
    # validation only; the loaded object itself is not needed
    try:
        RunDescriber.from_json(description)
    except Exception as err:
        raise ValueError("Invalid description string. Must be a JSON string "
                         "representaion of a RunDescriber object.") from err

    update_query = " UPDATE runs SET run_description = ? WHERE run_id = ? "

    with atomic(conn) as atomic_conn:
        cursor = atomic_conn.cursor()
        cursor.execute(update_query, (description, run_id))
def test_yaml_creation_and_loading(some_paramspecs):
    """
    For every paramspec group, dump a RunDescriber to YAML, check the dumped
    document and load it back into an equal RunDescriber. The test is
    skipped when the ruamel importer raises an ImportError.
    """
    try:
        yaml_factory = RunDescriber._ruamel_importer()
    except ImportError:
        pytest.skip('No ruamel module installed, skipping test')

    yaml_parser = yaml_factory()

    for spec_group in some_paramspecs.values():
        original = RunDescriber(
            interdeps=InterDependencies(*spec_group.values()))

        dumped = original.to_yaml()
        assert isinstance(dumped, str)

        top_level = dict(yaml_parser.load(dumped))
        assert list(top_level.keys()) == ['interdependencies']

        assert RunDescriber.from_yaml(dumped) == original
def test_perform_actual_upgrade_2_to_3_some_runs():
    """
    Upgrade a version 2 fixture database to version 3 and verify that every
    run got a run_description and that the description of run 1 carries the
    expected paramspecs.
    """
    dbname_old = os.path.join(fixturepath, 'db_files', 'version2',
                              'some_runs.db')

    if not os.path.exists(dbname_old):
        pytest.skip("No db-file fixtures found. You can generate test db-files"
                    " using the scripts in the legacy_DB_generation folder")

    with temporarily_copied_DB(dbname_old, debug=False, version=2) as conn:

        assert get_user_version(conn) == 2

        perform_db_upgrade_2_to_3(conn)

        all_descs_cursor = atomic_transaction(
            conn, 'SELECT run_description FROM runs')
        assert len(all_descs_cursor.fetchall()) == 10

        # retrieve the json string and recreate the object
        first_run_query = " SELECT run_description FROM runs WHERE run_id == 1 "
        first_run_cursor = atomic_transaction(conn, first_run_query)
        json_str = one(first_run_cursor, 'run_description')

        idp = RunDescriber.from_json(json_str).interdeps
        assert isinstance(idp, InterDependencies)

        def spec_named(name):
            # first paramspec of run 1 carrying the given name
            return [p for p in idp.paramspecs if p.name == name][0]

        # here we verify that the dependencies encoded in
        # tests/dataset/legacy_DB_generation/generate_version_2.py
        # are recovered
        p0 = spec_named('p0')
        assert p0.depends_on == ''
        assert p0.inferred_from == ''
        assert p0.label == "Parameter 0"
        assert p0.unit == "unit 0"

        p4 = spec_named('p4')
        assert p4.depends_on == 'p2, p3'
        assert p4.inferred_from == ''
        assert p4.label == "Parameter 4"
        assert p4.unit == "unit 4"
def fix_wrong_run_descriptions(conn: ConnectionPlus,
                               run_ids: Sequence[int]) -> None:
    """
    NB: This is a FIX function. Do not use it unless your database has been
    diagnosed with the problem that this function fixes.

    Overwrite faulty run_descriptions by using information from the layouts
    and dependencies tables. If a correct description is found for a run,
    that run is left untouched.

    Args:
        conn: The connection to the database
        run_ids: The runs to (potentially) fix

    Raises:
        RuntimeError: if the database is not of version 3
    """
    user_version = get_user_version(conn)

    if user_version != 3:
        # the second literal must be an f-string, otherwise the placeholder
        # is emitted verbatim instead of the version that was found
        raise RuntimeError('Database of wrong version. Will not apply fix. '
                           f'Expected version 3, found version {user_version}')

    log.info('[*] Fixing run descriptions...')
    for run_id in run_ids:
        trusted_paramspecs = get_parameters(conn, run_id)
        trusted_desc = RunDescriber(
            interdeps=InterDependencies(*trusted_paramspecs))
        # serialize once; used both for the comparison and for the update
        trusted_json = trusted_desc.to_json()

        actual_desc_str = select_one_where(conn, "runs",
                                           "run_description",
                                           "run_id", run_id)

        if actual_desc_str == trusted_json:
            log.info(f'[+] Run id: {run_id} had an OK description')
        else:
            log.info(f'[-] Run id: {run_id} had a broken description. '
                     f'Description found: {actual_desc_str}')
            update_run_description(conn, run_id, trusted_json)
            log.info(f' Run id: {run_id} has been updated.')
def test_serialization_as_old(some_paramspecs):
    """
    A RunDescriber holding new style interdeps must serialize to the same
    result as a RunDescriber holding the corresponding old style interdeps
    """
    old_style = InterDependencies(*some_paramspecs[2].values())
    new_style = old_to_new(old_style)

    serialized_from_new = RunDescriber(new_style).serialize()
    serialized_from_old = RunDescriber(old_style).serialize()

    assert serialized_from_new == serialized_from_old
def upgrade_3_to_4(conn: ConnectionPlus) -> None:
    """
    Upgrade the database from version 3 to version 4.

    This is a re-run of the version 3 upgrade, which originally had two bugs
    in the inferred annotation: inferred_from was passed incorrectly, so
    that a parameter got marked as inferred from each single character of
    the inferred_from variable, and inferred_from was not handled correctly
    for parameters that were neither dependencies nor dependent on other
    parameters. Both bugs have since been fixed, hence the run descriptions
    are regenerated here.
    """
    max_run_id = one(
        atomic_transaction(conn, "SELECT max(run_id) FROM runs"),
        'max(run_id)')
    no_of_runs = max_run_id or 0

    update_query = " UPDATE runs SET run_description = ? WHERE run_id == ? "

    # If one run fails, we want the whole upgrade to roll back, hence the
    # entire upgrade is one atomic transaction
    with atomic(conn) as conn:

        result_tables = _2to3_get_result_tables(conn)
        layout_ids_all = _2to3_get_layout_ids(conn)
        indeps_all = _2to3_get_indeps(conn)
        deps_all = _2to3_get_deps(conn)
        layouts = _2to3_get_layouts(conn)
        dependencies = _2to3_get_dependencies(conn)

        pbar = tqdm(range(1, no_of_runs + 1))
        pbar.set_description("Upgrading database")

        for run_id in pbar:

            if run_id not in layout_ids_all:
                # no layouts for this run: store an empty description
                json_str = RunDescriber(InterDependencies()).to_json()
            else:
                result_table_name = result_tables[run_id]
                layout_ids = list(layout_ids_all[run_id])
                independents = (tuple(indeps_all[run_id])
                                if run_id in indeps_all else ())
                dependents = (tuple(deps_all[run_id])
                              if run_id in deps_all else ())

                paramspecs = _2to3_get_paramspecs(conn,
                                                  layout_ids,
                                                  layouts,
                                                  dependencies,
                                                  dependents,
                                                  independents,
                                                  result_table_name)

                json_str = RunDescriber(
                    interdeps=InterDependencies(*paramspecs.values())
                ).to_json()

            conn.cursor().execute(update_query, (json_str, run_id))
            log.debug(f"Upgrade in transition, run number {run_id}: OK")
def _get_run_description_from_db(self) -> RunDescriber:
    """
    Fetch the stored run_description string of this run and turn it into
    a RunDescriber
    """
    serialized = get_run_description(self.conn, self.run_id)
    return RunDescriber.from_json(serialized)
def description(self) -> RunDescriber:
    """
    Build a RunDescriber from the interdependencies currently held by this
    object
    """
    describer = RunDescriber(interdeps=self._interdeps)
    return describer
def generate_DB_file_with_some_runs_having_not_run_descriptions():
    """
    Generate a .db-file with a handful of runs some of which lack run
    description or have it as empty object (based on a real case).

    The file is written to the 'version3' folder below ``fixturepath`` as
    'some_runs_without_run_description.db'; an already existing file of that
    name is removed first.

    Generated runs:
        #1: run with parameters and correct run description
        #2: run with parameters but run description is NULL
        #3: run with parameters but run description is empty RunDescriber
        #4: run without parameters but run description is NULL
    """
    v3fixturepath = os.path.join(fixturepath, 'version3')
    os.makedirs(v3fixturepath, exist_ok=True)
    path = os.path.join(v3fixturepath, 'some_runs_without_run_description.db')

    # always start from a fresh file
    if os.path.exists(path):
        os.remove(path)

    # qcodes is imported inside the function rather than at module level
    from qcodes.dataset.measurements import Measurement
    from qcodes.dataset.experiment_container import Experiment
    from qcodes import Parameter
    from qcodes.dataset.descriptions import RunDescriber
    from qcodes.dataset.dependencies import InterDependencies

    exp = Experiment(path_to_db=path,
                     name='experiment_1',
                     sample_name='no_sample_1')
    # the same connection is used below to tamper with the runs table
    conn = exp.conn

    # Now make some parameters to use in measurements
    params = []
    for n in range(5):
        params.append(
            Parameter(f'p{n}',
                      label=f'Parameter {n}',
                      unit=f'unit {n}',
                      set_cmd=None,
                      get_cmd=None))

    # Set up a measurement
    meas = Measurement(exp)
    meas.register_parameter(params[0])
    meas.register_parameter(params[1])
    meas.register_parameter(params[2], basis=(params[0], ))
    meas.register_parameter(params[3], basis=(params[1], ))
    meas.register_parameter(params[4], setpoints=(params[2], params[3]))

    # Initially make 3 correct runs
    run_ids = []

    for _ in range(3):

        with meas.run() as datasaver:

            # 10 x 10 random points per run
            for x in np.random.rand(10):
                for y in np.random.rand(10):
                    z = np.random.rand()
                    datasaver.add_result((params[2], x),
                                         (params[3], y),
                                         (params[4], z))

        run_ids.append(datasaver.run_id)

    # the steps below address runs by position in run_ids, so make sure the
    # ids are the ones listed in the docstring
    assert [1, 2, 3] == run_ids, 'Run ids of generated runs are not as ' \
        'expected after generating runs #1-3'

    # Formulate SQL query for adjusting run_description column
    set_run_description_sql = f""" UPDATE runs SET run_description = ? WHERE run_id == ? """

    # Make run_description of run #2 NULL
    conn.execute(set_run_description_sql, (None, run_ids[1]))
    conn.commit()  # just to be sure

    # Make run_description of run #3 equivalent to an empty RunDescriber
    empty_run_description = RunDescriber(InterDependencies()).to_json()
    conn.execute(set_run_description_sql, (empty_run_description, run_ids[2]))
    conn.commit()  # just to be sure

    # Set up a measurement without parameters, and create run #4 out of it
    meas_no_params = Measurement(exp)

    with meas_no_params.run() as datasaver:
        pass

    run_ids.append(datasaver.run_id)

    assert [1, 2, 3, 4] == run_ids, 'Run ids of generated runs are not as ' \
        'expected after generating run #4'

    # Make run_description of run #4 NULL
    conn.execute(set_run_description_sql, (None, run_ids[3]))
    conn.commit()  # just to be sure
def test_perform_upgrade_v3_to_v4():
    """
    Upgrade a version 3 fixture database to version 4 and verify the
    paramspecs found in the run description of run 1.
    """
    dbname_old = os.path.join(fixturepath, 'db_files', 'version3',
                              'some_runs_upgraded_2.db')

    if not os.path.exists(dbname_old):
        pytest.skip("No db-file fixtures found. You can generate test db-files"
                    " using the scripts in the "
                    "https://github.com/QCoDeS/qcodes_generate_test_db/ repo")

    # one row per paramspec: name, depends_on, depends_on_, inferred_from,
    # inferred_from_, label, unit
    expected_specs = (
        ('p0', '', [], '', [], "Parameter 0", "unit 0"),
        ('p1', '', [], '', [], "Parameter 1", "unit 1"),
        ('p2', '', [], 'p0', ['p0'], "Parameter 2", "unit 2"),
        ('p3', '', [], 'p1, p0', ['p1', 'p0'], "Parameter 3", "unit 3"),
        ('p4', 'p2, p3', ['p2', 'p3'], '', [], "Parameter 4", "unit 4"),
        ('p5', '', [], 'p0', ['p0'], "Parameter 5", "unit 5"),
    )

    with temporarily_copied_DB(dbname_old, debug=False, version=3) as conn:

        assert get_user_version(conn) == 3

        first_run_query = " SELECT run_description FROM runs WHERE run_id == 1 "

        perform_db_upgrade_3_to_4(conn)

        cursor = atomic_transaction(conn, first_run_query)
        json_str = one(cursor, 'run_description')

        idp = RunDescriber.from_json(json_str).interdeps
        assert isinstance(idp, InterDependencies)

        for (name, depends_on, depends_on_list, inferred_from,
             inferred_from_list, label, unit) in expected_specs:
            spec = [p for p in idp.paramspecs if p.name == name][0]
            assert spec.depends_on == depends_on
            assert spec.depends_on_ == depends_on_list
            assert spec.inferred_from == inferred_from
            assert spec.inferred_from_ == inferred_from_list
            assert spec.label == label
            assert spec.unit == unit
def _insert_run(
        conn: ConnectionPlus,
        exp_id: int,
        name: str,
        guid: str,
        parameters: Optional[List[ParamSpec]] = None,
):
    """
    Insert a new row into the runs table for the given experiment.

    The run counter of the experiment is read and incremented locally to
    number the run and to format the name of its result table. When
    parameters are given, their names are stored in the row and they are
    added to the layout and dependencies as well.

    Returns:
        A tuple of the run counter, the formatted result table name and the
        row id of the inserted run
    """
    # get run counter and formatter from experiments
    previous_counter, format_string = select_many_where(
        conn,
        "experiments",
        "run_counter",
        "format_string",
        where_column="exp_id",
        where_value=exp_id)
    run_counter = previous_counter + 1
    formatted_name = format_table_name(format_string, name, exp_id,
                                       run_counter)
    table = "runs"

    parameters = parameters or []
    desc_str = RunDescriber(InterDependencies(*parameters)).to_json()

    with atomic(conn) as conn:

        if not parameters:
            query = (
                f" INSERT INTO {table} (name, exp_id, guid, "
                "result_table_name, result_counter, run_timestamp, "
                "is_completed, run_description) "
                "VALUES (?,?,?,?,?,?,?,?) "
            )
            curr = transaction(conn, query,
                               name,
                               exp_id,
                               guid,
                               formatted_name,
                               run_counter,
                               None,
                               False,
                               desc_str)
        else:
            query = (
                f" INSERT INTO {table} (name, exp_id, guid, "
                "result_table_name, result_counter, run_timestamp, "
                "parameters, is_completed, run_description) "
                "VALUES (?,?,?,?,?,?,?,?,?) "
            )
            parameter_names = ",".join(p.name for p in parameters)
            curr = transaction(conn, query,
                               name,
                               exp_id,
                               guid,
                               formatted_name,
                               run_counter,
                               None,
                               parameter_names,
                               False,
                               desc_str)

            _add_parameters_to_layout_and_deps(conn, formatted_name,
                                               *parameters)

    return run_counter, formatted_name, curr.lastrowid
def upgrade_2_to_3(conn: ConnectionPlus) -> None:
    """
    Upgrade the database from version 2 to version 3.

    A new column, run_description, is added to the runs table. For existing
    runs it is filled with the to_json output of a RunDescriber object built
    from the information found in the layouts and dependencies tables.
    """
    max_run_id = one(
        atomic_transaction(conn, "SELECT max(run_id) FROM runs"),
        'max(run_id)')
    no_of_runs = max_run_id or 0

    update_query = " UPDATE runs SET run_description = ? WHERE run_id == ? "

    # If one run fails, we want the whole upgrade to roll back, hence the
    # entire upgrade is one atomic transaction
    with atomic(conn) as conn:
        transaction(conn, "ALTER TABLE runs ADD COLUMN run_description TEXT")

        result_tables = _2to3_get_result_tables(conn)
        layout_ids_all = _2to3_get_layout_ids(conn)
        indeps_all = _2to3_get_indeps(conn)
        deps_all = _2to3_get_deps(conn)
        layouts = _2to3_get_layouts(conn)
        dependencies = _2to3_get_dependencies(conn)

        pbar = tqdm(range(1, no_of_runs + 1))
        pbar.set_description("Upgrading database")

        for run_id in pbar:

            if run_id not in layout_ids_all:
                # no layouts for this run: store an empty description
                json_str = RunDescriber(InterDependencies()).to_json()
            else:
                result_table_name = result_tables[run_id]
                layout_ids = list(layout_ids_all[run_id])
                independents = (tuple(indeps_all[run_id])
                                if run_id in indeps_all else ())
                dependents = (tuple(deps_all[run_id])
                              if run_id in deps_all else ())

                paramspecs = _2to3_get_paramspecs(conn,
                                                  layout_ids,
                                                  layouts,
                                                  dependencies,
                                                  dependents,
                                                  independents,
                                                  result_table_name)

                json_str = RunDescriber(
                    interdeps=InterDependencies(*paramspecs.values())
                ).to_json()

            conn.cursor().execute(update_query, (json_str, run_id))
            log.debug(f"Upgrade in transition, run number {run_id}: OK")
def test_serialization_dict_keys(some_paramspecs):
    """
    The serialized RunDescriber must have 'interdependencies' as its only
    key.
    """
    describer = RunDescriber(
        interdeps=InterDependencies(*some_paramspecs[1].values()))

    serialized = describer.serialize()

    assert list(serialized.keys()) == ['interdependencies']
def __init__(self, path_to_db: Optional[str] = None,
             run_id: Optional[int] = None,
             conn: Optional[ConnectionPlus] = None,
             exp_id=None,
             name: Optional[str] = None,
             specs: Optional[SPECS] = None,
             values=None,
             metadata=None) -> None:
    """
    Create a new DataSet object. The object can either hold a new run or
    an already existing run. If a run_id is provided, then an old run is
    looked up, else a new run is created.

    Args:
        path_to_db: path to the sqlite file on disk. If not provided, the
          path will be read from the config.
        run_id: provide this when loading an existing run, leave it
          as None when creating a new run
        conn: connection to the DB; if provided and `path_to_db` is
          provided as well, then a ValueError is raised (this is to
          prevent the possibility of providing a connection to a DB
          file that is different from `path_to_db`)
        exp_id: the id of the experiment in which to create a new run.
          Ignored if run_id is provided.
        name: the name of the dataset. Ignored if run_id is provided.
        specs: paramspecs belonging to the dataset. Ignored if run_id is
          provided.
        values: values to insert into the dataset. Ignored if run_id is
          provided.
        metadata: metadata to insert into the dataset. Ignored if run_id
          is provided.

    Raises:
        ValueError: if both `path_to_db` and `conn` are given, if `run_id`
          is given but no such run exists in the database, or if a new
          run is requested without `exp_id` and the database holds no
          experiments.
    """
    if path_to_db is not None and conn is not None:
        raise ValueError("Both `path_to_db` and `conn` arguments have "
                         "been passed together with non-None values. "
                         "This is not allowed.")
    self._path_to_db = path_to_db or get_DB_location()

    # reuse the given connection if there is one, else open a new one
    # NOTE(review): `self.path_to_db` is presumably a property defined
    # elsewhere on the class that exposes `_path_to_db` - confirm
    self.conn = make_connection_plus_from(conn) if conn is not None else \
        connect(self.path_to_db)

    self._run_id = run_id
    self._debug = False
    self.subscribers: Dict[str, _Subscriber] = {}

    if run_id is not None:
        # load an existing run: all state is read from the database
        if not run_exists(self.conn, run_id):
            raise ValueError(f"Run with run_id {run_id} does not exist in "
                             f"the database")
        self._completed = completed(self.conn, self.run_id)
        # a loaded run counts as started as soon as it holds any results
        self._started = self.number_of_results > 0
        self._description = self._get_run_description_from_db()
        self._metadata = get_metadata_from_run_id(self.conn, run_id)

    else:
        # Actually perform all the side effects needed for the creation
        # of a new dataset

        if exp_id is None:
            # fall back to the last experiment in the database
            if len(get_experiments(self.conn)) > 0:
                exp_id = get_last_experiment(self.conn)
            else:
                raise ValueError("No experiments found."
                                 "You can start a new one with:"
                                 " new_experiment(name, sample_name)")
        name = name or "dataset"
        # only the middle element returned by create_run is used here
        _, run_id, __ = create_run(self.conn, exp_id, name,
                                   generate_guid(),
                                   specs, values, metadata)
        # this is really the UUID (an ever increasing count in the db)
        self._run_id = run_id
        self._completed = False
        self._started = False
        specs = specs or []
        self._description = RunDescriber(InterDependencies(*specs))
        self._metadata = get_metadata_from_run_id(self.conn, self.run_id)
def test_wrong_input_type_raises():
    """
    Constructing a RunDescriber from something that is not an
    interdependencies object must raise a ValueError.
    """
    invalid_inputs = ('interdeps', ['p1', 'p2'], 0)

    for invalid_input in invalid_inputs:
        with pytest.raises(ValueError):
            RunDescriber(interdeps=invalid_input)