def guids_from_dbs(
    db_paths: Iterable[Path],
) -> Tuple[Dict[Path, List[str]], Dict[str, Path]]:
    """
    Extract all guids from the supplied database paths.

    Each path is opened with ``connect`` and queried with
    ``get_guids_from_run_spec``. A database that raises ``RuntimeError`` or
    ``DatabaseError`` while being opened or queried has the error printed
    and is left out of the result.

    Args:
        db_paths: Iterable of paths to the database files to read guids
            from.

    Returns:
        Tuple of Dictionary mapping paths to lists of guids as strings
        and Dictionary mapping guids to db paths. If the same guid is found
        in several databases, the path of the last one iterated is kept.
    """
    dbdict: Dict[Path, List[str]] = {}
    for p in db_paths:
        # Reset per iteration: if ``connect`` itself fails, the ``finally``
        # clause must neither hit an unbound name nor re-close the
        # connection left over from the previous database.
        conn = None
        try:
            conn = connect(str(p))
            dbdict[p] = get_guids_from_run_spec(conn)
        except (RuntimeError, DatabaseError) as e:
            print(e)
        finally:
            if conn is not None:
                conn.close()
            # NOTE(review): presumably forces prompt release of the database
            # handle before the next file is opened -- confirm.
            gc.collect()

    guiddict: Dict[str, Path] = {}
    for dbpath, guids in dbdict.items():
        guiddict.update({guid: dbpath for guid in guids})
    return dbdict, guiddict
def load_by_run_spec(*,
                     captured_run_id: Optional[int] = None,
                     captured_counter: Optional[int] = None,
                     experiment_name: Optional[str] = None,
                     sample_name: Optional[str] = None,
                     # guid parts
                     sample_id: Optional[int] = None,
                     location: Optional[int] = None,
                     work_station: Optional[int] = None,
                     conn: Optional[ConnectionPlus] = None) -> DataSet:
    """
    Load a run from one or more pieces of runs specification. All
    fields are optional but the function will raise an error if more than one
    run matching the supplied specification is found. Along with the error
    specs of the runs found will be printed.

    Args:
        captured_run_id: The run_id that was originally assigned to this
          at the time of capture.
        captured_counter: The counter that was originally assigned to this
          at the time of capture.
        experiment_name: name of the experiment that the run was captured
        sample_name: The name of the sample given when creating the experiment.
        sample_id: The sample_id assigned as part of the GUID.
        location: The location code assigned as part of GUID.
        work_station: The workstation assigned as part of the GUID.
        conn: An optional connection to the database. If no connection is
          supplied a connection to the default database will be opened.

    Raises:
        NameError: if no run or more than one run with the given specification
         exists in the database

    Returns:
        :class:`.DataSet` matching the provided specification.
    """
    if conn is None:
        conn = connect(get_DB_location())

    # First narrow down by the run/experiment level fields ...
    guids = get_guids_from_run_spec(conn,
                                    captured_run_id=captured_run_id,
                                    captured_counter=captured_counter,
                                    experiment_name=experiment_name,
                                    sample_name=sample_name)
    # ... then by the fields that are encoded in the guid itself.
    matched_guids = filter_guids_by_parts(guids, location, sample_id,
                                          work_station)

    if len(matched_guids) == 0:
        raise NameError('No run matching the supplied information found.')

    if len(matched_guids) > 1:
        # Show the candidates so the caller can see how to disambiguate.
        print(generate_dataset_table(matched_guids, conn=conn))
        raise NameError("More than one matching dataset found. "
                        "Please supply more information to uniquely "
                        "identify a dataset")

    return load_by_guid(matched_guids[0], conn)
def test_get_guids_from_run_spec_warns():
    """
    ``get_guids_from_run_spec`` must emit a ``QCoDeSDeprecationWarning``
    while still returning the guid of the requested run.
    """
    dataset = new_data_set("test-dataset")
    captured_run_id = dataset.run_id
    dataset.mark_started()
    dataset.mark_completed()
    guid_of_run = dataset.guid

    deprecation_check = pytest.warns(
        expected_warning=QCoDeSDeprecationWarning,
        match="Unused part of private api",
    )
    with deprecation_check:
        found_guids = get_guids_from_run_spec(
            captured_run_id=captured_run_id,
            conn=dataset.conn,
        )

    # Exactly the one run created above should be returned.
    assert len(found_guids) == 1
    assert found_guids[0] == guid_of_run
def test_load_by_run_spec(empty_temp_db, some_interdeps):
    """
    Exercise ``load_by_run_spec`` on three runs, one per experiment, using
    specifications that are unique, ambiguous and non-matching.
    """
    ambiguous_msg = "More than one matching"

    def _started_run(exp_id):
        # Started dataset in experiment ``exp_id`` holding one result row.
        run = DataSet(exp_id=exp_id)
        run.set_interdependencies(some_interdeps[1])
        run.mark_started()
        run.add_results([{"ps1": 1, "ps2": 2}])
        return run

    # Three experiments mixing two experiment names and two sample names.
    exp_names = ["te1", "te2", "te1"]
    sample_names = ["ts1", "ts2", "ts2"]
    experiments = [
        new_experiment(name, sample_name=sample)
        for name, sample in zip(exp_names, sample_names)
    ]
    created_ds = [_started_run(experiment.exp_id)
                  for experiment in experiments]
    conn = created_ds[0].conn

    guids = get_guids_from_run_spec(conn=conn)
    assert len(guids) == 3

    # Nothing has been copied in from another db, so captured_run_id alone
    # identifies each run (equivalent to load_by_id).
    for run_id, (guid, expected_ds) in enumerate(zip(guids, created_ds),
                                                 start=1):
        loaded_ds = load_by_run_spec(captured_run_id=run_id, conn=conn)
        assert loaded_ds.guid == guid
        assert loaded_ds.the_same_dataset_as(expected_ds)

    # Every dataset has captured_counter == 1, so the counter on its own
    # is ambiguous. This call deliberately passes no connection.
    guids_cc1 = get_guids_from_run_spec(captured_counter=1, conn=conn)
    assert len(guids_cc1) == 3
    with pytest.raises(NameError, match=ambiguous_msg):
        load_by_run_spec(captured_counter=1)

    # Experiment name "te1" is used twice -> still two candidates.
    guids_cc1_te1 = get_guids_from_run_spec(captured_counter=1,
                                            experiment_name="te1",
                                            conn=conn)
    assert len(guids_cc1_te1) == 2
    with pytest.raises(NameError, match=ambiguous_msg):
        load_by_run_spec(captured_counter=1,
                         experiment_name="te1",
                         conn=conn)

    # Experiment name "te2" is used once -> unique.
    guids_cc1_te2 = get_guids_from_run_spec(captured_counter=1,
                                            experiment_name="te2",
                                            conn=conn)
    assert len(guids_cc1_te2) == 1
    loaded_ds = load_by_run_spec(captured_counter=1,
                                 experiment_name="te2",
                                 conn=conn)
    assert loaded_ds.guid == guids_cc1_te2[0]
    assert loaded_ds.the_same_dataset_as(created_ds[1])

    # Sample name "ts2" is shared by two experiments with different
    # experiment names -> ambiguous.
    guids_cc1_ts2 = get_guids_from_run_spec(captured_counter=1,
                                            sample_name="ts2",
                                            conn=conn)
    assert len(guids_cc1_ts2) == 2
    with pytest.raises(NameError, match=ambiguous_msg):
        load_by_run_spec(captured_counter=1,
                         sample_name="ts2",
                         conn=conn)

    # Sample name "ts1" is used once -> unique.
    guids_cc1_ts1 = get_guids_from_run_spec(captured_counter=1,
                                            sample_name="ts1",
                                            conn=conn)
    assert len(guids_cc1_ts1) == 1
    loaded_ds = load_by_run_spec(captured_counter=1,
                                 sample_name="ts1",
                                 conn=conn)
    assert loaded_ds.the_same_dataset_as(created_ds[0])
    assert loaded_ds.guid == guids_cc1_ts1[0]

    # Counter + experiment name + sample name pins down each of the three.
    for exp_name, sample_name, expected_ds, guid in zip(exp_names,
                                                        sample_names,
                                                        created_ds,
                                                        guids):
        loaded_ds = load_by_run_spec(captured_counter=1,
                                     experiment_name=exp_name,
                                     sample_name=sample_name,
                                     conn=conn)
        assert loaded_ds.the_same_dataset_as(expected_ds)
        assert loaded_ds.guid == guid

    # A specification that matches nothing must raise as well.
    with pytest.raises(NameError, match="No run matching"):
        load_by_run_spec(captured_counter=10000,
                         sample_name="ts2",
                         conn=conn)

    empty_guid_list = get_guids_from_run_spec(conn=conn,
                                              experiment_name="nosuchexp")
    assert empty_guid_list == []