Ejemplo n.º 1
0
def guids_from_dbs(
    db_paths: Iterable[Path],
) -> Tuple[Dict[Path, List[str]], Dict[str, Path]]:
    """
    Extract all guids from the supplied database paths.

    Each path is opened with ``connect``, queried with
    ``get_guids_from_run_spec`` and closed again before the next path is
    processed. A path whose processing raises ``RuntimeError`` or
    ``DatabaseError`` is reported with ``print`` and left out of the result.

    Args:
        db_paths: Iterable of paths of the databases to read. Each path is
          converted to ``str`` before being passed to ``connect``.

    Returns:
        Tuple of Dictionary mapping paths to lists of guids as strings
        and Dictionary mapping guids to db paths. If the same guid is found
        under several paths, the path processed last is the one kept in the
        second dictionary.
    """
    dbdict = {}
    for p in db_paths:
        # Reset per iteration: if ``connect`` itself fails there is nothing
        # to close, and we must not touch a connection left over from a
        # previous iteration (or an unbound name on the first one).
        conn = None
        try:
            conn = connect(str(p))
            dbdict[p] = get_guids_from_run_spec(conn)
        except (RuntimeError, DatabaseError) as e:
            # Best effort: report the problem and carry on with the
            # remaining paths.
            print(e)
        finally:
            if conn is not None:
                conn.close()
            gc.collect()
    guiddict = {}
    for dbpath, guids in dbdict.items():
        guiddict.update({guid: dbpath for guid in guids})
    return dbdict, guiddict
Ejemplo n.º 2
0
def load_by_run_spec(*,
                     captured_run_id: Optional[int] = None,
                     captured_counter: Optional[int] = None,
                     experiment_name: Optional[str] = None,
                     sample_name: Optional[str] = None,
                     # guid parts
                     sample_id: Optional[int] = None,
                     location: Optional[int] = None,
                     work_station: Optional[int] = None,
                     conn: Optional[ConnectionPlus] = None) -> DataSet:
    """
    Load a run from one or more pieces of runs specification. All
    fields are optional but the function will raise an error if more than one
    run matching the supplied specification is found. Along with the error
    specs of the runs found will be printed.

    The candidate guids are first selected with ``get_guids_from_run_spec``
    (run id, counter, experiment name, sample name) and then narrowed down
    with ``filter_guids_by_parts`` (location, sample_id, work_station).

    Args:
        captured_run_id: The run_id that was originally assigned to this
          at the time of capture.
        captured_counter: The counter that was originally assigned to this
          at the time of capture.
        experiment_name: Name of the experiment that the run was captured
          in.
        sample_name: The name of the sample given when creating the experiment.
        sample_id: The sample_id assigned as part of the GUID.
        location: The location code assigned as part of GUID.
        work_station: The workstation assigned as part of the GUID.
        conn: An optional connection to the database. If no connection is
          supplied a connection to the default database will be opened.

    Raises:
        NameError: if no run or more than one run with the given specification
         exists in the database

    Returns:
        :class:`.DataSet` matching the provided specification.
    """
    # Fall back to the default database when the caller gave no connection.
    conn = conn or connect(get_DB_location())
    guids = get_guids_from_run_spec(conn,
                                    captured_run_id=captured_run_id,
                                    captured_counter=captured_counter,
                                    experiment_name=experiment_name,
                                    sample_name=sample_name)

    matched_guids = filter_guids_by_parts(guids, location, sample_id,
                                          work_station)

    if len(matched_guids) == 1:
        return load_by_guid(matched_guids[0], conn)

    if len(matched_guids) > 1:
        # Show the ambiguous candidates so the caller can refine the spec.
        print(generate_dataset_table(matched_guids, conn=conn))
        raise NameError("More than one matching dataset found. "
                        "Please supply more information to uniquely "
                        "identify a dataset")

    raise NameError('No run matching the supplied information found.')
Ejemplo n.º 3
0
def test_get_guids_from_run_spec_warns():
    """Check that ``get_guids_from_run_spec`` warns and still finds the run.

    A single dataset is created, started and completed; looking it up by its
    run id must emit a ``QCoDeSDeprecationWarning`` and return exactly that
    dataset's guid.
    """
    dataset = new_data_set("test-dataset")
    captured_id = dataset.run_id
    dataset.mark_started()
    dataset.mark_completed()
    guid_before_lookup = dataset.guid

    with pytest.warns(QCoDeSDeprecationWarning,
                      match="Unused part of private api"):
        found = get_guids_from_run_spec(
            captured_run_id=captured_id,
            conn=dataset.conn,
        )

    assert len(found) == 1
    assert found[0] == guid_before_lookup
Ejemplo n.º 4
0
def test_load_by_run_spec(empty_temp_db, some_interdeps):
    """Exercise ``load_by_run_spec`` on three runs spread over three experiments.

    The experiments combine two experiment names ("te1", "te2") with two
    sample names ("ts1", "ts2"), and each holds exactly one run. The test
    checks which combinations of specifiers identify a run uniquely and
    which ones raise ``NameError``.
    """
    def _started_dataset(exp_id):
        # One started dataset with a single result row in the experiment.
        dataset = DataSet(exp_id=exp_id)
        dataset.set_interdependencies(some_interdeps[1])
        dataset.mark_started()
        dataset.add_results([{'ps1': 1, 'ps2': 2}])
        return dataset

    # (experiment name, sample name) pairs for the three experiments
    exp_names = ["te1", "te2", "te1"]
    sample_names = ["ts1", "ts2", "ts2"]

    experiments = []
    for exp_name, sample_name in zip(exp_names, sample_names):
        experiments.append(new_experiment(exp_name, sample_name=sample_name))

    datasets = []
    for experiment in experiments:
        datasets.append(_started_dataset(experiment.exp_id))

    conn = datasets[0].conn

    all_guids = get_guids_from_run_spec(conn=conn)
    assert len(all_guids) == 3

    # Nothing was copied in from another db, so captured_run_id alone is
    # enough to pick out each run (equivalent to load_by_id).
    for run_id, (guid, dataset) in enumerate(zip(all_guids, datasets),
                                             start=1):
        loaded = load_by_run_spec(captured_run_id=run_id, conn=conn)
        assert loaded.guid == guid
        assert loaded.the_same_dataset_as(dataset)

    # Every dataset has captured counter 1, so the counter alone is
    # ambiguous.
    same_counter = get_guids_from_run_spec(captured_counter=1, conn=conn)
    assert len(same_counter) == 3
    with pytest.raises(NameError, match="More than one matching"):
        # NOTE(review): no conn is passed here, unlike every other call in
        # this test - presumably this relies on the default database being
        # the temporary one set up by the fixture; confirm it is intended.
        load_by_run_spec(captured_counter=1)

    # Two experiments are named "te1", so counter + that name still
    # matches two datasets.
    counter_and_te1 = get_guids_from_run_spec(captured_counter=1,
                                              experiment_name='te1',
                                              conn=conn)
    assert len(counter_and_te1) == 2
    with pytest.raises(NameError, match="More than one matching"):
        load_by_run_spec(captured_counter=1, experiment_name="te1", conn=conn)

    # Only one experiment is named "te2", so this is unique.
    counter_and_te2 = get_guids_from_run_spec(captured_counter=1,
                                              experiment_name='te2',
                                              conn=conn)
    assert len(counter_and_te2) == 1
    loaded = load_by_run_spec(captured_counter=1,
                              experiment_name="te2",
                              conn=conn)
    assert loaded.guid == counter_and_te2[0]
    assert loaded.the_same_dataset_as(datasets[1])

    # Two experiments use sample "ts2" (under different experiment names),
    # so counter + that sample name is ambiguous.
    counter_and_ts2 = get_guids_from_run_spec(captured_counter=1,
                                              sample_name='ts2',
                                              conn=conn)
    assert len(counter_and_ts2) == 2
    with pytest.raises(NameError, match="More than one matching"):
        load_by_run_spec(captured_counter=1, sample_name="ts2", conn=conn)

    # Only one experiment uses sample "ts1", so this is unique.
    counter_and_ts1 = get_guids_from_run_spec(captured_counter=1,
                                              sample_name='ts1',
                                              conn=conn)
    assert len(counter_and_ts1) == 1
    loaded = load_by_run_spec(captured_counter=1,
                              sample_name="ts1",
                              conn=conn)
    assert loaded.the_same_dataset_as(datasets[0])
    assert loaded.guid == counter_and_ts1[0]

    # Counter + experiment name + sample name identifies each of the three.
    for exp_name, sample_name, dataset, guid in zip(exp_names, sample_names,
                                                    datasets, all_guids):
        loaded = load_by_run_spec(captured_counter=1,
                                  experiment_name=exp_name,
                                  sample_name=sample_name,
                                  conn=conn)
        assert loaded.the_same_dataset_as(dataset)
        assert loaded.guid == guid

    # A specification that matches no run at all
    with pytest.raises(NameError, match="No run matching"):
        load_by_run_spec(captured_counter=10000, sample_name="ts2", conn=conn)

    no_guids = get_guids_from_run_spec(conn=conn,
                                       experiment_name='nosuchexp')
    assert no_guids == []