Esempio n. 1
0
def experiments() -> List[Experiment]:
    """
    Load every experiment stored in the database file named in the config.

    The database location is logged, the experiment rows are fetched through
    a fresh connection, and each row is turned into an experiment object via
    its ``exp_id``.

    Returns:
        One loaded experiment per row returned by ``get_experiments``
    """
    log.info("loading experiments from {}".format(get_DB_location()))
    db_conn = connect(get_DB_location(), get_DB_debug())
    return [load_experiment(record['exp_id'])
            for record in get_experiments(db_conn)]
Esempio n. 2
0
def load_last_experiment() -> Experiment:
    """
    Load the last experiment of the database file given in the config.

    The id of the loaded experiment is additionally handed to
    ``_set_default_experiment_id`` for the configured database location.

    Returns:
        The last experiment

    Raises:
        ValueError: if the database file contains no experiments
    """
    db_conn = connect(get_DB_location())
    newest_exp_id = get_last_experiment(db_conn)
    if newest_exp_id is None:
        raise ValueError('There are no experiments in the database file')

    last_experiment = Experiment(exp_id=newest_exp_id)
    _set_default_experiment_id(get_DB_location(), last_experiment.exp_id)
    return last_experiment
Esempio n. 3
0
def new_experiment(name: str,
                   sample_name: Optional[str],
                   format_string: str = "{}-{}-{}",
                   conn: Optional[ConnectionPlus] = None) -> Experiment:
    """
    Create a new experiment and hand its id to
    ``_set_default_experiment_id`` for the database file of the connection.

    A warning is logged (but the experiment is still created) when the
    database already holds experiments with the same name and sample name.

    Args:
        name: the name of the experiment
        sample_name: the name of the current sample; a falsy value is
            replaced by "some_sample"
        format_string: basic format string for table-name
            must contain 3 placeholders.
        conn: connection to the database. If not supplied, a new connection
          to the DB file specified in the config is made

    Returns:
        the new experiment
    """
    if not sample_name:
        sample_name = "some_sample"
    if not conn:
        conn = connect(get_DB_location())

    already_present = get_matching_exp_ids(conn,
                                           name=name,
                                           sample_name=sample_name)
    if len(already_present) > 0:
        log.warning(
            f"There is (are) already experiment(s) with the name of {name} "
            f"and sample name of {sample_name} in the database.")

    created = Experiment(name=name, sample_name=sample_name,
                         format_string=format_string, conn=conn)
    _set_default_experiment_id(path_to_dbfile(conn), created.exp_id)
    return created
Esempio n. 4
0
def load_by_counter(counter: int, exp_id: int,
                    conn: Optional[ConnectionPlus] = None) -> DataSet:
    """
    Load the dataset that has the given counter within the given experiment.

    Unless a connection is passed in, the lookup is performed in the database
    file that is specified in the config.

    Note that the `counter` used in this function is not preserved when
    copying data to another db file. We recommend using
    :func:`.load_by_run_spec` which does not have this issue and is
    significantly more flexible.

    Args:
        counter: counter of the dataset within the given experiment
        exp_id: id of the experiment where to look for the dataset
        conn: connection to the database to load from. If not provided, a
          connection to the DB file specified in the config is made

    Returns:
        :class:`.DataSet` of the given counter in the given experiment
    """
    if not conn:
        conn = connect(get_DB_location())

    query = """
    SELECT run_id
    FROM
      runs
    WHERE
      result_counter= ? AND
      exp_id = ?
    """
    cursor = transaction(conn, query, counter, exp_id)
    return DataSet(conn=conn, run_id=one(cursor, 'run_id'))
Esempio n. 5
0
def load_by_guid(guid: str, conn: Optional[ConnectionPlus] = None) -> DataSet:
    """
    Load the dataset identified by the given GUID.

    If no connection is provided, lookup is performed in the database file that
    is specified in the config.

    Args:
        guid: guid of the dataset
        conn: connection to the database to load from

    Returns:
        dataset with the given guid

    Raises:
        NameError: if no run with the given GUID exists in the database
        RuntimeError: if several runs with the given GUID are found
    """
    if not conn:
        conn = connect(get_DB_location())

    # the lookup helper itself raises the RuntimeError for ambiguous GUIDs;
    # a return value of -1 signals that nothing matched
    matching_run_id = get_runid_from_guid(conn, guid)
    if matching_run_id != -1:
        return DataSet(run_id=matching_run_id, conn=conn)

    raise NameError(f'No run with GUID: {guid} found in database.')
Esempio n. 6
0
def load_experiment_by_name(
        name: str,
        sample: Optional[str] = None,
        conn: Optional[ConnectionPlus] = None) -> Experiment:
    """
    Load the single experiment that matches the given name (and sample).

    Several experiments may share the same name and sample name. In that
    case nothing is loaded; an error listing all matches is raised instead.

    Args:
        name: the name of the experiment
        sample: the name of the sample; a falsy value (None or "") means the
          search is done on the experiment name only
        conn: connection to the database. If not supplied, a new connection
          to the DB file specified in the config is made

    Returns:
        the requested experiment

    Raises:
        ValueError: if no experiment matches, or if more than one does
    """
    if not conn:
        conn = connect(get_DB_location())

    # pick the query and its positional parameters, then run it once
    if sample:
        sql = """
        SELECT
            *
        FROM
            experiments
        WHERE
            sample_name = ? AND
            name = ?
        """
        query_args = (sample, name)
    else:
        sql = """
        SELECT
            *
        FROM
            experiments
        WHERE
            name = ?
        """
        query_args = (name,)
    matches = transaction(conn, sql, *query_args).fetchall()

    if len(matches) == 0:
        raise ValueError("Experiment not found")

    if len(matches) > 1:
        listing = "\n".join(
            f"exp_id:{match['exp_id']} ({match['name']}-{match['sample_name']})"
            f" started at ({match['start_time']})"
            for match in matches)
        raise ValueError(f"Many experiments matching your request"
                         f" found:\n{listing}")

    return Experiment(exp_id=matches[0]['exp_id'], conn=conn)
Esempio n. 7
0
def load_or_create_experiment(
        experiment_name: str,
        sample_name: Optional[str] = None,
        conn: Optional[ConnectionPlus] = None) -> Experiment:
    """
    Return the experiment with the given name and sample name, creating it
    first when no such experiment exists yet.

    Args:
        experiment_name: Name of the experiment to find or create
        sample_name: Name of the sample
        conn: Connection to the database. If not supplied, a new connection
          to the DB file specified in the config is made

    Returns:
        The found or created experiment

    Raises:
        ValueError: any ValueError from the lookup other than
          "Experiment not found" is passed on to the caller
    """
    if not conn:
        conn = connect(get_DB_location())

    try:
        found = load_experiment_by_name(experiment_name,
                                        sample_name,
                                        conn=conn)
    except ValueError as exception:
        # only the "not found" failure is recoverable by creating a new one
        if "Experiment not found" not in str(exception):
            raise exception
        return new_experiment(experiment_name, sample_name, conn=conn)
    else:
        return found
Esempio n. 8
0
def test_has_attributes_after_init():
    """
    Check that every expected attribute exists and can be read right after
    __init__, both for a newly created dataset (run_id is None) and for a
    loaded one (run_id is not None)
    """
    expected_attrs = (
        'path_to_db', 'conn', '_run_id', 'run_id',
        '_debug', 'subscribers', '_completed', 'name', 'table_name',
        'guid', 'number_of_results', 'counter', 'parameters',
        'paramspecs', 'exp_id', 'exp_name', 'sample_name',
        'run_timestamp_raw', 'completed_timestamp_raw', 'completed',
        'snapshot', 'snapshot_raw',
    )

    db_path = get_DB_location()

    # first create a new run, then load the run with id 1
    for run_id in (None, 1):
        dataset = DataSet(db_path, run_id=run_id)
        for attr_name in expected_attrs:
            assert hasattr(dataset, attr_name)
            # reading the attribute must not raise either
            getattr(dataset, attr_name)
Esempio n. 9
0
def load_by_counter(counter: int,
                    exp_id: int,
                    conn: Optional[ConnectionPlus] = None) -> DataSet:
    """
    Load the dataset with the given counter from the given experiment.

    Unless a connection is passed in, the lookup is performed in the database
    file that is specified in the config.

    Args:
        counter: counter of the dataset within the given experiment
        exp_id: id of the experiment where to look for the dataset
        conn: connection to the database to load from. If not provided, a
          connection to the DB file specified in the config is made

    Returns:
        dataset of the given counter in the given experiment
    """
    db_conn = conn or connect(get_DB_location())
    run_id_query = """
    SELECT run_id
    FROM
      runs
    WHERE
      result_counter= ? AND
      exp_id = ?
    """
    found_run_id = one(transaction(db_conn, run_id_query, counter, exp_id),
                       'run_id')
    return DataSet(conn=db_conn, run_id=found_run_id)
Esempio n. 10
0
def load_by_id(run_id: int, conn: Optional[ConnectionPlus] = None) -> DataSet:
    """
    Load the dataset that has the given run id.

    If no connection is provided, lookup is performed in the database file that
    is specified in the config.

    Note that the `run_id` used in this function is not preserved when copying
    data to another db file. We recommend using :func:`.load_by_run_spec` which
    does not have this issue and is significantly more flexible.

    Args:
        run_id: run id of the dataset
        conn: connection to the database to load from

    Returns:
        :class:`.DataSet` with the given run id

    Raises:
        ValueError: if `run_id` is None
    """
    if run_id is None:
        raise ValueError('run_id has to be a positive integer, not None.')

    return DataSet(conn=conn or connect(get_DB_location()), run_id=run_id)
Esempio n. 11
0
def test_load_last_experiment(empty_temp_db):
    """
    Loading the last experiment must fail while there are no experiments and
    must return the most recently created experiment afterwards.
    """
    # no experiments yet: loading has to raise
    no_experiments_msg = 'There are no experiments in the database file'
    with pytest.raises(ValueError, match=no_experiments_msg):
        load_last_experiment()

    # create two experiments, both must live in the configured database
    first_exp, second_exp = Experiment(exp_id=None), Experiment(exp_id=None)
    for created in (first_exp, second_exp):
        assert get_DB_location() == created.path_to_db

    # the loaded experiment has to be the second (i.e. latest) one
    latest = load_last_experiment()
    assert latest.exp_id == second_exp.exp_id
    assert latest.exp_id != first_exp.exp_id
    assert latest.path_to_db == second_exp.path_to_db
Esempio n. 12
0
    def __init__(self,
                 path_to_db: Optional[str] = None,
                 exp_id: Optional[int] = None,
                 name: Optional[str] = None,
                 sample_name: Optional[str] = None,
                 format_string: str = "{}-{}-{}",
                 conn: Optional[ConnectionPlus] = None) -> None:
        """
        Load an existing experiment (exp_id given) or create a new one
        (exp_id is None).

        Args:
            path_to_db: The path of the database file to create in/load from.
              Must not be passed together with conn
            exp_id: The id of the experiment to load
            name: The name of the experiment to create; defaults to
              "experiment_<number of existing experiments + 1>". Ignored if
              exp_id is not None
            sample_name: The sample name for this experiment; defaults to
              "some_sample". Ignored if exp_id is not None
            format_string: The format string used to name result-tables.
              Ignored if exp_id is not None.
            conn: connection to the database. If not supplied, the constructor
              first tries to use path_to_db to figure out where to connect to.
              If path_to_db is not supplied either, a new connection
              to the DB file specified in the config is made

        Raises:
            ValueError: if both conn and path_to_db are given, if exp_id does
              not lie between 1 and the number of experiments in the
              database, or if format_string cannot be formatted with
              (name, exp_id, run_counter)
        """

        if conn is not None and path_to_db is not None:
            raise ValueError('Received BOTH conn and path_to_db. Please '
                             'provide only one or the other.')

        self._path_to_db = path_to_db or get_DB_location()
        self.conn = conn or connect(self.path_to_db, get_DB_debug())

        # number of experiments currently stored; used as the largest valid id
        max_id = len(get_experiments(self.conn))

        if exp_id is not None:
            # loading an existing experiment: only validate and remember the id
            if exp_id not in range(1, max_id + 1):
                raise ValueError('No such experiment in the database')
            self._exp_id = exp_id
            return

        # creating a new experiment: reject a broken format string up front.
        # The sqlite layer will later format it as
        # `(name, exp_id, run_counter)`, so the same shape is tried here
        try:
            format_string.format("name", 1, 1)
        except Exception as e:
            raise ValueError("Invalid format string. Can not format "
                             "(name, exp_id, run_counter)") from e

        log.info("creating new experiment in {}".format(self.path_to_db))

        if not name:
            name = f"experiment_{max_id+1}"
        if not sample_name:
            sample_name = "some_sample"
        self._exp_id = ne(self.conn, name, sample_name, format_string)
Esempio n. 13
0
def load_experiment_by_name(
    name: str,
    sample: Optional[str] = None,
    conn: Optional[ConnectionPlus] = None,
    load_last_duplicate: bool = False,
) -> Experiment:
    """
    Load the experiment that matches the given name (and sample name).

    Several experiments may share the same name and sample name. By default
    that situation is an error; with load_last_duplicate set to True the
    last of the duplicated experiments is loaded instead.

    The id of the returned experiment is handed to
    ``_set_default_experiment_id`` for the database file of the connection.

    Args:
        name: the name of the experiment
        sample: the name of the sample; None means the search is done on the
            experiment name only
        load_last_duplicate: If True, prevent raising error for having
            multiple experiments with the same name and sample name, and
            load the last duplicated experiment, instead.
        conn: connection to the database. If not supplied, a new connection
            to the DB file specified in the config is made

    Returns:
        the requested experiment

    Raises:
        ValueError: if no experiment is found for the supplied name and
            sample, or if several are found and load_last_duplicate is
            False.
    """
    conn = conn or connect(get_DB_location())

    search_terms = {"name": name}
    if sample is not None:
        search_terms["sample_name"] = sample
    matching_ids = get_matching_exp_ids(conn, **search_terms)

    if len(matching_ids) == 0:
        raise ValueError("Experiment not found")

    if len(matching_ids) == 1:
        e = Experiment(exp_id=matching_ids[0], conn=conn)
    else:
        # every duplicate is loaded and described; `exp` ends up referring to
        # the last one, which is what load_last_duplicate hands back
        descriptions = []
        for exp_id in matching_ids:
            exp = load_experiment(exp_id, conn=conn)
            descriptions.append(
                f"exp_id:{exp.exp_id} ({exp.name}-{exp.sample_name})"
                f" started at ({exp.started_at})")
        if not load_last_duplicate:
            listing = "\n".join(descriptions)
            raise ValueError(f"Many experiments matching your request"
                             f" found:\n{listing}")
        e = exp

    _set_default_experiment_id(path_to_dbfile(conn), e.exp_id)
    return e
Esempio n. 14
0
def load_by_run_spec(*,
                     captured_run_id: Optional[int] = None,
                     captured_counter: Optional[int] = None,
                     experiment_name: Optional[str] = None,
                     sample_name: Optional[str] = None,
                     # guid parts
                     sample_id: Optional[int] = None,
                     location: Optional[int] = None,
                     work_station: Optional[int] = None,
                     conn: Optional[ConnectionPlus] = None) -> DataSet:
    """
    Load a run from one or more pieces of runs specification. All
    fields are optional but the function will raise an error if more than one
    run matching the supplied specification is found. Along with the error
    specs of the runs found will be printed.

    Args:
        captured_run_id: The run_id that was originally assigned to this
          at the time of capture.
        captured_counter: The counter that was originally assigned to this
          at the time of capture.
        experiment_name: name of the experiment that the run was captured in
        sample_name: The name of the sample given when creating the experiment.
        sample_id: The sample_id assigned as part of the GUID.
        location: The location code assigned as part of GUID.
        work_station: The workstation assigned as part of the GUID.
        conn: An optional connection to the database. If no connection is
          supplied a connection to the default database will be opened.

    Raises:
        NameError: if no run or more than one run with the given specification
         exists in the database

    Returns:
        :class:`.DataSet` matching the provided specification.
    """
    conn = conn or connect(get_DB_location())

    # first narrow down by the run/experiment fields stored in the database...
    guids = get_guids_from_run_spec(conn,
                                    captured_run_id=captured_run_id,
                                    captured_counter=captured_counter,
                                    experiment_name=experiment_name,
                                    sample_name=sample_name)

    # ...then by the pieces that are encoded in the GUID itself
    matched_guids = filter_guids_by_parts(guids, location, sample_id,
                                          work_station)

    if len(matched_guids) == 1:
        return load_by_guid(matched_guids[0], conn)
    elif len(matched_guids) > 1:
        # show the candidates so the caller can refine the specification
        print(generate_dataset_table(matched_guids, conn=conn))
        # note the trailing space after "uniquely": adjacent string literals
        # are concatenated verbatim
        raise NameError("More than one matching dataset found. "
                        "Please supply more information to uniquely "
                        "identify a dataset")
    else:
        raise NameError('No run matching the supplied information '
                        'found.')
Esempio n. 15
0
def load_last_experiment() -> Experiment:
    """
    Load the last experiment of the database file given in the config.

    Returns:
        The last experiment

    Raises:
        ValueError: if the database file contains no experiments
    """
    newest_exp_id = get_last_experiment(connect(get_DB_location()))
    if newest_exp_id is not None:
        return Experiment(exp_id=newest_exp_id)
    raise ValueError('There are no experiments in the database file')
Esempio n. 16
0
def load_last_experiment(conn: Optional[ConnectionPlus] = None) -> Experiment:
    """
    Load the last experiment found through the given connection.

    Args:
        conn: connection to the database. If not supplied, a new connection
          to the DB file specified in the config is made

    Returns:
        The last experiment

    Raises:
        ValueError: if the database contains no experiments
    """
    if not conn:
        conn = connect(get_DB_location())

    newest_exp_id = get_last_experiment(conn)
    if newest_exp_id is not None:
        return Experiment(exp_id=newest_exp_id, conn=conn)
    raise ValueError('There are no experiments in the database file')
Esempio n. 17
0
def test_runs_table_columns(empty_temp_db):
    """
    Check that the runs table of the configured database has exactly the
    columns listed in RUNS_TABLE_COLUMNS
    """
    # work on a copy so the module-level list is left untouched
    remaining = mut_queries.RUNS_TABLE_COLUMNS.copy()
    db_conn = mut_db.connect(get_DB_location())
    table_info = db_conn.cursor().execute("PRAGMA table_info(runs)")

    # removing fails loudly for a column that is unknown or seen twice
    for column in table_info:
        remaining.remove(column['name'])

    # anything left over is an expected column missing from the table
    assert remaining == []
Esempio n. 18
0
def test_dataset_length():
    """
    A new dataset has length 0; after adding one result for a single
    standalone parameter its length is 1.
    """
    dataset = DataSet(get_DB_location(), run_id=None)
    assert len(dataset) == 0

    single_param = ParamSpecBase(name='single',
                                 paramtype='numeric',
                                 label='',
                                 unit='N/A')
    dataset.set_interdependencies(
        InterDependencies_(standalones=(single_param,)))

    dataset.mark_started()
    dataset.add_results([{single_param.name: 1}])
    dataset.mark_completed()

    assert len(dataset) == 1
Esempio n. 19
0
def test_is_run_id_in_db(empty_temp_db):
    """
    is_run_id_in_database must report, for each distinct queried id, whether
    a run with that id exists.
    """
    db_conn = mut_db.connect(get_DB_location())
    mut_queries.new_experiment(db_conn, 'test_exp', 'no_sample')

    for _ in range(5):
        _dataset = DataSet(conn=db_conn, run_id=None)

    # the five datasets above are expected to own run_ids 1, 2, 3, 4, 5
    existing_ids = [1, 2, 3, 4, 5]
    queried_ids = [1, 3, 9999, 23, 0, 1, 1, 3, 34]

    # duplicates collapse: one entry per distinct queried id
    expected = {}
    for queried in np.unique(queried_ids):
        expected[queried] = queried in existing_ids

    acquired = mut_queries.is_run_id_in_database(db_conn, *queried_ids)

    assert expected == acquired
Esempio n. 20
0
def load_or_create_experiment(
    experiment_name: str,
    sample_name: Optional[str] = None,
    conn: Optional[ConnectionPlus] = None,
    load_last_duplicate: bool = False,
) -> Experiment:
    """
    Return the experiment with the given name and sample name, creating it
    first when no such experiment exists yet.

    Args:
        experiment_name: Name of the experiment to find or create.
        sample_name: Name of the sample.
        load_last_duplicate: If True, prevent raising error for having
            multiple experiments with the same name and sample name, and
            load the last duplicated experiment, instead.
        conn: Connection to the database. If not supplied, a new connection
            to the DB file specified in the config is made.

    Returns:
        The found or created experiment

    Raises:
        ValueError: If the name and sample name are not unique, unless
            load_last_duplicate is True.
    """
    if not conn:
        conn = connect(get_DB_location())

    try:
        found = load_experiment_by_name(
            experiment_name,
            sample_name,
            load_last_duplicate=load_last_duplicate,
            conn=conn,
        )
    except ValueError as exception:
        # only the "not found" failure is recoverable by creating a new one
        if "Experiment not found" not in str(exception):
            raise exception
        return new_experiment(experiment_name, sample_name, conn=conn)
    else:
        return found
Esempio n. 21
0
def new_experiment(name: str,
                   sample_name: Optional[str],
                   format_string: str = "{}-{}-{}",
                   conn: Optional[ConnectionPlus] = None) -> Experiment:
    """
    Create a new experiment through the given or the configured connection.

    Args:
        name: the name of the experiment
        sample_name: the name of the current sample
        format_string: basic format string for table-name
            must contain 3 placeholders.
        conn: connection to the database. If not supplied, a new connection
          to the DB file specified in the config is made

    Returns:
        the new experiment
    """
    if not conn:
        conn = connect(get_DB_location())

    created = Experiment(name=name,
                         sample_name=sample_name,
                         format_string=format_string,
                         conn=conn)
    return created
Esempio n. 22
0
def load_by_id(run_id: int, conn: Optional[ConnectionPlus] = None) -> DataSet:
    """
    Load the dataset that has the given run id.

    If no connection is provided, lookup is performed in the database file that
    is specified in the config.

    Args:
        run_id: run id of the dataset
        conn: connection to the database to load from

    Returns:
        dataset with the given run id

    Raises:
        ValueError: if `run_id` is None
    """
    if run_id is None:
        raise ValueError('run_id has to be a positive integer, not None.')

    db_conn = conn or connect(get_DB_location())
    return DataSet(conn=db_conn, run_id=run_id)
Esempio n. 23
0
def test_get_last_experiment_no_experiments(empty_temp_db):
    """
    get_last_experiment must return None for the configured database
    (presumably left without experiments by the `empty_temp_db` fixture).
    """
    db_conn = mut_db.connect(get_DB_location())
    last_exp_id = mut_queries.get_last_experiment(db_conn)
    assert last_exp_id is None
Esempio n. 24
0
    def __init__(self,
                 path_to_db: Optional[str] = None,
                 run_id: Optional[int] = None,
                 conn: Optional[ConnectionPlus] = None,
                 exp_id: Optional[int] = None,
                 name: Optional[str] = None,
                 specs: Optional[SpecsOrInterDeps] = None,
                 values=None,
                 metadata=None) -> None:
        """
        Create a new DataSet object. The object can either hold a new run or
        an already existing run. If a run_id is provided, then an old run is
        looked up, else a new run is created.

        Args:
            path_to_db: path to the sqlite file on disk. If not provided, the
              path will be read from the config.
            run_id: provide this when loading an existing run, leave it
              as None when creating a new run
            conn: connection to the DB; if provided and `path_to_db` is
              provided as well, then a ValueError is raised (this is to
              prevent the possibility of providing a connection to a DB
              file that is different from `path_to_db`)
            exp_id: the id of the experiment in which to create a new run.
              If None, the last experiment in the database is used.
              Ignored if run_id is provided.
            name: the name of the dataset; defaults to "dataset". Ignored if
              run_id is provided.
            specs: paramspecs belonging to the dataset. Ignored if run_id is
              provided.
            values: values to insert into the dataset. Ignored if run_id is
              provided.
            metadata: metadata to insert into the dataset. Ignored if run_id
              is provided.

        Raises:
            ValueError: if both `path_to_db` and `conn` are provided, if
              `run_id` is provided but no such run exists in the database,
              or if a new run is requested without `exp_id` while the
              database contains no experiments.
        """
        if path_to_db is not None and conn is not None:
            raise ValueError("Both `path_to_db` and `conn` arguments have "
                             "been passed together with non-None values. "
                             "This is not allowed.")
        self._path_to_db = path_to_db or get_DB_location()

        # a supplied connection is passed through make_connection_plus_from;
        # otherwise a new connection to the resolved database path is opened
        self.conn = make_connection_plus_from(conn) if conn is not None else \
            connect(self.path_to_db)

        self._run_id = run_id
        self._debug = False
        self.subscribers: Dict[str, _Subscriber] = {}
        self._interdeps: InterDependencies_

        if run_id is not None:
            # load an existing run: read its state back from the database
            if not run_exists(self.conn, run_id):
                raise ValueError(f"Run with run_id {run_id} does not exist in "
                                 f"the database")
            self._completed = completed(self.conn, self.run_id)
            run_desc = self._get_run_description_from_db()
            if run_desc._old_style_deps:
                # old-style dependencies are converted to the new class
                # TODO: what if the old run had invalid interdep.s?
                old_idps: InterDependencies = cast(InterDependencies,
                                                   run_desc.interdeps)
                self._interdeps = old_to_new(old_idps)
            else:
                new_idps: InterDependencies_ = cast(InterDependencies_,
                                                    run_desc.interdeps)
                self._interdeps = new_idps
            self._metadata = get_metadata_from_run_id(self.conn, run_id)
            # a run counts as started once it has a run timestamp
            self._started = self.run_timestamp_raw is not None

        else:
            # Actually perform all the side effects needed for the creation
            # of a new dataset. Note that a dataset is created (in the DB)
            # with no parameters; they are written to disk when the dataset
            # is marked as started
            if exp_id is None:
                # fall back to the last experiment of the database, if any
                if len(get_experiments(self.conn)) > 0:
                    exp_id = get_last_experiment(self.conn)
                else:
                    # the trailing space after "found." matters: adjacent
                    # string literals are concatenated verbatim
                    raise ValueError("No experiments found. "
                                     "You can start a new one with:"
                                     " new_experiment(name, sample_name)")
            name = name or "dataset"
            _, run_id, __ = create_run(self.conn,
                                       exp_id,
                                       name,
                                       generate_guid(),
                                       parameters=None,
                                       values=values,
                                       metadata=metadata)
            # this is really the UUID (an ever increasing count in the db)
            self._run_id = run_id
            self._completed = False
            self._started = False
            # accept new-style interdependencies as they are, convert a
            # collection of old-style specs, or start out empty
            if isinstance(specs, InterDependencies_):
                self._interdeps = specs
            elif specs is not None:
                self._interdeps = old_to_new(InterDependencies(*specs))
            else:
                self._interdeps = InterDependencies_()
            self._metadata = get_metadata_from_run_id(self.conn, self.run_id)