Beispiel #1
0
def test_experiments_with_NULL_sample_name(two_empty_temp_db_connections,
                                           some_paramspecs):
    """
    Check that an experiment with a NULL sample name is not re-inserted.

    In older API versions (corresponding to DB version 3), users could get
    away with setting the sample name to None. Here the sample name of the
    source experiment is forced to NULL directly in the database, and its
    runs are then extracted into the target DB twice. After each extraction
    the target DB must hold exactly one experiment, and that experiment
    must end up with five runs.
    """
    source_conn, target_conn = two_empty_temp_db_connections
    source_exp_1 = Experiment(conn=source_conn, name='null_sample_name')

    source_path = path_to_dbfile(source_conn)
    target_path = path_to_dbfile(target_conn)

    # the same set of parameters is used for every run below
    paramspecs = list(some_paramspecs[2].values())

    # make 5 runs in the experiment, each holding 10 results
    exp_1_run_ids = []
    for _ in range(5):
        source_dataset = DataSet(conn=source_conn, exp_id=source_exp_1.exp_id)
        exp_1_run_ids.append(source_dataset.run_id)

        for ps in paramspecs:
            source_dataset.add_parameter(ps)
        source_dataset.mark_started()

        for val in range(10):
            source_dataset.add_result({ps.name: val for ps in paramspecs})
        source_dataset.mark_completed()

    # emulate the old behaviour by writing NULL straight into the table
    null_sample_name_query = """
          UPDATE experiments
          SET sample_name = NULL
          WHERE exp_id = 1
          """
    source_conn.execute(null_sample_name_query)
    source_conn.commit()

    assert source_exp_1.sample_name is None

    # extracting the same runs twice must not create a second experiment
    for _ in range(2):
        extract_runs_into_db(source_path, target_path, 1, 2, 3, 4, 5)
        assert len(get_experiments(target_conn)) == 1

    target_exp = Experiment(exp_id=1, conn=target_conn)
    assert len(target_exp) == 5
Beispiel #2
0
def new_data_set(name,
                 exp_id: Optional[int] = None,
                 specs: SPECS = None,
                 values=None,
                 metadata=None,
                 conn=None) -> DataSet:
    """ Create a new dataset.
    If exp_id is not specified the last experiment will be loaded by default.

    Args:
        name: the name of the new dataset
        exp_id:  the id of the experiments this dataset belongs to
            defaults to the last experiment
        specs: list of parameters to create this data_set with
        values: the values to associate with the parameters
        metadata:  the values to associate with the dataset
        conn: an optional connection to the database. If given, it is
            handed to the DataSet instead of a database path.

    Returns:
        The newly created DataSet

    Raises:
        ValueError: if exp_id is None and the database holds no experiments
    """
    # Hand the DataSet either a path or a connection, never both: a DataSet
    # constructor that validates its arguments rejects the combination, and
    # one that does not simply falls back to the configured location when
    # no path is given.
    if conn is None:
        d = DataSet(get_DB_location())
    else:
        d = DataSet(conn=conn)

    if exp_id is None:
        if len(get_experiments(d.conn)) > 0:
            exp_id = get_last_experiment(d.conn)
        else:
            raise ValueError("No experiments found."
                             "You can start a new one with:"
                             " new_experiment(name, sample_name)")
    d._new(name, exp_id, specs, values, metadata)
    return d
    def __init__(self,
                 path_to_db: Optional[str] = None,
                 exp_id: Optional[int] = None,
                 name: Optional[str] = None,
                 sample_name: Optional[str] = None,
                 format_string: str = "{}-{}-{}",
                 conn: Optional[ConnectionPlus] = None) -> None:
        """
        Load an existing experiment or create a new one.

        When exp_id is given, the experiment with that id is loaded; when
        exp_id is None, a new experiment is created.

        Args:
            path_to_db: The path of the database file to create in/load from.
              Must not be passed together with conn.
            exp_id: The id of the experiment to load
            name: The name of the experiment to create. Ignored if exp_id is
              not None
            sample_name: The sample name for this experiment. Ignored if exp_id
              is not None
            format_string: The format string used to name result-tables.
              Ignored if exp_id is not None.
            conn: connection to the database. If not supplied, a connection
              is opened to path_to_db, or, if that is not supplied either,
              to the DB file specified in the config

        Raises:
            ValueError: if both conn and path_to_db are given, if exp_id is
              not in 1..(number of experiments in the DB), or if
              format_string cannot format three positional values
        """

        both_given = conn is not None and path_to_db is not None
        if both_given:
            raise ValueError('Received BOTH conn and path_to_db. Please '
                             'provide only one or the other.')

        self._path_to_db = path_to_db or get_DB_location()
        if conn:
            self.conn = conn
        else:
            self.conn = connect(self.path_to_db, get_DB_debug())

        # the number of stored experiments doubles as the highest valid id
        n_existing = len(get_experiments(self.conn))

        if exp_id is not None:
            # loading: only ids 1..n_existing are accepted
            if exp_id not in range(1, n_existing + 1):
                raise ValueError('No such experiment in the database')
            self._exp_id = exp_id
            return

        # creating: it is better to catch an invalid format string earlier
        # than later
        try:
            # the sqlite_base will try to format
            # (name, exp_id, run_counter)
            format_string.format("name", 1, 1)
        except Exception as err:
            raise ValueError("Invalid format string. Can not format "
                             "(name, exp_id, run_counter)") from err

        log.info("creating new experiment in {}".format(self.path_to_db))

        if not name:
            name = f"experiment_{n_existing + 1}"
        if not sample_name:
            sample_name = "some_sample"
        self._exp_id = ne(self.conn, name, sample_name, format_string)
Beispiel #4
0
def experiments() -> List[Experiment]:
    """
    List all the experiments in the container (database file from config)

    The connection used to read the experiment rows is closed again before
    the individual experiments are loaded.

    Returns:
        All the experiments in the container
    """
    db_location = get_DB_location()
    log.info("loading experiments from {}".format(db_location))

    # The connection is only needed to list the rows; close it even if the
    # query fails so that it does not stay open.
    conn = connect(db_location, get_DB_debug())
    try:
        rows = get_experiments(conn)
    finally:
        conn.close()

    return [load_experiment(row['exp_id']) for row in rows]
Beispiel #5
0
def experiments() -> List[Experiment]:
    """
    List all the experiments in the container

    The connection used to read the experiment rows is closed again before
    the individual experiments are loaded.

    Returns:
        All the experiments in the container

    """
    log.info("loading experiments from {}".format(DB))

    # The connection is only needed to list the rows; close it even if the
    # query fails so that it does not stay open.
    conn = connect(DB, debug_db)
    try:
        rows = get_experiments(conn)
    finally:
        conn.close()

    return [load_experiment(row['exp_id']) for row in rows]
    def __init__(self, path_to_db: Optional[str]=None,
                 exp_id: Optional[int]=None,
                 name: Optional[str]=None,
                 sample_name: Optional[str]=None,
                 format_string: Optional[str]="{}-{}-{}") -> None:
        """
        Create or load an experiment. If exp_id is None, a new experiment is
        created. If exp_id is not None, an experiment is loaded.

        Args:
            path_to_db: The path of the database file to create in/load from.
              If not given, the location from the config is used
            exp_id: The id of the experiment to load
            name: The name of the experiment to create. Ignored if exp_id is
              not None
            sample_name: The sample name for this experiment. Ignored if exp_id
              is not None
            format_string: The format string used to name result-tables.
              Ignored if exp_id is not None.

        Raises:
            ValueError: if exp_id is not in 1..(number of experiments in the
              DB), or if format_string cannot format three positional values
        """

        self.path_to_db = path_to_db or get_DB_location()
        self.conn = connect(self.path_to_db, get_DB_debug())

        # the number of stored experiments doubles as the highest valid id
        max_id = len(get_experiments(self.conn))

        # Compare against None explicitly: a plain truthiness test would
        # treat exp_id=0 as "not given" and silently create a new experiment
        # instead of reporting that no such experiment exists.
        if exp_id is not None:
            if exp_id not in range(1, max_id+1):
                raise ValueError('No such experiment in the database')
            self._exp_id = exp_id
        else:

            # it is better to catch an invalid format string earlier than later
            try:
                # the sqlite_base will try to format
                # (name, exp_id, run_counter)
                format_string.format("name", 1, 1)
            except Exception as e:
                raise ValueError("Invalid format string. Can not format "
                                 "(name, exp_id, run_counter)") from e

            log.info("creating new experiment in {}".format(self.path_to_db))

            name = name or f"experiment_{max_id+1}"
            sample_name = sample_name or "some_sample"
            self._exp_id = ne(self.conn, name, sample_name, format_string)
            # only set when a new experiment is created
            self.format_string = format_string
Beispiel #7
0
def new_data_set(name,
                 exp_id: Optional[int] = None,
                 specs: SPECS = None,
                 values=None,
                 metadata=None,
                 conn=None) -> DataSet:
    """ Create a new dataset.
    If exp_id is not specified the last experiment will be loaded by default.

    Args:
        name: the name of the new dataset
        exp_id:  the id of the experiments this dataset belongs to
            defaults to the last experiment
        specs: list of parameters to create this data_set with
        values: the values to associate with the parameters
        metadata:  the values to associate with the dataset
        conn: an optional connection to the database. If omitted, a
            temporary connection to the configured database is opened for
            the experiment lookup and closed again before the DataSet is
            created.

    Returns:
        The newly created DataSet

    Raises:
        ValueError: if exp_id is None and the database holds no experiments
    """
    path_to_db = get_DB_location()
    tempcon = conn is None
    if tempcon:
        conn = connect(path_to_db)

    try:
        if exp_id is None:
            if len(get_experiments(conn)) > 0:
                exp_id = get_last_experiment(conn)
            else:
                raise ValueError("No experiments found."
                                 "You can start a new one with:"
                                 " new_experiment(name, sample_name)")
    finally:
        # Close the temporary connection on every path, including the error
        # path above, so that it is never left open.
        if tempcon:
            conn.close()
            conn = None

    # This is admittedly a bit weird. We create a dataset, link it to some
    # run in the DB and then (using _new) change what it's linked to
    #
    # Hand the DataSet either a path or a connection, never both: a DataSet
    # constructor that validates its arguments rejects the combination, and
    # one that does not simply falls back to the configured location when
    # no path is given.
    if conn is None:
        d = DataSet(path_to_db, run_id=None)
    else:
        d = DataSet(run_id=None, conn=conn)
    d._new(name, exp_id, specs, values, metadata)

    return d
def test_correct_experiment_routing(two_empty_temp_db_connections,
                                    some_paramspecs):
    """
    Check that runs are routed to the right experiment in the target DB.

    Existing experiments must be correctly identified, and inserting the
    same runs several times must not matter (run insertion is idempotent).
    """
    source_conn, target_conn = two_empty_temp_db_connections

    # the same set of parameters is used for every run below
    paramspecs = list(some_paramspecs[2].values())

    def populate_and_complete(dataset):
        # register the parameters, add ten results and mark the run complete
        for ps in paramspecs:
            dataset.add_parameter(ps)
        for val in range(10):
            dataset.add_result({ps.name: val for ps in paramspecs})
        dataset.mark_complete()

    # first experiment: 5 runs
    source_exp_1 = Experiment(conn=source_conn)

    exp_1_run_ids = []
    for _ in range(5):
        source_dataset = DataSet(conn=source_conn, exp_id=source_exp_1.exp_id)
        exp_1_run_ids.append(source_dataset.run_id)
        populate_and_complete(source_dataset)

    # second experiment: 1 run
    source_exp_2 = Experiment(conn=source_conn)
    ds = DataSet(conn=source_conn, exp_id=source_exp_2.exp_id, name="lala")
    exp_2_run_ids = [ds.run_id]
    populate_and_complete(ds)

    source_path = path_to_dbfile(source_conn)
    target_path = path_to_dbfile(target_conn)

    # copy the first two runs of experiment 1
    first_two = exp_1_run_ids[:2]
    extract_runs_into_db(source_path, target_path, *first_two)

    target_exp1 = Experiment(conn=target_conn, exp_id=1)
    assert len(target_exp1) == 2

    # copy two more runs; the first of them is already in the target
    overlapping = exp_1_run_ids[1:3]
    extract_runs_into_db(source_path, target_path, *overlapping)
    assert len(target_exp1) == 3

    # a run from the other experiment must not end up in experiment 1
    extract_runs_into_db(source_path, target_path, ds.run_id)
    assert len(target_exp1) == 3

    target_exp2 = Experiment(conn=target_conn, exp_id=2)
    assert len(target_exp2) == 1

    # finally insert every single run from experiment 1
    extract_runs_into_db(source_path, target_path, *exp_1_run_ids)

    # check for idempotency once more by inserting all the runs again, in
    # reverse order; the target file must stay untouched
    with raise_if_file_changed(target_path):
        extract_runs_into_db(source_path, target_path,
                             *reversed(exp_1_run_ids))

    target_exps = get_experiments(target_conn)

    assert len(target_exps) == 2
    assert len(target_exp1) == 5
    assert len(target_exp2) == 1

    # every source run must have an identical counterpart in the target
    for run_id in [*exp_1_run_ids, *exp_2_run_ids]:
        source_ds = DataSet(conn=source_conn, run_id=run_id)
        target_ds = load_by_guid(guid=source_ds.guid, conn=target_conn)

        assert source_ds.the_same_dataset_as(target_ds)

        source_param_names = source_ds.parameters.split(',')
        target_param_names = target_ds.parameters.split(',')
        source_data = source_ds.get_data(*source_param_names)
        target_data = target_ds.get_data(*target_param_names)

        assert source_data == target_data
Beispiel #9
0
    def __init__(self,
                 path_to_db: str = None,
                 run_id: Optional[int] = None,
                 conn: Optional[ConnectionPlus] = None,
                 exp_id=None,
                 name: str = None,
                 specs: Optional[SpecsOrInterDeps] = None,
                 values=None,
                 metadata=None) -> None:
        """
        Set up a DataSet object for either an existing run or a new run.

        If a run_id is provided, the corresponding run is looked up in the
        database; otherwise a new run is created.

        Args:
            path_to_db: path to the sqlite file on disk. If not provided, the
              path will be read from the config.
            run_id: provide this when loading an existing run, leave it
              as None when creating a new run
            conn: connection to the DB; must not be given together with
              `path_to_db` (this is to prevent the possibility of providing
              a connection to a DB file that is different from `path_to_db`)
            exp_id: the id of the experiment in which to create a new run.
              Ignored if run_id is provided. Defaults to the last experiment.
            name: the name of the dataset. Ignored if run_id is provided.
            specs: paramspecs belonging to the dataset. Ignored if run_id is
              provided.
            values: values to insert into the dataset. Ignored if run_id is
              provided.
            metadata: metadata to insert into the dataset. Ignored if run_id
              is provided.

        Raises:
            ValueError: if both `path_to_db` and `conn` are given, if run_id
              does not exist in the database, or if a new run is requested
              without exp_id while the database holds no experiments
        """
        both_given = path_to_db is not None and conn is not None
        if both_given:
            raise ValueError("Both `path_to_db` and `conn` arguments have "
                             "been passed together with non-None values. "
                             "This is not allowed.")
        self._path_to_db = path_to_db or get_DB_location()

        if conn is not None:
            self.conn = make_connection_plus_from(conn)
        else:
            self.conn = connect(self.path_to_db)

        self._run_id = run_id
        self._debug = False
        self.subscribers: Dict[str, _Subscriber] = {}
        self._interdeps: InterDependencies_

        if run_id is not None:
            # Load the state of an already existing run from the database
            if not run_exists(self.conn, run_id):
                raise ValueError(f"Run with run_id {run_id} does not exist in "
                                 f"the database")
            self._completed = completed(self.conn, self.run_id)
            run_desc = self._get_run_description_from_db()
            if run_desc._old_style_deps:
                # TODO: what if the old run had invalid interdep.s?
                # old-style dependencies are converted to the new kind
                self._interdeps = old_to_new(
                    cast(InterDependencies, run_desc.interdeps))
            else:
                self._interdeps = cast(InterDependencies_,
                                       run_desc.interdeps)
            self._metadata = get_metadata_from_run_id(self.conn, run_id)
            self._started = self.run_timestamp_raw is not None

        else:
            # Actually perform all the side effects needed for the creation
            # of a new dataset. Note that a dataset is created (in the DB)
            # with no parameters; they are written to disk when the dataset
            # is marked as started
            if exp_id is None:
                if len(get_experiments(self.conn)) == 0:
                    raise ValueError("No experiments found."
                                     "You can start a new one with:"
                                     " new_experiment(name, sample_name)")
                exp_id = get_last_experiment(self.conn)
            if not name:
                name = "dataset"
            _first, new_run_id, _third = create_run(self.conn,
                                                    exp_id,
                                                    name,
                                                    generate_guid(),
                                                    parameters=None,
                                                    values=values,
                                                    metadata=metadata)
            # this is really the UUID (an ever increasing count in the db)
            self._run_id = new_run_id
            self._completed = False
            self._started = False
            if specs is None:
                self._interdeps = InterDependencies_()
            elif isinstance(specs, InterDependencies_):
                self._interdeps = specs
            else:
                # anything else is unpacked into old-style dependencies and
                # converted
                self._interdeps = old_to_new(InterDependencies(*specs))
            self._metadata = get_metadata_from_run_id(self.conn, self.run_id)
Beispiel #10
0
    def __init__(self, path_to_db: str=None,
                 run_id: Optional[int]=None,
                 conn=None,
                 exp_id=None,
                 name: str=None,
                 specs: SPECS=None,
                 values=None,
                 metadata=None) -> None:
        """
        Create a new DataSet object. The object can either hold a new run or
        an already existing run. If a run_id is provided, then an old run is
        looked up, else a new run is created.

        Args:
            path_to_db: path to the sqlite file on disk. If not provided, the
              path will be read from the config.
            run_id: provide this when loading an existing run, leave it
              as None when creating a new run
            conn: connection to the DB. If not provided, a new connection to
              the database file is opened.
            exp_id: the id of the experiment in which to create a new run.
              Ignored if run_id is provided. Defaults to the last experiment.
            name: the name of the dataset. Ignored if run_id is provided.
            specs: paramspecs belonging to the dataset. Ignored if run_id is
              provided.
            values: values to insert into the dataset. Ignored if run_id is
              provided.
            metadata: metadata to insert into the dataset. Ignored if run_id
              is provided.

        Raises:
            ValueError: if run_id does not exist in the database, or if a
              new run is requested without exp_id while the database holds
              no experiments
        """
        # TODO: handle fail here by defaulting to
        # a standard db
        self.path_to_db = path_to_db or get_DB_location()
        if conn is None:
            self.conn = connect(self.path_to_db)
        else:
            self.conn = conn

        self.run_id = run_id
        self._debug = False
        self.subscribers: Dict[str, _Subscriber] = {}
        # Compare against None explicitly: a plain truthiness test would
        # treat run_id=0 as a request for a new run instead of checking
        # whether that run exists.
        if run_id is not None:
            if not run_exists(self.conn, run_id):
                raise ValueError(f"Run with run_id {run_id} does not exist in "
                                 f"the database")
            self._completed = completed(self.conn, self.run_id)
        else:

            if exp_id is None:
                if len(get_experiments(self.conn)) > 0:
                    exp_id = get_last_experiment(self.conn)
                else:
                    raise ValueError("No experiments found."
                                     "You can start a new one with:"
                                     " new_experiment(name, sample_name)")

            # Actually perform all the side effects needed for
            # the creation of a new dataset.

            name = name or "dataset"

            _, run_id, __ = create_run(self.conn, exp_id, name,
                                       generate_guid(),
                                       specs, values, metadata)

            # this is really the UUID (an ever increasing count in the db)
            self.run_id = run_id
            self._completed = False