Beispiel #1
0
    def __init__(self,
                 path_to_db: str = None,
                 run_id: Optional[int] = None,
                 conn: Optional[ConnectionPlus] = None,
                 exp_id=None,
                 name: str = None,
                 specs: Optional[SpecsOrInterDeps] = None,
                 values=None,
                 metadata=None) -> None:
        """
        Create a new DataSet object. The object can either hold a new run or
        an already existing run. If a run_id is provided, then an old run is
        looked up, else a new run is created.

        Args:
            path_to_db: path to the sqlite file on disk. If not provided, the
              path will be read from the config.
            run_id: provide this when loading an existing run, leave it
              as None when creating a new run
            conn: connection to the DB; if provided and `path_to_db` is
              provided as well, then a ValueError is raised (this is to
              prevent the possibility of providing a connection to a DB
              file that is different from `path_to_db`)
            exp_id: the id of the experiment in which to create a new run.
              Ignored if run_id is provided.
            name: the name of the dataset. Ignored if run_id is provided.
            specs: paramspecs belonging to the dataset. Ignored if run_id is
              provided.
            values: values to insert into the dataset. Ignored if run_id is
              provided.
            metadata: metadata to insert into the dataset. Ignored if run_id
              is provided.
        """
        if path_to_db is not None and conn is not None:
            raise ValueError("Both `path_to_db` and `conn` arguments have "
                             "been passed together with non-None values. "
                             "This is not allowed.")
        self._path_to_db = path_to_db or get_DB_location()

        self.conn = make_connection_plus_from(conn) if conn is not None else \
            connect(self.path_to_db)

        self._run_id = run_id
        self._debug = False
        self.subscribers: Dict[str, _Subscriber] = {}
        self._interdeps: InterDependencies_

        if run_id is not None:
            if not run_exists(self.conn, run_id):
                raise ValueError(f"Run with run_id {run_id} does not exist in "
                                 f"the database")
            self._completed = completed(self.conn, self.run_id)
            run_desc = self._get_run_description_from_db()
            if run_desc._old_style_deps:
                # TODO: what if the old run had invalid interdep.s?
                old_idps: InterDependencies = cast(InterDependencies,
                                                   run_desc.interdeps)
                self._interdeps = old_to_new(old_idps)
            else:
                new_idps: InterDependencies_ = cast(InterDependencies_,
                                                    run_desc.interdeps)
                self._interdeps = new_idps
            self._metadata = get_metadata_from_run_id(self.conn, run_id)
            self._started = self.run_timestamp_raw is not None

        else:
            # Actually perform all the side effects needed for the creation
            # of a new dataset. Note that a dataset is created (in the DB)
            # with no parameters; they are written to disk when the dataset
            # is marked as started
            if exp_id is None:
                if len(get_experiments(self.conn)) > 0:
                    exp_id = get_last_experiment(self.conn)
                else:
                    raise ValueError("No experiments found."
                                     "You can start a new one with:"
                                     " new_experiment(name, sample_name)")
            name = name or "dataset"
            _, run_id, __ = create_run(self.conn,
                                       exp_id,
                                       name,
                                       generate_guid(),
                                       parameters=None,
                                       values=values,
                                       metadata=metadata)
            # this is really the UUID (an ever increasing count in the db)
            self._run_id = run_id
            self._completed = False
            self._started = False
            if isinstance(specs, InterDependencies_):
                self._interdeps = specs
            elif specs is not None:
                self._interdeps = old_to_new(InterDependencies(*specs))
            else:
                self._interdeps = InterDependencies_()
            self._metadata = get_metadata_from_run_id(self.conn, self.run_id)
Beispiel #2
0
def test_run_exist(dataset):
    assert mut.run_exists(dataset.conn, dataset.run_id)
    assert not mut.run_exists(dataset.conn, dataset.run_id + 1)
Beispiel #3
0
    def __init__(self, path_to_db: str=None,
                 run_id: Optional[int]=None,
                 conn=None,
                 exp_id=None,
                 name: str=None,
                 specs: SPECS=None,
                 values=None,
                 metadata=None) -> None:
        """
        Create a new DataSet object. The object can either hold a new run or
        an already existing run. If a run_id is provided, then an old run is
        looked up, else a new run is created.

        Args:
            path_to_db: path to the sqlite file on disk. If not provided, the
              path will be read from the config.
            run_id: provide this when loading an existing run, leave it
              as None when creating a new run
            conn: connection to the DB
            exp_id: the id of the experiment in which to create a new run.
              Ignored if run_id is provided.
            name: the name of the dataset. Ignored if run_id is provided.
            specs: paramspecs belonging to the dataset. Ignored if run_id is
              provided.
            values: values to insert into the dataset. Ignored if run_id is
              provided.
            metadata: metadata to insert into the dataset. Ignored if run_id
              is provided.
        """
        # TODO: handle fail here by defaulting to
        # a standard db
        self.path_to_db = path_to_db or get_DB_location()
        if conn is None:
            self.conn = connect(self.path_to_db)
        else:
            self.conn = conn

        self.run_id = run_id
        self._debug = False
        self.subscribers: Dict[str, _Subscriber] = {}
        if run_id:
            if not run_exists(self.conn, run_id):
                raise ValueError(f"Run with run_id {run_id} does not exist in "
                                 f"the database")
            self._completed = completed(self.conn, self.run_id)
        else:

            if exp_id is None:
                if len(get_experiments(self.conn)) > 0:
                    exp_id = get_last_experiment(self.conn)
                else:
                    raise ValueError("No experiments found."
                                     "You can start a new one with:"
                                     " new_experiment(name, sample_name)")

            # Actually perform all the side effects needed for
            # the creation of a new dataset.

            name = name or "dataset"

            _, run_id, __ = create_run(self.conn, exp_id, name,
                                       generate_guid(),
                                       specs, values, metadata)

            # this is really the UUID (an ever increasing count in the db)
            self.run_id = run_id
            self._completed = False