def load_by_counter(counter, exp_id):
    """
    Load a dataset given its counter in one experiment.

    The run id is looked up through a short-lived connection to the database
    file returned by ``get_DB_location()``. That connection is always closed
    again, also when the lookup fails, before the dataset is constructed.

    Args:
        counter: Counter of the dataset
        exp_id: Experiment the dataset belongs to

    Returns:
        the dataset
    """
    conn = connect(get_DB_location())
    sql = """ SELECT run_id FROM runs WHERE result_counter= ? AND exp_id = ? """
    try:
        c = transaction(conn, sql, counter, exp_id)
        run_id = one(c, 'run_id')
    finally:
        # Release the temporary connection even if the lookup raised;
        # the DataSet below is constructed from the database path instead.
        conn.close()
    return DataSet(get_DB_location(), run_id=run_id)
def experiments() -> List[Experiment]:
    """
    Load every experiment stored in the container (the database file
    given by the config).

    Returns:
        A list with one loaded experiment per row reported by
        ``get_experiments``
    """
    log.info("loading experiments from {}".format(get_DB_location()))
    connection = connect(get_DB_location(), get_DB_debug())
    return [load_experiment(record['exp_id'])
            for record in get_experiments(connection)]
def load_by_counter(counter: int, exp_id: int,
                    conn: Optional[ConnectionPlus] = None) -> DataSet:
    """
    Load the dataset that has the given counter within the given experiment.

    Args:
        counter: counter of the dataset within the given experiment
        exp_id: id of the experiment where to look for the dataset
        conn: connection to the database to load from. If not provided,
          a connection to the DB file specified in the config is made

    Returns:
        dataset of the given counter in the given experiment
    """
    if not conn:
        conn = connect(get_DB_location())

    query = """ SELECT run_id FROM runs WHERE result_counter= ? AND exp_id = ? """
    cursor = transaction(conn, query, counter, exp_id)
    found_run_id = one(cursor, 'run_id')

    return DataSet(conn=conn, run_id=found_run_id)
def load_by_guid(guid: str, conn: Optional[ConnectionPlus] = None) -> DataSet:
    """
    Load the dataset identified by a GUID.

    Args:
        guid: guid of the dataset
        conn: connection to the database to load from. If not provided,
          a connection to the DB file specified in the config is made

    Returns:
        dataset with the given guid

    Raises:
        NameError: if no run with the given GUID exists in the database
        RuntimeError: if several runs with the given GUID are found
    """
    if not conn:
        conn = connect(get_DB_location())

    # the lookup helper itself raises a RuntimeError when more than one
    # run matches the GUID; -1 is its "nothing found" marker
    matching_run_id = get_runid_from_guid(conn, guid)
    if matching_run_id != -1:
        return DataSet(run_id=matching_run_id, conn=conn)

    raise NameError(f'No run with GUID: {guid} found in database.')
def load_or_create_experiment(
        experiment_name: str,
        sample_name: Optional[str] = None,
        conn: Optional[ConnectionPlus] = None) -> Experiment:
    """
    Return the experiment with the given name and sample name, creating it
    when the lookup reports that it does not exist.

    Args:
        experiment_name: Name of the experiment to find or create
        sample_name: Name of the sample
        conn: Connection to the database. If not supplied, a new connection
          to the DB file specified in the config is made

    Returns:
        The found or created experiment

    Raises:
        ValueError: any lookup error other than "Experiment not found" is
          passed on to the caller
    """
    if not conn:
        conn = connect(get_DB_location())

    try:
        return load_experiment_by_name(experiment_name, sample_name,
                                       conn=conn)
    except ValueError as exception:
        # only the "not found" case is recoverable by creating the experiment
        if "Experiment not found" not in str(exception):
            raise exception
        return new_experiment(experiment_name, sample_name, conn=conn)
def load_experiment_by_name(
        name: str,
        sample: Optional[str] = None,
        conn: Optional[ConnectionPlus] = None) -> Experiment:
    """
    Load the single experiment that matches the given name (and, if given,
    the sample name).

    Several experiments may share the same name and sample name; in that
    case no experiment is returned and the matches are listed in the error.

    Args:
        name: the name of the experiment
        sample: the name of the sample; when empty or None only the
          experiment name is used for the lookup
        conn: connection to the database. If not supplied, a new connection
          to the DB file specified in the config is made

    Returns:
        the requested experiment

    Raises:
        ValueError: if no experiment matches, or if more than one does
    """
    if not conn:
        conn = connect(get_DB_location())

    if sample:
        cursor = transaction(
            conn,
            """ SELECT * FROM experiments WHERE sample_name = ? AND name = ? """,
            sample, name)
    else:
        cursor = transaction(
            conn,
            """ SELECT * FROM experiments WHERE name = ? """,
            name)

    matches = cursor.fetchall()
    match_count = len(matches)

    if match_count == 1:
        return Experiment(exp_id=matches[0]['exp_id'], conn=conn)

    if match_count == 0:
        raise ValueError("Experiment not found")

    # more than one hit: describe each candidate so the caller can choose
    listing = "\n".join(
        f"exp_id:{row['exp_id']} ({row['name']}-{row['sample_name']})"
        f" started at ({row['start_time']})"
        for row in matches
    )
    raise ValueError(f"Many experiments matching your request"
                     f" found:\n{listing}")
def new_data_set(name, exp_id: Optional[int] = None,
                 specs: SPECS = None, values=None,
                 metadata=None, conn=None) -> DataSet:
    """
    Create a new dataset in the database file given by the config.

    Args:
        name: the name of the new dataset
        exp_id: the id of the experiments this dataset belongs to;
          defaults to the last experiment
        specs: list of parameters to create this data_set with
        values: the values to associate with the parameters
        metadata: the values to associate with the dataset
        conn: connection handed on to the DataSet constructor

    Returns:
        the newly created dataset

    Raises:
        ValueError: if exp_id is None and the database has no experiments
    """
    dataset = DataSet(get_DB_location(), conn=conn)

    if exp_id is None:
        # fall back to the most recent experiment, if there is any
        if len(get_experiments(dataset.conn)) == 0:
            raise ValueError("No experiments found."
                             "You can start a new one with:"
                             " new_experiment(name, sample_name)")
        exp_id = get_last_experiment(dataset.conn)

    dataset._new(name, exp_id, specs, values, metadata)
    return dataset
def test_has_attributes_after_init():
    """
    Check that every expected attribute exists and can be read right after
    __init__, both when a new run is created (run_id is None) and when an
    existing run is loaded (run_id is not None)
    """
    attrs = ['path_to_db', '_path_to_db', 'conn', '_run_id', 'run_id',
             '_debug', 'subscribers', '_completed', 'name', 'table_name',
             'guid', 'number_of_results', 'counter', 'parameters',
             'paramspecs', 'exp_id', 'exp_name', 'sample_name',
             'run_timestamp_raw', 'completed_timestamp_raw', 'completed',
             'snapshot', 'snapshot_raw']

    path_to_db = get_DB_location()

    # first a freshly created run, then loading the run with id 1
    for run_id in (None, 1):
        ds = DataSet(path_to_db, run_id=run_id)
        for attr in attrs:
            assert hasattr(ds, attr)
            # reading the attribute must not raise either
            getattr(ds, attr)
def new_experiment(name: str,
                   sample_name: str,
                   format_string: Optional[str] = "{}-{}-{}") -> Experiment:
    """
    Create a new experiment in the database file given by the config.

    Args:
        name: the name of the experiment
        sample_name: the name of the current sample
        format_string: basic format string for table-name;
          must contain 3 placeholders.

    Returns:
        the new experiment
    """
    log.info("creating new experiment in {}".format(get_DB_location()))

    experiment = Experiment(get_DB_location())
    experiment._new(name, sample_name, format_string)
    return experiment
def test_load_last_experiment(empty_temp_db):
    """
    Loading the last experiment fails on an empty database and otherwise
    returns the most recently created experiment
    """
    # an empty database has no "last" experiment to load
    no_experiments_msg = 'There are no experiments in the database file'
    with pytest.raises(ValueError, match=no_experiments_msg):
        load_last_experiment()

    # two new experiments, created one after the other
    exp1 = Experiment(exp_id=None)
    exp2 = Experiment(exp_id=None)
    for created in (exp1, exp2):
        assert get_DB_location() == created.path_to_db

    # the loaded experiment has to be the second one
    last_exp = load_last_experiment()
    assert last_exp.exp_id == exp2.exp_id
    assert last_exp.exp_id != exp1.exp_id
    assert last_exp.path_to_db == exp2.path_to_db
def load_last_experiment() -> Experiment:
    """
    Load last experiment (from the database file given by the config)

    Returns:
        last experiment

    Raises:
        ValueError: if no last experiment id could be determined, i.e. the
          database file holds no experiments
    """
    conn = connect(get_DB_location())
    last_exp_id = get_last_experiment(conn)
    if last_exp_id is None:
        # Handing exp_id=None to Experiment asks it to create a new
        # experiment, which is not what a "load" function should do.
        raise ValueError('There are no experiments in the database file')
    return Experiment(exp_id=last_exp_id)
def __init__(self, path_to_db: Optional[str] = None,
             exp_id: Optional[int] = None,
             name: Optional[str] = None,
             sample_name: Optional[str] = None,
             format_string: str = "{}-{}-{}",
             conn: Optional[ConnectionPlus] = None) -> None:
    """
    Load an existing experiment (exp_id given) or create a new one
    (exp_id is None).

    Args:
        path_to_db: The path of the database file to create in/load
          from. Passing it together with conn raises an exception
        exp_id: The id of the experiment to load
        name: The name of the experiment to create. Ignored if exp_id is
          not None
        sample_name: The sample name for this experiment. Ignored if
          exp_id is not None
        format_string: The format string used to name result-tables.
          Ignored if exp_id is not None.
        conn: connection to the database. If not supplied, path_to_db is
          used to connect; if that is not supplied either, a connection
          to the DB file specified in the config is made

    Raises:
        ValueError: if both path_to_db and conn are given, if exp_id is
          not among the ids 1..(number of experiments), or if
          format_string cannot format (name, exp_id, run_counter)
    """
    both_given = path_to_db is not None and conn is not None
    if both_given:
        raise ValueError('Received BOTH conn and path_to_db. Please '
                         'provide only one or the other.')

    self._path_to_db = path_to_db if path_to_db else get_DB_location()
    # self.path_to_db is read back here; presumably a property over
    # _path_to_db defined elsewhere in the class
    self.conn = conn if conn else connect(self.path_to_db, get_DB_debug())

    max_id = len(get_experiments(self.conn))

    if exp_id is not None:
        # loading: the id has to be one of 1..max_id
        if exp_id not in range(1, max_id + 1):
            raise ValueError('No such experiment in the database')
        self._exp_id = exp_id
        return

    # creating: reject an unusable format string before touching the DB,
    # since the sqlite layer will later format (name, exp_id, run_counter)
    try:
        format_string.format("name", 1, 1)
    except Exception as e:
        raise ValueError("Invalid format string. Can not format "
                         "(name, exp_id, run_counter)") from e

    log.info("creating new experiment in {}".format(self.path_to_db))

    if not name:
        name = f"experiment_{max_id+1}"
    if not sample_name:
        sample_name = "some_sample"
    self._exp_id = ne(self.conn, name, sample_name, format_string)
def load_last_experiment() -> Experiment:
    """
    Load the last experiment of the database file given by the config.

    An Experiment object is built for that database file and its exp_id is
    then set to the id reported by ``get_last_experiment``.

    Returns:
        last experiment
    """
    experiment = Experiment(get_DB_location())
    last_id = get_last_experiment(experiment.conn)
    experiment.exp_id = last_id
    return experiment
def load_last_experiment() -> Experiment:
    """
    Load the most recent experiment of the database file given by the
    config.

    Returns:
        last experiment

    Raises:
        ValueError: if the database file contains no experiments
    """
    connection = connect(get_DB_location())
    last_exp_id = get_last_experiment(connection)

    if last_exp_id is not None:
        return Experiment(exp_id=last_exp_id)

    raise ValueError('There are no experiments in the database file')
def load_by_id(run_id) -> DataSet:
    """
    Load dataset by id (from the database file given by the config)

    Args:
        run_id: id of the dataset

    Returns:
        the dataset

    Raises:
        ValueError: if run_id is None
    """
    if run_id is None:
        # DataSet treats a missing run_id as a request to create a new run,
        # so a "load" call must not forward None to it.
        raise ValueError('run_id has to be a positive integer, not None.')
    return DataSet(get_DB_location(), run_id=run_id)
def test_runs_table_columns(empty_temp_db):
    """
    The columns of a pristine runs table must be exactly the ones listed in
    RUNS_TABLE_COLUMNS
    """
    not_yet_seen = mut.RUNS_TABLE_COLUMNS.copy()

    connection = mut.connect(get_DB_location())
    table_info = connection.cursor().execute("PRAGMA table_info(runs)")

    # remove() fails on an unexpected column; leftovers mean a missing one
    for column in table_info:
        not_yet_seen.remove(column['name'])

    assert not_yet_seen == []
def load_experiment_by_name(name: str,
                            sample: Optional[str] = None) -> Experiment:
    """
    Load the single experiment that matches the given name (and, if given,
    the sample name) from the database file given by the config.

    Several experiments may share the same name and sample name; in that
    case no experiment is returned and the matches are listed in the error.

    Args:
        name: the name of the experiment
        sample: the name of the sample; when empty or None only the
          experiment name is used for the lookup

    Returns:
        the requested experiment

    Raises:
        ValueError: if no experiment matches, or if more than one does
    """
    experiment = Experiment(get_DB_location())

    if sample:
        cursor = transaction(
            experiment.conn,
            """ SELECT * FROM experiments WHERE sample_name = ? AND name = ? """,
            sample, name)
    else:
        cursor = transaction(
            experiment.conn,
            """ SELECT * FROM experiments WHERE name = ? """,
            name)

    matches = cursor.fetchall()
    match_count = len(matches)

    if match_count == 0:
        raise ValueError("Experiment not found \n")

    if match_count > 1:
        # describe each candidate so the caller can tell them apart
        listing = "\n".join(
            f"exp_id:{row['exp_id']} ({row['name']}-{row['sample_name']})"
            f" started at({row['start_time']})"
            for row in matches
        )
        raise ValueError(
            f"Many experiments matching your request found {listing}")

    experiment.exp_id = matches[0]['exp_id']
    return experiment
def load_experiment(exp_id: int) -> Experiment:
    """
    Load the experiment with the given id from the database file given by
    the config.

    Args:
        exp_id: experiment id

    Returns:
        experiment with the specified id
    """
    experiment = Experiment(get_DB_location())
    # point the freshly built object at the requested experiment
    experiment.exp_id = exp_id
    return experiment
def new_data_set(name, exp_id: Optional[int] = None,
                 specs: SPECS = None, values=None,
                 metadata=None, conn=None) -> DataSet:
    """
    Create a new dataset.
    If exp_id is not specified the last experiment will be loaded by default.

    When no connection is supplied and the experiment id has to be looked
    up, a temporary connection to the database file from the config is used
    for that lookup and is always closed again, also when the lookup fails.

    Args:
        name: the name of the new dataset
        exp_id: the id of the experiments this dataset belongs to;
          defaults to the last experiment
        specs: list of parameters to create this data_set with
        values: the values to associate with the parameters
        metadata: the values to associate with the dataset
        conn: connection to the database; handed on to the DataSet
          constructor. If None, the DataSet is created from the database
          path alone

    Returns:
        the newly created dataset

    Raises:
        ValueError: if exp_id is None and the database has no experiments
    """
    path_to_db = get_DB_location()

    if exp_id is None:
        # Only open a temporary connection if the caller gave none.
        lookup_conn = conn if conn is not None else connect(path_to_db)
        try:
            if len(get_experiments(lookup_conn)) > 0:
                exp_id = get_last_experiment(lookup_conn)
            else:
                raise ValueError("No experiments found."
                                 "You can start a new one with:"
                                 " new_experiment(name, sample_name)")
        finally:
            # Never close a connection owned by the caller.
            if conn is None:
                lookup_conn.close()

    # This is admittedly a bit weird. We create a dataset, link it to some
    # run in the DB and then (using _new) change what it's linked to
    d = DataSet(path_to_db, run_id=None, conn=conn)
    d._new(name, exp_id, specs, values, metadata)
    return d
def __init__(self, path_to_db: Optional[str]=None,
             exp_id: Optional[int]=None,
             name: Optional[str]=None,
             sample_name: Optional[str]=None,
             format_string: Optional[str]="{}-{}-{}") -> None:
    """
    Create or load an experiment. If exp_id is None, a new experiment is
    created. If exp_id is not None, an experiment is loaded.

    Args:
        path_to_db: The path of the database file to create in/load from.
          If not given, the path from the config is used
        exp_id: The id of the experiment to load
        name: The name of the experiment to create. Ignored if exp_id is
          not None
        sample_name: The sample name for this experiment. Ignored if
          exp_id is not None
        format_string: The format string used to name result-tables.
          Ignored if exp_id is not None.

    Raises:
        ValueError: if exp_id is given but is not among the ids
          1..(number of experiments), or if format_string cannot format
          (name, exp_id, run_counter)
    """
    self.path_to_db = path_to_db or get_DB_location()
    self.conn = connect(self.path_to_db, get_DB_debug())

    max_id = len(get_experiments(self.conn))

    # Compare against None explicitly: a falsy id such as 0 is an invalid
    # id to load, not a request to create a new experiment.
    if exp_id is not None:
        if exp_id not in range(1, max_id+1):
            raise ValueError('No such experiment in the database')
        self._exp_id = exp_id
    else:
        # it is better to catch an invalid format string earlier than later
        try:
            # the sqlite_base will try to format
            # (name, exp_id, run_counter)
            format_string.format("name", 1, 1)
        except Exception as e:
            raise ValueError("Invalid format string. Can not format "
                             "(name, exp_id, run_counter)") from e

        log.info("creating new experiment in {}".format(self.path_to_db))

        name = name or f"experiment_{max_id+1}"
        sample_name = sample_name or "some_sample"
        self._exp_id = ne(self.conn, name, sample_name, format_string)

    # NOTE(review): for a loaded experiment this holds the argument value,
    # which is not necessarily the format string stored in the database -
    # confirm against the callers that read this attribute.
    self.format_string = format_string
def test_is_run_id_in_db(empty_temp_db):
    """
    is_run_id_in_database reports, per queried run id, whether it exists
    """
    conn = mut.connect(get_DB_location())
    mut.new_experiment(conn, 'test_exp', 'no_sample')

    for _ in range(5):
        dataset = DataSet(conn=conn, run_id=None)

    # the five datasets above should have produced run_ids 1 to 5
    good_ids = [1, 2, 3, 4, 5]
    try_ids = [1, 3, 9999, 23, 0, 1, 1, 3, 34]

    # the expectation is keyed on the sorted, de-duplicated ids
    expected = {
        candidate: (candidate in good_ids)
        for candidate in np.unique(try_ids)
    }
    acquired = mut.is_run_id_in_database(conn, *try_ids)

    assert expected == acquired
def load_by_id(run_id: int, conn: Optional[ConnectionPlus] = None) -> DataSet:
    """
    Load the dataset that has the given run id.

    Args:
        run_id: run id of the dataset
        conn: connection to the database to load from. If not provided,
          a connection to the DB file specified in the config is made

    Returns:
        dataset with the given run id

    Raises:
        ValueError: if run_id is None
    """
    if run_id is None:
        raise ValueError('run_id has to be a positive integer, not None.')

    if not conn:
        conn = connect(get_DB_location())

    return DataSet(conn=conn, run_id=run_id)
def new_experiment(name: str,
                   sample_name: Optional[str],
                   format_string: str = "{}-{}-{}",
                   conn: Optional[ConnectionPlus] = None) -> Experiment:
    """
    Create a new experiment.

    Args:
        name: the name of the experiment
        sample_name: the name of the current sample
        format_string: basic format string for table-name;
          must contain 3 placeholders.
        conn: connection to the database. If not supplied, a new connection
          to the DB file specified in the config is made

    Returns:
        the new experiment
    """
    if not conn:
        conn = connect(get_DB_location())

    created = Experiment(name=name,
                         sample_name=sample_name,
                         format_string=format_string,
                         conn=conn)
    return created
def __init__(self, path_to_db: str=None,
             run_id: Optional[int]=None,
             conn=None,
             exp_id=None,
             name: str=None,
             specs: SPECS=None,
             values=None,
             metadata=None) -> None:
    """
    Create a new DataSet object. The object can either hold a new run or
    an already existing run. If a run_id is provided, then an old run is
    looked up, else a new run is created.

    Args:
        path_to_db: path to the sqlite file on disk. If not provided, the
          path will be read from the config.
        run_id: provide this when loading an existing run, leave it
          as None when creating a new run
        conn: connection to the DB. If not provided, a connection to
          path_to_db is made
        exp_id: the id of the experiment in which to create a new run.
          Ignored if run_id is provided. Defaults to the last experiment
        name: the name of the dataset. Ignored if run_id is provided.
        specs: paramspecs belonging to the dataset. Ignored if run_id is
          provided.
        values: values to insert into the dataset. Ignored if run_id is
          provided.
        metadata: metadata to insert into the dataset. Ignored if run_id
          is provided.

    Raises:
        ValueError: if run_id is given but no such run exists, or if a new
          run is requested without exp_id and the database has no
          experiments
    """
    # TODO: handle fail here by defaulting to
    # a standard db
    self.path_to_db = path_to_db or get_DB_location()
    if conn is None:
        self.conn = connect(self.path_to_db)
    else:
        self.conn = conn

    self.run_id = run_id
    self._debug = False
    self.subscribers: Dict[str, _Subscriber] = {}

    # Compare against None explicitly: a falsy id such as 0 must be looked
    # up (and rejected as non-existent), not silently turned into a new run.
    if run_id is not None:
        if not run_exists(self.conn, run_id):
            raise ValueError(f"Run with run_id {run_id} does not exist in "
                             f"the database")
        self._completed = completed(self.conn, self.run_id)
    else:
        if exp_id is None:
            if len(get_experiments(self.conn)) > 0:
                exp_id = get_last_experiment(self.conn)
            else:
                raise ValueError("No experiments found."
                                 "You can start a new one with:"
                                 " new_experiment(name, sample_name)")

        # Actually perform all the side effects needed for
        # the creation of a new dataset.
        name = name or "dataset"
        _, run_id, __ = create_run(self.conn, exp_id, name,
                                   generate_guid(),
                                   specs, values, metadata)

        # this is really the UUID (an ever increasing count in the db)
        self.run_id = run_id
        self._completed = False
def test_get_last_experiment_no_experiments(empty_temp_db):
    """
    With no experiments in the database there is no last experiment id
    """
    connection = mut.connect(get_DB_location())
    last_exp_id = mut.get_last_experiment(connection)
    assert last_exp_id is None
def __init__(self, path_to_db: str = None,
             run_id: Optional[int] = None,
             conn: Optional[ConnectionPlus] = None,
             exp_id=None,
             name: str = None,
             specs: Optional[SpecsOrInterDeps] = None,
             values=None,
             metadata=None) -> None:
    """
    Create a new DataSet object. The object can either hold a new run or
    an already existing run. If a run_id is provided, then an old run is
    looked up, else a new run is created.

    Args:
        path_to_db: path to the sqlite file on disk. If not provided, the
          path will be read from the config.
        run_id: provide this when loading an existing run, leave it
          as None when creating a new run
        conn: connection to the DB; if provided and `path_to_db` is
          provided as well, then a ValueError is raised (this is to
          prevent the possibility of providing a connection to a DB
          file that is different from `path_to_db`)
        exp_id: the id of the experiment in which to create a new run.
          Ignored if run_id is provided. If None, the last experiment
          in the database is used
        name: the name of the dataset. Ignored if run_id is provided.
          Defaults to "dataset"
        specs: paramspecs belonging to the dataset. Ignored if run_id is
          provided.
        values: values to insert into the dataset. Ignored if run_id is
          provided.
        metadata: metadata to insert into the dataset. Ignored if run_id
          is provided.

    Raises:
        ValueError: if both `path_to_db` and `conn` are given, if run_id
          is given but no such run exists in the database, or if a new
          run is requested without exp_id while the database has no
          experiments
    """
    if path_to_db is not None and conn is not None:
        raise ValueError("Both `path_to_db` and `conn` arguments have "
                         "been passed together with non-None values. "
                         "This is not allowed.")
    self._path_to_db = path_to_db or get_DB_location()

    # A supplied connection is passed through make_connection_plus_from;
    # otherwise a new connection is opened. self.path_to_db is presumably
    # a property over _path_to_db defined elsewhere in the class.
    self.conn = make_connection_plus_from(conn) if conn is not None else \
        connect(self.path_to_db)

    self._run_id = run_id
    self._debug = False
    self.subscribers: Dict[str, _Subscriber] = {}
    # declared here, assigned in both branches below
    self._interdeps: InterDependencies_

    if run_id is not None:
        # Loading an existing run: everything is read back from the DB.
        if not run_exists(self.conn, run_id):
            raise ValueError(f"Run with run_id {run_id} does not exist in "
                             f"the database")
        self._completed = completed(self.conn, self.run_id)
        run_desc = self._get_run_description_from_db()
        if run_desc._old_style_deps:
            # old-style dependencies are converted to the new-style object
            # TODO: what if the old run had invalid interdep.s?
            old_idps: InterDependencies = cast(InterDependencies,
                                               run_desc.interdeps)
            self._interdeps = old_to_new(old_idps)
        else:
            new_idps: InterDependencies_ = cast(InterDependencies_,
                                                run_desc.interdeps)
            self._interdeps = new_idps
        self._metadata = get_metadata_from_run_id(self.conn, run_id)
        # a run counts as started once it has a raw run timestamp
        self._started = self.run_timestamp_raw is not None

    else:
        # Actually perform all the side effects needed for the creation
        # of a new dataset. Note that a dataset is created (in the DB)
        # with no parameters; they are written to disk when the dataset
        # is marked as started
        if exp_id is None:
            if len(get_experiments(self.conn)) > 0:
                exp_id = get_last_experiment(self.conn)
            else:
                raise ValueError("No experiments found."
                                 "You can start a new one with:"
                                 " new_experiment(name, sample_name)")
        name = name or "dataset"
        _, run_id, __ = create_run(self.conn, exp_id, name,
                                   generate_guid(),
                                   parameters=None,
                                   values=values,
                                   metadata=metadata)
        # this is really the UUID (an ever increasing count in the db)
        self._run_id = run_id
        self._completed = False
        self._started = False
        # specs may already be new-style interdependencies, a collection
        # of old-style specs (converted here), or absent (empty object)
        if isinstance(specs, InterDependencies_):
            self._interdeps = specs
        elif specs is not None:
            self._interdeps = old_to_new(InterDependencies(*specs))
        else:
            self._interdeps = InterDependencies_()
        self._metadata = get_metadata_from_run_id(self.conn, self.run_id)