def load_data_from_db(self) -> None:
    """
    Load data from the dataset into this cache.

    Calls are incremental: if rows have been added to the dataset since
    the previous call, only that new portion is fetched and appended to
    the data already cached. Once the dataset is marked completed and
    its data has been loaded, further calls do nothing.

    Raises:
        RuntimeError: if this cache belongs to a dataset that is being
            built in-memory ("live"), which cannot be loaded from the
            database.
    """
    if self.live:
        raise RuntimeError("Cannot load data into this cache from the "
                           "database because this dataset is being built "
                           "in-memory.")
    if self._loaded_from_completed_ds:
        return

    # Refresh the completion flag from the DB; a completed dataset will
    # not grow, so after one more load we can skip all future loads.
    self._dataset._completed = completed(self._dataset.conn,
                                         self._dataset.run_id)
    if self._dataset.completed:
        self._loaded_from_completed_ds = True

    loaded = load_new_data_from_db_and_append(
        self._dataset.conn,
        self._dataset.table_name,
        self.rundescriber,
        self._write_status,
        self._read_status,
        self._data,
    )
    self._write_status, self._read_status, self._data = loaded

    # Any non-None write status means data has reached the database, so
    # the cache can no longer be considered purely in-memory.
    if any(status is not None for status in self._write_status.values()):
        self._live = False
def load_data_from_db(self) -> None:
    """
    Load data from the dataset into this cache.

    Calls are incremental: if rows have been added to the dataset since
    the previous call, only that new portion is fetched (per parameter
    tree) and merged into the data already cached. Once the dataset is
    marked completed and its data has been loaded, further calls do
    nothing.
    """
    if self._loaded_from_completed_ds:
        return

    # Refresh the completion flag from the DB; a completed dataset will
    # not grow, so after one more load we can skip all future loads.
    self._dataset._completed = completed(self._dataset.conn,
                                         self._dataset.run_id)
    if self._dataset.completed:
        self._loaded_from_completed_ds = True

    interdeps = get_interdeps_from_result_table_name(
        self._dataset.conn, self._dataset.table_name)

    # One load per top-level (non-dependency) parameter tree.
    for param_name in (ps.name for ps in interdeps.non_dependencies):
        # Resume one row past what was already read for this parameter.
        first_row = self._read_status.get(param_name, 0) + 1
        tree_data, n_new_rows = get_parameter_data_for_one_paramtree(
            self._dataset.conn,
            self._dataset.table_name,
            interdeps=interdeps,
            output_param=param_name,
            start=first_row,
            end=None)
        self._data[param_name] = self._merge_data_dicts_inner(
            self._data.get(param_name, {}), tree_data)
        self._read_status[param_name] = (
            self._read_status.get(param_name, 0) + n_new_rows)
def load_data_from_db(self) -> None:
    """
    Load data from the dataset into this cache.

    Calls are incremental: if rows have been added to the dataset since
    the previous call, only that new portion is fetched and appended to
    the data already cached. Once the dataset is marked completed and
    its data has been loaded, further calls do nothing.
    """
    if self._loaded_from_completed_ds:
        return

    # Refresh the completion flag from the DB; a completed dataset will
    # not grow, so after one more load we can skip all future loads.
    self._dataset._completed = completed(self._dataset.conn,
                                         self._dataset.run_id)
    if self._dataset.completed:
        self._loaded_from_completed_ds = True

    updated = append_shaped_parameter_data_to_existing_arrays(
        self._dataset.conn,
        self._dataset.table_name,
        self.rundescriber,
        self._write_status,
        self._read_status,
        self._data)
    self._write_status, self._read_status, self._data = updated
def __init__(self, path_to_db: Optional[str] = None,
             run_id: Optional[int] = None,
             conn: Optional[ConnectionPlus] = None,
             exp_id: Optional[int] = None,
             name: Optional[str] = None,
             specs: Optional[SpecsOrInterDeps] = None,
             values=None,
             metadata=None) -> None:
    """
    Create a new DataSet object. The object can either hold a new run or
    an already existing run. If a run_id is provided, then an old run is
    looked up, else a new run is created.

    Args:
        path_to_db: path to the sqlite file on disk. If not provided, the
            path will be read from the config.
        run_id: provide this when loading an existing run, leave it
            as None when creating a new run
        conn: connection to the DB; if provided and `path_to_db` is
            provided as well, then a ValueError is raised (this is to
            prevent the possibility of providing a connection to a DB
            file that is different from `path_to_db`)
        exp_id: the id of the experiment in which to create a new run.
            Ignored if run_id is provided.
        name: the name of the dataset. Ignored if run_id is provided.
        specs: paramspecs belonging to the dataset. Ignored if run_id is
            provided.
        values: values to insert into the dataset. Ignored if run_id is
            provided.
        metadata: metadata to insert into the dataset. Ignored if run_id
            is provided.

    Raises:
        ValueError: if both `path_to_db` and `conn` are given, if the
            given `run_id` does not exist in the database, or if a new
            run is requested but the database holds no experiments.
    """
    if path_to_db is not None and conn is not None:
        raise ValueError("Both `path_to_db` and `conn` arguments have "
                         "been passed together with non-None values. "
                         "This is not allowed.")
    self._path_to_db = path_to_db or get_DB_location()
    # Reuse the caller's connection (wrapped) if given, else open our own.
    self.conn = make_connection_plus_from(conn) if conn is not None else \
        connect(self.path_to_db)
    self._run_id = run_id
    self._debug = False
    self.subscribers: Dict[str, _Subscriber] = {}
    self._interdeps: InterDependencies_

    if run_id is not None:
        # Load an existing run: restore completion state, interdeps and
        # metadata from the database.
        if not run_exists(self.conn, run_id):
            raise ValueError(f"Run with run_id {run_id} does not exist in "
                             f"the database")
        self._completed = completed(self.conn, self.run_id)
        run_desc = self._get_run_description_from_db()
        if run_desc._old_style_deps:
            # TODO: what if the old run had invalid interdep.s?
            old_idps: InterDependencies = cast(InterDependencies,
                                              run_desc.interdeps)
            self._interdeps = old_to_new(old_idps)
        else:
            new_idps: InterDependencies_ = cast(InterDependencies_,
                                                run_desc.interdeps)
            self._interdeps = new_idps
        self._metadata = get_metadata_from_run_id(self.conn, run_id)
        # A run that already has a start timestamp counts as started.
        self._started = self.run_timestamp_raw is not None
    else:
        # Actually perform all the side effects needed for the creation
        # of a new dataset. Note that a dataset is created (in the DB)
        # with no parameters; they are written to disk when the dataset
        # is marked as started
        if exp_id is None:
            if len(get_experiments(self.conn)) > 0:
                exp_id = get_last_experiment(self.conn)
            else:
                raise ValueError("No experiments found. "
                                 "You can start a new one with:"
                                 " new_experiment(name, sample_name)")
        name = name or "dataset"
        _, run_id, __ = create_run(self.conn, exp_id, name,
                                   generate_guid(),
                                   parameters=None,
                                   values=values,
                                   metadata=metadata)
        # this is really the UUID (an ever increasing count in the db)
        self._run_id = run_id
        self._completed = False
        self._started = False
        if isinstance(specs, InterDependencies_):
            self._interdeps = specs
        elif specs is not None:
            self._interdeps = old_to_new(InterDependencies(*specs))
        else:
            self._interdeps = InterDependencies_()
        self._metadata = get_metadata_from_run_id(self.conn, self.run_id)