def load_by_guid(guid: str, conn: Optional[ConnectionPlus] = None) -> DataSet:
    """
    Fetch the dataset identified by a GUID.

    When no connection is supplied, the lookup is performed in the database
    file that is specified in the config.

    Args:
        guid: GUID of the dataset to load
        conn: connection to the database to load from; a new connection is
            opened when this is omitted

    Returns:
        The dataset with the given GUID

    Raises:
        NameError: if no run with the given GUID exists in the database
        RuntimeError: if several runs with the given GUID are found
    """
    if not conn:
        conn = connect(get_DB_location())

    # The lookup itself raises a RuntimeError when more than one run matches
    # the GUID; a result of -1 means that no run matched at all.
    matching_run_id = get_runid_from_guid(conn, guid)
    if matching_run_id != -1:
        return DataSet(run_id=matching_run_id, conn=conn)

    raise NameError(f'No run with GUID: {guid} found in database.')


def _extract_single_dataset_into_db(dataset: DataSet,
                                    target_conn: ConnectionPlus,
                                    target_exp_id: int) -> None:
    """
    NB: This function should only be called from within
    :meth:`extract_runs_into_db`

    Insert the given dataset into the specified database file as the latest
    run.

    Trying to insert a run already in the DB is a NOOP.

    Args:
        dataset: A dataset representing the run to be copied
        target_conn: connection to the DB. Must be atomically guarded
        target_exp_id: The exp_id of the (target DB) experiment in which to
            insert the run

    Raises:
        ValueError: if the dataset is not marked as completed
    """
    if not dataset.completed:
        raise ValueError('Dataset not completed. An incomplete dataset '
                         'can not be copied. The incomplete dataset has '
                         f'GUID: {dataset.guid} and run_id: {dataset.run_id}')

    source_conn = dataset.conn

    # -1 signals that no run with this GUID exists in the target DB; any
    # other value means the run is already there, so there is nothing to do.
    if get_runid_from_guid(target_conn, dataset.guid) != -1:
        return

    # Read the property once. An empty string must be treated like None:
    # ''.split(',') yields [''], which would fail the parspec lookup below
    # with a KeyError instead of meaning "no parameters".
    parameters = dataset.parameters
    param_names = parameters.split(',') if parameters else []

    parspecs_dict = {
        p.name: p for p in new_to_old(dataset._interdeps).paramspecs
    }
    # Keep the parspecs in the order given by the dataset's parameter string.
    parspecs = [parspecs_dict[name] for name in param_names]

    metadata = dataset.metadata
    snapshot_raw = dataset.snapshot_raw

    _, target_run_id, target_table_name = create_run(target_conn,
                                                     target_exp_id,
                                                     name=dataset.name,
                                                     guid=dataset.guid,
                                                     parameters=parspecs,
                                                     metadata=metadata)

    _populate_results_table(source_conn,
                            target_conn,
                            dataset.table_name,
                            target_table_name)

    # The timestamps are rewritten after the run is marked complete, so the
    # copy ends up with the source run's raw run/completed timestamps.
    mark_run_complete(target_conn, target_run_id)
    _rewrite_timestamps(target_conn,
                        target_run_id,
                        dataset.run_timestamp_raw,
                        dataset.completed_timestamp_raw)

    # The snapshot is attached afterwards as ordinary metadata under the
    # 'snapshot' key, and only when the source run has one.
    if snapshot_raw is not None:
        add_meta_data(target_conn, target_run_id, {'snapshot': snapshot_raw})