Example #1
def _add_metadata_to_xarray(
    dataset: DataSetProtocol, xrdataset: Union[xr.Dataset, xr.DataArray]
) -> None:
    xrdataset.attrs.update(
        {
            "ds_name": dataset.name,
            "sample_name": dataset.sample_name,
            "exp_name": dataset.exp_name,
            "snapshot": dataset._snapshot_raw or "null",
            "guid": dataset.guid,
            "run_timestamp": dataset.run_timestamp() or "",
            "completed_timestamp": dataset.completed_timestamp() or "",
            "captured_run_id": dataset.captured_run_id,
            "captured_counter": dataset.captured_counter,
            "run_id": dataset.run_id,
            "run_description": serial.to_json_for_storage(dataset.description),
            "parent_dataset_links": links_to_str(dataset.parent_dataset_links),
        }
    )
    if dataset.run_timestamp_raw is not None:
        xrdataset.attrs["run_timestamp_raw"] = dataset.run_timestamp_raw
    if dataset.completed_timestamp_raw is not None:
        xrdataset.attrs[
            "completed_timestamp_raw"] = dataset.completed_timestamp_raw
    if len(dataset.metadata) > 0:
        for metadata_tag, metadata in dataset.metadata.items():
            xrdataset.attrs[metadata_tag] = metadata
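
The helper above copies run metadata from a QCoDeS dataset into the attrs dictionary of an xarray object. Below is a minimal, self-contained sketch of the same pattern with plain Python values standing in for a real dataset (all names and values here are hypothetical):

import xarray as xr

xrds = xr.Dataset()
xrds.attrs.update({"ds_name": "example_run", "sample_name": "sample_A"})

# Optional attributes are only written when a value is present, mirroring the
# run_timestamp_raw / completed_timestamp_raw handling above.
run_timestamp_raw = None  # stand-in for dataset.run_timestamp_raw
if run_timestamp_raw is not None:
    xrds.attrs["run_timestamp_raw"] = run_timestamp_raw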
Example #2
def _add_run_to_runs_table(
    dataset: DataSetProtocol,
    target_conn: ConnectionPlus,
    target_exp_id: int,
    create_run_table: bool = True,
) -> Optional[str]:
    metadata = dataset.metadata
    snapshot_raw = dataset._snapshot_raw
    captured_run_id = dataset.captured_run_id
    captured_counter = dataset.captured_counter
    parent_dataset_links = links_to_str(dataset.parent_dataset_links)
    _, target_run_id, target_table_name = create_run(
        target_conn,
        target_exp_id,
        name=dataset.name,
        guid=dataset.guid,
        metadata=metadata,
        captured_run_id=captured_run_id,
        captured_counter=captured_counter,
        parent_dataset_links=parent_dataset_links,
        create_run_table=create_run_table,
        snapshot_raw=snapshot_raw,
        description=dataset.description,
    )
    mark_run_complete(target_conn, target_run_id)
    _rewrite_timestamps(
        target_conn,
        target_run_id,
        dataset.run_timestamp_raw,
        dataset.completed_timestamp_raw,
    )
    return target_table_name
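
A hypothetical caller for the helper above, showing one way the Optional[str] return value might be handled (copy_run and its body are illustrative only, not part of QCoDeS):

def copy_run(dataset, target_conn, target_exp_id):
    # Add the run's metadata row first, then copy raw results only if a
    # results table was actually created for this run.
    table_name = _add_run_to_runs_table(dataset, target_conn, target_exp_id)
    if table_name is not None:
        ...  # copy the raw results into table_name here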
Example #3
def _assert_xarray_metadata_is_as_expected(xarray_ds, qc_dataset):

    assert xarray_ds.ds_name == qc_dataset.name
    assert xarray_ds.sample_name == qc_dataset.sample_name
    assert xarray_ds.exp_name == qc_dataset.exp_name
    expected_snapshot = (
        qc_dataset.snapshot_raw if qc_dataset.snapshot_raw is not None else "null"
    )
    assert xarray_ds.snapshot == expected_snapshot
    assert xarray_ds.guid == qc_dataset.guid
    assert xarray_ds.run_timestamp == qc_dataset.run_timestamp()
    assert xarray_ds.completed_timestamp == qc_dataset.completed_timestamp()
    assert xarray_ds.captured_run_id == qc_dataset.captured_run_id
    assert xarray_ds.captured_counter == qc_dataset.captured_counter
    assert xarray_ds.run_id == qc_dataset.run_id
    assert xarray_ds.run_description == serial.to_json_for_storage(
        qc_dataset.description)
    assert xarray_ds.parent_dataset_links == links_to_str(
        qc_dataset.parent_dataset_links)
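
A hedged sketch of how the assertion helper above might be driven from a test. It assumes qc_dataset is a fixture providing a completed QCoDeS dataset and uses the standard to_xarray_dataset conversion method (check the exact API against your installed QCoDeS version):

def test_metadata_roundtrip(qc_dataset):
    # Convert the QCoDeS dataset to xarray and verify that every metadata
    # attribute survived the conversion.
    xr_ds = qc_dataset.to_xarray_dataset()
    _assert_xarray_metadata_is_as_expected(xr_ds, qc_dataset)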
Example #4
    def _perform_start_actions(self) -> None:
        """
        Perform the actions that must take place once the run has been started
        """
        paramspecs = new_to_old(self._interdeps).paramspecs

        for spec in paramspecs:
            add_parameter(self.conn, self.table_name, spec)

        desc_str = serial.to_json_for_storage(self.description)

        update_run_description(self.conn, self.run_id, desc_str)

        set_run_timestamp(self.conn, self.run_id)

        pdl_str = links_to_str(self._parent_dataset_links)
        update_parent_datasets(self.conn, self.run_id, pdl_str)
Example #5
    def _perform_start_actions(self) -> None:
        """
        Perform the actions that must take place once the run has been started
        """

        with contextlib.closing(
                conn_from_dbpath_or_conn(conn=None,
                                         path_to_db=self._path_to_db)) as conn:
            paramspecs = new_to_old(self.description.interdeps).paramspecs

            for spec in paramspecs:
                add_parameter(spec,
                              conn=conn,
                              run_id=self.run_id,
                              insert_into_results_table=False)

            desc_str = serial.to_json_for_storage(self.description)

            update_run_description(conn, self.run_id, desc_str)
            self._run_timestamp_raw = time.time()
            set_run_timestamp(conn, self.run_id, self._run_timestamp_raw)

            pdl_str = links_to_str(self._parent_dataset_links)
            update_parent_datasets(conn, self.run_id, pdl_str)
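
A minimal, self-contained sketch of the connection-handling pattern in this second variant: contextlib.closing guarantees the connection is closed once the start-up bookkeeping is done, and the raw timestamp is captured once so the in-memory value matches what is written to the database. sqlite3 stands in here for the QCoDeS connection helpers, which are not reproduced:

import contextlib
import sqlite3
import time

with contextlib.closing(sqlite3.connect(":memory:")) as conn:
    conn.execute("CREATE TABLE runs (run_id INTEGER, run_timestamp REAL)")
    run_timestamp_raw = time.time()  # capture once, reuse everywhere
    conn.execute("INSERT INTO runs VALUES (?, ?)", (1, run_timestamp_raw))
    conn.commit()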
Example #6
def _extract_single_dataset_into_db(dataset: DataSet,
                                    target_conn: ConnectionPlus,
                                    target_exp_id: int) -> None:
    """
    NB: This function should only be called from within
    :meth:`extract_runs_into_db`

    Insert the given dataset into the specified database file as the latest
    run.

    Trying to insert a run already in the DB is a NOOP.

    Args:
        dataset: A dataset representing the run to be copied
        target_conn: connection to the DB. Must be atomically guarded
        target_exp_id: The ``exp_id`` of the (target DB) experiment in which to
          insert the run
    """

    if not dataset.completed:
        raise ValueError('Dataset not completed. An incomplete dataset '
                         'cannot be copied. The incomplete dataset has '
                         f'GUID: {dataset.guid} and run_id: {dataset.run_id}')

    source_conn = dataset.conn

    run_id = get_runid_from_guid(target_conn, dataset.guid)

    if run_id != -1:
        return

    if dataset.parameters is not None:
        param_names = dataset.parameters.split(',')
    else:
        param_names = []
    parspecs_dict = {
        p.name: p
        for p in new_to_old(dataset._interdeps).paramspecs
    }
    parspecs = [parspecs_dict[p] for p in param_names]

    metadata = dataset.metadata
    snapshot_raw = dataset.snapshot_raw
    captured_run_id = dataset.captured_run_id
    captured_counter = dataset.captured_counter
    parent_dataset_links = links_to_str(dataset.parent_dataset_links)

    _, target_run_id, target_table_name = create_run(
        target_conn,
        target_exp_id,
        name=dataset.name,
        guid=dataset.guid,
        parameters=parspecs,
        metadata=metadata,
        captured_run_id=captured_run_id,
        captured_counter=captured_counter,
        parent_dataset_links=parent_dataset_links)

    _populate_results_table(source_conn, target_conn, dataset.table_name,
                            target_table_name)
    mark_run_complete(target_conn, target_run_id)
    _rewrite_timestamps(target_conn, target_run_id, dataset.run_timestamp_raw,
                        dataset.completed_timestamp_raw)

    if snapshot_raw is not None:
        add_meta_data(target_conn, target_run_id, {'snapshot': snapshot_raw})
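
The docstring notes that this helper should only be called from extract_runs_into_db. For orientation, a hedged sketch of that public entry point follows; the import path and signature are recalled from QCoDeS and should be verified against your installed version, and the database paths are placeholders:

from qcodes.dataset.database_extract_runs import extract_runs_into_db

# Copy runs 1, 2 and 3 from source.db into target.db; the target database and
# the matching experiment are created if they do not already exist.
extract_runs_into_db("./source.db", "./target.db", 1, 2, 3)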
Example #7
def test_links_to_str_and_back(N):
    links = generate_some_links(N)

    new_links = str_to_links(links_to_str(links))

    assert new_links == links
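
In the full test module, N (the number of links to generate) is presumably supplied by the test framework, e.g. a hypothesis @given decorator or a pytest parametrization not shown in this excerpt. A concrete, non-parametrized sketch of the same round trip follows; the Link constructor arguments and import paths are recalled from QCoDeS and may differ between versions:

from qcodes.dataset.guids import generate_guid
from qcodes.dataset.linked_datasets.links import Link, links_to_str, str_to_links

# A single link between two freshly generated GUIDs should survive the
# serialize/deserialize round trip unchanged.
link = Link(head=generate_guid(), tail=generate_guid(), edge_type="analysis")
assert str_to_links(links_to_str([link])) == [link]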