Example #1
def test_link_construction_raises(not_guid):
    head_guid = generate_guid()
    tail_guid = generate_guid()
    edge_type = "fit"

    match = re.escape(f'The guid given for head is not a valid guid. Received '
                      f'{not_guid}.')
    with pytest.raises(ValueError, match=match):
        Link(not_guid, tail_guid, edge_type)

    match = re.escape(f'The guid given for tail is not a valid guid. Received '
                      f'{not_guid}')
    with pytest.raises(ValueError, match=match):
        Link(head_guid, not_guid, edge_type)
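The not_guid fixture used by this test is not shown on this page; it presumably parametrizes the test with malformed guid strings. A minimal sketch of such a fixture (the specific invalid values are assumptions) could be:

import pytest


@pytest.fixture(params=("", "not-a-guid", "1234"))
def not_guid(request):
    # Each parameter is a string that cannot pass guid validation (assumed values).
    return request.param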
Example #2
    def prepare(
        self,
        *,
        snapshot: Mapping[Any, Any],
        interdeps: InterDependencies_,
        shapes: Shapes = None,
        parent_datasets: Sequence[Mapping[Any, Any]] = (),
        write_in_background: bool = False,
    ) -> None:
        if not self.pristine:
            raise RuntimeError(
                "Cannot prepare a dataset that is not pristine.")

        self.add_snapshot(
            json.dumps({"station": snapshot}, cls=NumpyJSONEncoder))

        if interdeps == InterDependencies_():
            raise RuntimeError("No parameters supplied")

        self._set_interdependencies(interdeps, shapes)
        links = [Link(head=self.guid, **pdict) for pdict in parent_datasets]
        self._set_parent_dataset_links(links)

        if self.pristine:
            self._perform_start_actions()
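As a rough usage sketch of the prepare method above: every name below (ds, station, interdeps, parent_guid) is an assumption standing in for an object built elsewhere, and interdeps must not compare equal to an empty InterDependencies_.

# Usage sketch only; ds, station, interdeps and parent_guid are assumed to exist.
ds.prepare(
    snapshot=station.snapshot(),        # stored under the "station" key via add_snapshot
    interdeps=interdeps,                # raises RuntimeError if equal to InterDependencies_()
    parent_datasets=[{"tail": parent_guid,
                      "edge_type": "analysis",
                      "description": "derived from the parent run"}],
    write_in_background=False,
)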
Example #3
def test_link_construction_passes():
    head_guid = generate_guid()
    tail_guid = generate_guid()
    edge_type = "fit"
    description = "We did a second order fit with math"

    link = Link(head_guid, tail_guid, edge_type)

    assert link.head == head_guid
    assert link.tail == tail_guid
    assert link.edge_type == edge_type
    assert link.description == ""

    link = Link(head_guid, tail_guid, edge_type, description)

    assert link.description == description
Example #4
def generate_some_links(N: int) -> List[Link]:
    """
    Generate N links with the same head
    """
    def _timestamp() -> int:
        """
        return a random timestamp that is approximately
        one day in the past.
        """
        timestamp = datetime.now() - timedelta(days=1,
                                               seconds=random.randint(1, 1000))
        return int(round(timestamp.timestamp() * 1000))

    known_types = ("fit", "analysis", "step")
    known_descs = ("A second-order fit", "Manual analysis (see notebook)",
                   "Step 3 in the characterisation")

    head_guid = generate_guid(_timestamp())
    head_guids = [head_guid] * N
    tail_guids = [generate_guid(_timestamp()) for _ in range(N)]
    edge_types = [known_types[i % len(known_types)] for i in range(N)]
    descriptions = [known_descs[i % len(known_descs)] for i in range(N)]

    zipattrs = zip(head_guids, tail_guids, edge_types, descriptions)

    links = [Link(hg, tg, n, d) for hg, tg, n, d in zipattrs]

    return links
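A quick usage sketch for the helper above, relying only on the Link attributes shown in the other examples on this page:

# Usage sketch: generate six links sharing one head guid and cycling edge types.
links = generate_some_links(6)

assert len(links) == 6
assert len({link.head for link in links}) == 1
assert {link.edge_type for link in links} == {"fit", "analysis", "step"}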
Example #5
    def __enter__(self) -> DataSaver:
        # TODO: should user actions really precede the dataset?
        # first do whatever bootstrapping the user specified
        for func, args in self.enteractions:
            func(*args)

        # next set up the "datasaver"
        if self.experiment is not None:
            self.ds = qc.new_data_set(self.name,
                                      self.experiment.exp_id,
                                      conn=self.experiment.conn)
        else:
            self.ds = qc.new_data_set(self.name)

        # .. and give the dataset a snapshot as metadata
        if self.station is None:
            station = qc.Station.default
        else:
            station = self.station

        if station:
            self.ds.add_snapshot(
                json.dumps({'station': station.snapshot()},
                           cls=NumpyJSONEncoder))

        if self._interdependencies == InterDependencies_():
            raise RuntimeError("No parameters supplied")
        else:
            self.ds.set_interdependencies(self._interdependencies)

        links = [
            Link(head=self.ds.guid, **pdict) for pdict in self._parent_datasets
        ]
        self.ds.parent_dataset_links = links
        self.ds.mark_started(start_bg_writer=self._write_in_background)

        # register all subscribers
        for (callble, state) in self.subscribers:
            # We register with minimal waiting time.
            # That should make all subscribers be called when data is flushed
            # to the database
            log.debug(f'Subscribing callable {callble} with state {state}')
            self.ds.subscribe(callble, min_wait=0, min_count=1, state=state)

        print(f'Starting experimental run with id: {self.ds.run_id}.'
              f' {self._extra_log_info}')
        log.info(f'Starting measurement with guid: {self.ds.guid}.'
                 f' {self._extra_log_info}')
        log.info(f'Using background writing: {self._write_in_background}')

        self.datasaver = DataSaver(
            dataset=self.ds,
            write_period=self.write_period,
            interdeps=self._interdependencies,
            write_in_background=self._write_in_background)

        return self.datasaver
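This __enter__ appears to belong to the runner object returned by Measurement.run, so it is normally entered via a with statement rather than called directly. A minimal, hedged sketch of driving it through the public qcodes API (the parameter names "x" and "y" are illustrative):

# Sketch only: assumes a working qcodes installation with an initialised database.
from qcodes.dataset.measurements import Measurement

meas = Measurement()
meas.register_custom_parameter("x")
meas.register_custom_parameter("y", setpoints=("x",))

with meas.run() as datasaver:        # entering the context runs the __enter__ shown above
    for x in range(10):
        datasaver.add_result(("x", x), ("y", x ** 2))

dataset = datasaver.dataset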
Example #6
def test_link_to_string_and_back():
    head_guid = generate_guid()
    tail_guid = generate_guid()
    edge_type = "analysis"
    description = "hyper-spectral quantum blockchain ML"

    link = Link(head_guid, tail_guid, edge_type, description)

    lstr = link_to_str(link)
    newlink = str_to_link(lstr)

    assert newlink == link
Example #7
def test_str_to_link():
    head_guid = generate_guid()
    tail_guid = generate_guid()
    edge_type = "test"
    description = "used in test_str_to_link"

    lstr = json.dumps({
        "head": head_guid,
        "tail": tail_guid,
        "edge_type": edge_type,
        "description": description
    })

    expected_link = Link(head_guid, tail_guid, edge_type, description)

    assert str_to_link(lstr) == expected_link
Example #8
def generate_some_links(N: int) -> List[Link]:
    """
    Generate N links with the same head
    """

    known_types = ("fit", "analysis", "step")
    known_descs = ("A second-order fit", "Manual analysis (see notebook)",
                   "Step 3 in the characterisation")

    head_guid = generate_guid()
    head_guids = [head_guid] * N
    tail_guids = [generate_guid() for _ in range(N)]
    edge_types = [known_types[i % len(known_types)] for i in range(N)]
    descriptions = [known_descs[i % len(known_descs)] for i in range(N)]

    zipattrs = zip(head_guids, tail_guids, edge_types, descriptions)

    links = [Link(hg, tg, n, d) for hg, tg, n, d in zipattrs]

    return links
Example #9
def test_basic_extraction(two_empty_temp_db_connections, some_interdeps):
    source_conn, target_conn = two_empty_temp_db_connections

    source_path = path_to_dbfile(source_conn)
    target_path = path_to_dbfile(target_conn)

    type_casters = {
        'numeric': float,
        'array': (lambda x: np.array(x)
                  if hasattr(x, '__iter__') else np.array([x])),
        'text': str
    }

    source_exp = Experiment(conn=source_conn)
    source_dataset = DataSet(conn=source_conn, name="basic_copy_paste_name")

    with pytest.raises(RuntimeError) as excinfo:
        extract_runs_into_db(source_path, target_path, source_dataset.run_id)

    assert error_caused_by(excinfo, ('Dataset not completed. An incomplete '
                                     'dataset can not be copied. The '
                                     'incomplete dataset has GUID: '
                                     f'{source_dataset.guid} and run_id: '
                                     f'{source_dataset.run_id}'))

    source_dataset.set_interdependencies(some_interdeps[0])

    source_dataset.parent_dataset_links = [
        Link(head=source_dataset.guid,
             tail=str(uuid.uuid4()),
             edge_type='test_link')
    ]
    source_dataset.mark_started()

    for value in range(10):
        result = {
            ps.name: type_casters[ps.type](value)
            for ps in some_interdeps[0].paramspecs
        }
        source_dataset.add_results([result])

    source_dataset.add_metadata('goodness', 'fair')
    source_dataset.add_metadata('test', True)

    source_dataset.mark_completed()

    assert source_dataset.run_id == source_dataset.captured_run_id

    extract_runs_into_db(source_path, target_path, source_dataset.run_id)

    target_exp = Experiment(conn=target_conn, exp_id=1)

    length1 = len(target_exp)
    assert length1 == 1

    # trying to insert the same run again should be a NOOP
    with raise_if_file_changed(target_path):
        extract_runs_into_db(source_path, target_path, source_dataset.run_id)

    assert len(target_exp) == length1

    target_dataset = DataSet(conn=target_conn, run_id=1)

    # Now make the interesting comparisons: are the target objects the same as
    # the source objects?

    assert source_dataset.the_same_dataset_as(target_dataset)

    source_data = source_dataset.get_parameter_data(
        *source_dataset.parameters.split(','))
    target_data = target_dataset.get_parameter_data(
        *target_dataset.parameters.split(','))

    for outkey, outval in source_data.items():
        for inkey, inval in outval.items():
            np.testing.assert_array_equal(inval, target_data[outkey][inkey])

    exp_attrs = [
        'name', 'sample_name', 'format_string', 'started_at', 'finished_at'
    ]

    for exp_attr in exp_attrs:
        assert getattr(source_exp, exp_attr) == getattr(target_exp, exp_attr)

    # trying to insert the same run again should be a NOOP
    with raise_if_file_changed(target_path):
        extract_runs_into_db(source_path, target_path, source_dataset.run_id)