def test_link_construction_raises(not_guid):
    """An invalid guid for either head or tail must raise a ValueError."""
    valid_head = generate_guid()
    valid_tail = generate_guid()
    kind = "fit"

    expected = re.escape(f'The guid given for head is not a valid guid. Received '
                         f'{not_guid}.')
    with pytest.raises(ValueError, match=expected):
        Link(not_guid, valid_tail, kind)

    expected = re.escape(f'The guid given for tail is not a valid guid. Received '
                         f'{not_guid}')
    with pytest.raises(ValueError, match=expected):
        Link(valid_head, not_guid, kind)
def prepare(
    self,
    *,
    snapshot: Mapping[Any, Any],
    interdeps: InterDependencies_,
    shapes: Shapes = None,
    parent_datasets: Sequence[Mapping[Any, Any]] = (),
    write_in_background: bool = False,
) -> None:
    """
    Prepare this dataset for data collection.

    Stores the given station ``snapshot`` as metadata, registers the
    parameter interdependencies (and optional result ``shapes``), attaches
    links to any parent datasets, and finally performs the start actions.

    Args:
        snapshot: Station snapshot to store as JSON metadata.
        interdeps: The interdependencies of the parameters to register;
            must not be empty.
        shapes: Optional shapes of the results, forwarded to
            ``_set_interdependencies``.
        parent_datasets: Mappings of ``Link`` keyword arguments (except
            ``head``, which is set to this dataset's guid) describing
            parent datasets.
        write_in_background: Accepted for interface compatibility.
            NOTE(review): not used in this body — confirm intended.

    Raises:
        RuntimeError: If the dataset is not pristine, or if ``interdeps``
            is empty.
    """
    # Preparing twice (or after data has been added) is an error.
    if not self.pristine:
        raise RuntimeError(
            "Cannot prepare a dataset that is not pristine.")

    # NOTE(review): the snapshot is stored before interdeps are validated,
    # so an empty-interdeps error leaves the snapshot already added.
    self.add_snapshot(
        json.dumps({"station": snapshot}, cls=NumpyJSONEncoder))

    # An empty InterDependencies_ means no parameters were registered.
    if interdeps == InterDependencies_():
        raise RuntimeError("No parameters supplied")

    self._set_interdependencies(interdeps, shapes)

    # Every parent link has this dataset as its head.
    links = [Link(head=self.guid, **pdict) for pdict in parent_datasets]
    self._set_parent_dataset_links(links)

    # Guard is always true here given the pristine check above, unless a
    # helper above changed the pristine state.
    if self.pristine:
        self._perform_start_actions()
def test_link_construction_passes():
    """A Link stores head, tail, edge_type and an optional description."""
    head = generate_guid()
    tail = generate_guid()
    kind = "fit"
    note = "We did a second order fit with math"

    bare_link = Link(head, tail, kind)
    assert bare_link.head == head
    assert bare_link.tail == tail
    assert bare_link.edge_type == kind
    # With no description given, the description defaults to empty.
    assert bare_link.description == ""

    described_link = Link(head, tail, kind, note)
    assert described_link.description == note
def generate_some_links(N: int) -> List[Link]:
    """
    Generate N links with the same head
    """

    def _timestamp() -> int:
        """
        return a random timestamp that is approximately one day in the past.
        """
        moment = datetime.now() - timedelta(days=1,
                                            seconds=random.randint(1, 1000))
        return int(round(moment.timestamp() * 1000))

    known_types = ("fit", "analysis", "step")
    known_descs = ("A second-order fit",
                   "Manual analysis (see notebook)",
                   "Step 3 in the characterisation")

    # All links share a single head guid; tails, types and descriptions
    # cycle through the known values.
    head = generate_guid(_timestamp())
    return [
        Link(head,
             generate_guid(_timestamp()),
             known_types[i % len(known_types)],
             known_descs[i % len(known_descs)])
        for i in range(N)
    ]
def __enter__(self) -> DataSaver:
    """
    Enter the measurement context: run user enter-actions, create the
    dataset, attach snapshot/interdependencies/parent links, start the
    run, register subscribers, and return a :class:`DataSaver`.
    """
    # TODO: should user actions really precede the dataset?
    # first do whatever bootstrapping the user specified
    for func, args in self.enteractions:
        func(*args)

    # next set up the "datasaver"
    if self.experiment is not None:
        self.ds = qc.new_data_set(self.name,
                                  self.experiment.exp_id,
                                  conn=self.experiment.conn)
    else:
        self.ds = qc.new_data_set(self.name)

    # .. and give the dataset a snapshot as metadata
    if self.station is None:
        station = qc.Station.default
    else:
        station = self.station

    if station:
        self.ds.add_snapshot(json.dumps({'station': station.snapshot()},
                                        cls=NumpyJSONEncoder))

    # An empty InterDependencies_ means no parameters were registered.
    if self._interdependencies == InterDependencies_():
        raise RuntimeError("No parameters supplied")
    else:
        self.ds.set_interdependencies(self._interdependencies)

    # Every parent link has the new dataset as its head.
    links = [Link(head=self.ds.guid, **pdict)
             for pdict in self._parent_datasets]
    self.ds.parent_dataset_links = links
    self.ds.mark_started(start_bg_writer=self._write_in_background)

    # register all subscribers
    for (callble, state) in self.subscribers:
        # We register with minimal waiting time.
        # That should make all subscribers be called when data is flushed
        # to the database
        log.debug(f'Subscribing callable {callble} with state {state}')
        self.ds.subscribe(callble, min_wait=0, min_count=1, state=state)

    print(f'Starting experimental run with id: {self.ds.run_id}.'
          f' {self._extra_log_info}')
    log.info(f'Starting measurement with guid: {self.ds.guid}.'
             f' {self._extra_log_info}')
    log.info(f'Using background writing: {self._write_in_background}')

    self.datasaver = DataSaver(dataset=self.ds,
                               write_period=self.write_period,
                               interdeps=self._interdependencies,
                               write_in_background=self._write_in_background)

    return self.datasaver
def test_link_to_string_and_back():
    """Serialising a Link to a string and back yields an equal Link."""
    original = Link(generate_guid(),
                    generate_guid(),
                    "analysis",
                    "hyper-spectral quantum blockchain ML")

    roundtripped = str_to_link(link_to_str(original))

    assert roundtripped == original
def test_str_to_link():
    """A JSON string with the four Link fields parses into the right Link."""
    head = generate_guid()
    tail = generate_guid()
    kind = "test"
    note = "used in test_str_to_link"

    payload = {
        "head": head,
        "tail": tail,
        "edge_type": kind,
        "description": note,
    }
    lstr = json.dumps(payload)

    assert str_to_link(lstr) == Link(head, tail, kind, note)
def generate_some_links(N: int) -> List[Link]:
    """
    Generate N links with the same head
    """
    known_types = ("fit", "analysis", "step")
    known_descs = ("A second-order fit",
                   "Manual analysis (see notebook)",
                   "Step 3 in the characterisation")

    # One shared head guid; tails are fresh, types/descriptions cycle.
    head = generate_guid()
    attrs = (
        (head,
         generate_guid(),
         known_types[i % len(known_types)],
         known_descs[i % len(known_descs)])
        for i in range(N)
    )
    return [Link(*a) for a in attrs]
def test_basic_extraction(two_empty_temp_db_connections, some_interdeps):
    """
    End-to-end check of extract_runs_into_db: an incomplete dataset cannot
    be copied; a completed one is copied exactly once (re-copy is a NOOP),
    and the copy matches the source in data, metadata and experiment attrs.
    """
    source_conn, target_conn = two_empty_temp_db_connections
    source_path = path_to_dbfile(source_conn)
    target_path = path_to_dbfile(target_conn)

    # Converters from paramspec type names to concrete result values.
    type_casters = {
        'numeric': float,
        'array': (lambda x: np.array(x) if hasattr(x, '__iter__')
                  else np.array([x])),
        'text': str
    }

    source_exp = Experiment(conn=source_conn)
    source_dataset = DataSet(conn=source_conn, name="basic_copy_paste_name")

    # Copying a dataset that has not been completed must fail.
    with pytest.raises(RuntimeError) as excinfo:
        extract_runs_into_db(source_path, target_path,
                             source_dataset.run_id)

    assert error_caused_by(excinfo, ('Dataset not completed. An incomplete '
                                     'dataset can not be copied. The '
                                     'incomplete dataset has GUID: '
                                     f'{source_dataset.guid} and run_id: '
                                     f'{source_dataset.run_id}'))

    source_dataset.set_interdependencies(some_interdeps[0])

    # Attach a parent link so link round-tripping is exercised too.
    source_dataset.parent_dataset_links = [
        Link(head=source_dataset.guid, tail=str(uuid.uuid4()),
             edge_type='test_link')
    ]
    source_dataset.mark_started()

    # Insert ten results, one value per registered paramspec.
    for value in range(10):
        result = {ps.name: type_casters[ps.type](value)
                  for ps in some_interdeps[0].paramspecs}
        source_dataset.add_results([result])

    source_dataset.add_metadata('goodness', 'fair')
    source_dataset.add_metadata('test', True)

    source_dataset.mark_completed()

    assert source_dataset.run_id == source_dataset.captured_run_id

    # Now the completed dataset can be extracted into the target DB.
    extract_runs_into_db(source_path, target_path, source_dataset.run_id)

    target_exp = Experiment(conn=target_conn, exp_id=1)

    length1 = len(target_exp)
    assert length1 == 1

    # trying to insert the same run again should be a NOOP
    with raise_if_file_changed(target_path):
        extract_runs_into_db(source_path, target_path, source_dataset.run_id)

    assert len(target_exp) == length1

    target_dataset = DataSet(conn=target_conn, run_id=1)

    # Now make the interesting comparisons: are the target objects the same as
    # the source objects?
    assert source_dataset.the_same_dataset_as(target_dataset)

    source_data = source_dataset.get_parameter_data(
        *source_dataset.parameters.split(','))
    target_data = target_dataset.get_parameter_data(
        *target_dataset.parameters.split(','))

    # Compare the raw data arrays parameter-by-parameter.
    for outkey, outval in source_data.items():
        for inkey, inval in outval.items():
            np.testing.assert_array_equal(inval, target_data[outkey][inkey])

    exp_attrs = ['name', 'sample_name', 'format_string', 'started_at',
                 'finished_at']

    # The target experiment must mirror the source experiment's attributes.
    for exp_attr in exp_attrs:
        assert getattr(source_exp, exp_attr) == getattr(target_exp, exp_attr)

    # trying to insert the same run again should be a NOOP
    with raise_if_file_changed(target_path):
        extract_runs_into_db(source_path, target_path, source_dataset.run_id)