def parameter_test_helper(ds: DataSet,
                          toplevel_names: Sequence[str],
                          expected_names: Dict[str, Sequence[str]],
                          expected_shapes: Dict[str, Sequence[Tuple[int, ...]]],
                          expected_values: Dict[str, Sequence[np.ndarray]],
                          start: Optional[int] = None,
                          end: Optional[int] = None):
    """
    A helper function to compare the data we actually read out of a given
    dataset with the expected data.

    Args:
        ds: the dataset in question
        toplevel_names: names of the toplevel parameters of the dataset
        expected_names: names of the parameters expected to be loaded for a
            given parameter as a sequence indexed by the parameter.
        expected_shapes: expected shapes of the parameters loaded. The shapes
            should be stored as a tuple per parameter in a sequence containing
            all the loaded parameters for a given requested parameter.
        expected_values: expected content of the data arrays stored in a
            sequence indexed by the parameter.
        start: optional first row (passed through to the dataset getters).
        end: optional last row (passed through to the dataset getters).
    """
    data = ds.get_parameter_data(*toplevel_names, start=start, end=end)
    dataframe = ds.get_data_as_pandas_dataframe(*toplevel_names,
                                                start=start, end=end)

    all_data = ds.get_parameter_data(start=start, end=end)
    all_dataframe = ds.get_data_as_pandas_dataframe(start=start, end=end)

    all_parameters = list(all_data.keys())
    # the requested parameters must be a subset of all parameters, and dict
    # and dataframe variants must agree on which keys were loaded
    assert set(data.keys()).issubset(set(all_parameters))
    assert list(data.keys()) == list(dataframe.keys())
    assert len(data.keys()) == len(toplevel_names)
    assert len(dataframe.keys()) == len(toplevel_names)

    verify_data_dict(data, dataframe, toplevel_names, expected_names,
                     expected_shapes, expected_values)
    verify_data_dict(all_data, all_dataframe, toplevel_names, expected_names,
                     expected_shapes, expected_values)

    # Work on shallow copies from here on so the pop-based subset testing
    # below does not mutate the caller's expectation dicts.
    expected_names = dict(expected_names)
    expected_shapes = dict(expected_shapes)
    expected_values = dict(expected_values)

    # Now lets remove a random element from the list
    # We do this one by one until there is only one element in the list
    subset_names = copy(all_parameters)
    while len(subset_names) > 1:
        elem_to_remove = random.randint(0, len(subset_names) - 1)
        name_removed = subset_names.pop(elem_to_remove)
        expected_names.pop(name_removed)
        expected_shapes.pop(name_removed)
        expected_values.pop(name_removed)

        subset_data = ds.get_parameter_data(*subset_names,
                                            start=start, end=end)
        subset_dataframe = ds.get_data_as_pandas_dataframe(*subset_names,
                                                           start=start,
                                                           end=end)
        verify_data_dict(subset_data, subset_dataframe, subset_names,
                         expected_names, expected_shapes, expected_values)
def ds_to_datadicts(ds: DataSet) -> Dict[str, DataDict]:
    """
    Make DataDicts from a qcodes DataSet.

    :param ds: qcodes dataset
    :returns: dictionary with one item per dependent.
        key: name of the dependent
        value: DataDict containing that dependent and its axes.
    """
    result: Dict[str, DataDict] = {}
    raw = ds.get_parameter_data()
    for name, spec in ds.paramspecs.items():
        if spec.depends_on == '':
            # independents are only included as axes of their dependents
            continue
        axes = spec.depends_on_  # .split(', ')
        contents = {
            name: dict(unit=spec.unit, axes=axes, values=raw[name][name]),
        }
        for axis in axes:
            contents[axis] = dict(unit=ds.paramspecs[axis].unit,
                                  values=raw[name][axis])
        datadict = DataDict(**contents)
        datadict.validate()
        result[name] = datadict
    return result
def test_correct_experiment_routing(two_empty_temp_db_connections,
                                    some_interdeps):
    """
    Test that existing experiments are correctly identified AND that multiple
    insertions of the same runs don't matter (run insertion is idempotent)
    """
    source_conn, target_conn = two_empty_temp_db_connections

    source_exp_1 = Experiment(conn=source_conn)

    # make 5 runs in first experiment
    exp_1_run_ids = []
    for _ in range(5):
        source_dataset = DataSet(conn=source_conn, exp_id=source_exp_1.exp_id)
        exp_1_run_ids.append(source_dataset.run_id)

        source_dataset.set_interdependencies(some_interdeps[1])

        source_dataset.mark_started()

        for val in range(10):
            source_dataset.add_results(
                [{name: val for name in some_interdeps[1].names}])
        source_dataset.mark_completed()

    # make a new experiment with 1 run
    source_exp_2 = Experiment(conn=source_conn)
    ds = DataSet(conn=source_conn, exp_id=source_exp_2.exp_id, name="lala")
    exp_2_run_ids = [ds.run_id]

    ds.set_interdependencies(some_interdeps[1])

    ds.mark_started()

    for val in range(10):
        ds.add_results([{name: val for name in some_interdeps[1].names}])

    ds.mark_completed()

    source_path = path_to_dbfile(source_conn)
    target_path = path_to_dbfile(target_conn)

    # now copy 2 runs
    extract_runs_into_db(source_path, target_path, *exp_1_run_ids[:2])

    target_exp1 = Experiment(conn=target_conn, exp_id=1)

    assert len(target_exp1) == 2

    # copy two other runs, one of them already in
    extract_runs_into_db(source_path, target_path, *exp_1_run_ids[1:3])

    # the overlapping run must not be duplicated: 2 + 1 new, not 2 + 2
    assert len(target_exp1) == 3

    # insert run from different experiment
    extract_runs_into_db(source_path, target_path, ds.run_id)

    # the foreign run must land in its own (new) experiment, not in exp 1
    assert len(target_exp1) == 3

    target_exp2 = Experiment(conn=target_conn, exp_id=2)

    assert len(target_exp2) == 1

    # finally insert every single run from experiment 1
    extract_runs_into_db(source_path, target_path, *exp_1_run_ids)

    # check for idempotency once more by inserting all the runs but in another
    # order
    with raise_if_file_changed(target_path):
        extract_runs_into_db(source_path, target_path,
                             *exp_1_run_ids[::-1])

    target_exps = get_experiments(target_conn)

    assert len(target_exps) == 2
    assert len(target_exp1) == 5
    assert len(target_exp2) == 1

    # check that all the datasets match up
    for run_id in exp_1_run_ids + exp_2_run_ids:
        source_ds = DataSet(conn=source_conn, run_id=run_id)
        # look up the target run via the (globally unique) GUID, since
        # run_ids may differ between source and target DBs
        target_ds = load_by_guid(guid=source_ds.guid, conn=target_conn)

        assert source_ds.the_same_dataset_as(target_ds)

        source_data = source_ds.get_parameter_data(
            *source_ds.parameters.split(','))
        target_data = target_ds.get_parameter_data(
            *target_ds.parameters.split(','))

        for outkey, outval in source_data.items():
            for inkey, inval in outval.items():
                np.testing.assert_array_equal(inval,
                                              target_data[outkey][inkey])
def test_basic_extraction(two_empty_temp_db_connections, some_interdeps):
    """
    Extract a single completed run into a fresh target DB and verify that
    the run, its data, its metadata, and its experiment attributes all
    survive the copy; also verify that extraction of an incomplete run
    raises and that re-extraction is a no-op.
    """
    source_conn, target_conn = two_empty_temp_db_connections

    source_path = path_to_dbfile(source_conn)
    target_path = path_to_dbfile(target_conn)

    # map a paramspec type string to a caster producing a valid raw value
    type_casters = {'numeric': float,
                    'array': (lambda x: np.array(x) if hasattr(x, '__iter__')
                              else np.array([x])),
                    'text': str}

    source_exp = Experiment(conn=source_conn)
    source_dataset = DataSet(conn=source_conn, name="basic_copy_paste_name")

    # an incomplete (not yet completed) run must refuse to be extracted
    with pytest.raises(RuntimeError) as excinfo:
        extract_runs_into_db(source_path, target_path, source_dataset.run_id)

    assert error_caused_by(excinfo, ('Dataset not completed. An incomplete '
                                     'dataset can not be copied. The '
                                     'incomplete dataset has GUID: '
                                     f'{source_dataset.guid} and run_id: '
                                     f'{source_dataset.run_id}'))

    source_dataset.set_interdependencies(some_interdeps[0])

    source_dataset.parent_dataset_links = [Link(head=source_dataset.guid,
                                                tail=str(uuid.uuid4()),
                                                edge_type='test_link')]
    source_dataset.mark_started()

    for value in range(10):
        result = {ps.name: type_casters[ps.type](value)
                  for ps in some_interdeps[0].paramspecs}
        source_dataset.add_results([result])

    source_dataset.add_metadata('goodness', 'fair')
    source_dataset.add_metadata('test', True)

    source_dataset.mark_completed()

    assert source_dataset.run_id == source_dataset.captured_run_id

    extract_runs_into_db(source_path, target_path, source_dataset.run_id)

    target_exp = Experiment(conn=target_conn, exp_id=1)

    length1 = len(target_exp)
    assert length1 == 1

    # trying to insert the same run again should be a NOOP
    with raise_if_file_changed(target_path):
        extract_runs_into_db(source_path, target_path, source_dataset.run_id)

    assert len(target_exp) == length1

    target_dataset = DataSet(conn=target_conn, run_id=1)

    # Now make the interesting comparisons: are the target objects the same as
    # the source objects?

    assert source_dataset.the_same_dataset_as(target_dataset)

    source_data = source_dataset.get_parameter_data(
        *source_dataset.parameters.split(','))
    target_data = target_dataset.get_parameter_data(
        *target_dataset.parameters.split(','))

    # every data array must have been copied over verbatim
    for outkey, outval in source_data.items():
        for inkey, inval in outval.items():
            np.testing.assert_array_equal(inval, target_data[outkey][inkey])

    exp_attrs = ['name', 'sample_name', 'format_string', 'started_at',
                 'finished_at']

    # experiment-level attributes must match between source and target
    for exp_attr in exp_attrs:
        assert getattr(source_exp, exp_attr) == getattr(target_exp, exp_attr)

    # trying to insert the same run again should be a NOOP
    with raise_if_file_changed(target_path):
        extract_runs_into_db(source_path, target_path, source_dataset.run_id)