Example #1
from copy import copy
import random
from typing import Dict, Optional, Sequence, Tuple

import numpy as np

from qcodes.dataset.data_set import DataSet

# verify_data_dict is the comparison helper defined alongside this function
# in the qcodes test suite.


def parameter_test_helper(ds: DataSet,
                          toplevel_names: Sequence[str],
                          expected_names: Dict[str, Sequence[str]],
                          expected_shapes: Dict[str, Sequence[Tuple[int, ...]]],
                          expected_values: Dict[str, Sequence[np.ndarray]],
                          start: Optional[int] = None,
                          end: Optional[int] = None):
    """
    A helper function to compare the data we actually read out of a given
    dataset with the expected data.

    Args:
        ds: the dataset in question
        toplevel_names: names of the toplevel parameters of the dataset
        expected_names: names of the parameters expected to be loaded for a
            given parameter as a sequence indexed by the parameter.
        expected_shapes: expected shapes of the parameters loaded. The shapes
            should be stored as a tuple per parameter in a sequence containing
            all the loaded parameters for a given requested parameter.
        expected_values: expected content of the data arrays, stored as a
            sequence of arrays per requested parameter.
        start: optional first row to load, forwarded to the data loading
            calls.
        end: optional last row to load, forwarded to the data loading calls.

    A usage sketch with hypothetical expected_* dictionaries follows this
    example.
    """

    data = ds.get_parameter_data(*toplevel_names, start=start, end=end)
    dataframe = ds.get_data_as_pandas_dataframe(*toplevel_names,
                                                start=start,
                                                end=end)

    all_data = ds.get_parameter_data(start=start, end=end)
    all_dataframe = ds.get_data_as_pandas_dataframe(start=start, end=end)

    all_parameters = list(all_data.keys())
    assert set(data.keys()).issubset(set(all_parameters))
    assert list(data.keys()) == list(dataframe.keys())
    assert len(data.keys()) == len(toplevel_names)
    assert len(dataframe.keys()) == len(toplevel_names)

    verify_data_dict(data, dataframe, toplevel_names, expected_names,
                     expected_shapes, expected_values)
    verify_data_dict(all_data, all_dataframe, toplevel_names, expected_names,
                     expected_shapes, expected_values)

    # Now let's remove a random element from the list.
    # We do this one by one, until only one element remains in the list.
    subset_names = copy(all_parameters)
    while len(subset_names) > 1:
        elem_to_remove = random.randint(0, len(subset_names) - 1)
        name_removed = subset_names.pop(elem_to_remove)
        expected_names.pop(name_removed)
        expected_shapes.pop(name_removed)
        expected_values.pop(name_removed)

        subset_data = ds.get_parameter_data(*subset_names,
                                            start=start, end=end)
        subset_dataframe = ds.get_data_as_pandas_dataframe(*subset_names,
                                                           start=start,
                                                           end=end)
        verify_data_dict(subset_data, subset_dataframe, subset_names,
                         expected_names, expected_shapes, expected_values)
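
A minimal usage sketch for the helper above, with a hypothetical dataset ds
in which 'y' depends on 'x' and ten rows are stored; the parameter names and
values are invented for illustration. Each expected_* dictionary is indexed
by the requested top-level parameter and holds one entry per loaded
parameter:

import numpy as np

expected_names = {'y': ['y', 'x']}
expected_shapes = {'y': [(10,), (10,)]}
expected_values = {'y': [np.arange(10) ** 2, np.arange(10)]}

parameter_test_helper(ds,
                      toplevel_names=['y'],
                      expected_names=expected_names,
                      expected_shapes=expected_shapes,
                      expected_values=expected_values)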
Example #2
from typing import Dict

from plottr.data.datadict import DataDict  # assumed import path for DataDict
from qcodes.dataset.data_set import DataSet


def ds_to_datadicts(ds: DataSet) -> Dict[str, DataDict]:
    """
    Make DataDicts from a qcodes DataSet.

    :param ds: qcodes dataset
    :returns: dictionary with one item per dependent.
              key: name of the dependent
              value: DataDict containing that dependent and its
                     axes.
    """
    ret = {}
    pdata = ds.get_parameter_data()
    for p, spec in ds.paramspecs.items():
        if spec.depends_on != '':
            axes = spec.depends_on_  # list of setpoint (axis) parameter names
            data = dict()
            data[p] = dict(unit=spec.unit, axes=axes, values=pdata[p][p])
            for ax in axes:
                axspec = ds.paramspecs[ax]
                data[ax] = dict(unit=axspec.unit, values=pdata[p][ax])
            ret[p] = DataDict(**data)
            ret[p].validate()

    return ret
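
A short sketch of how this converter might be used, assuming the qcodes
database is already initialised and contains a completed run with run_id 1;
load_by_id is the standard qcodes run loader:

from qcodes.dataset.data_set import load_by_id

ds = load_by_id(1)               # load an existing, completed run
datadicts = ds_to_datadicts(ds)
for name, dd in datadicts.items():
    print(name)                  # one validated DataDict per dependent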
Example #3
def test_correct_experiment_routing(two_empty_temp_db_connections,
                                    some_interdeps):
    """
    Test that existing experiments are correctly identified AND that multiple
    insertions of the same runs don't matter (run insertion is idempotent)
    """
    source_conn, target_conn = two_empty_temp_db_connections

    source_exp_1 = Experiment(conn=source_conn)

    # make 5 runs in the first experiment

    exp_1_run_ids = []
    for _ in range(5):

        source_dataset = DataSet(conn=source_conn, exp_id=source_exp_1.exp_id)
        exp_1_run_ids.append(source_dataset.run_id)

        source_dataset.set_interdependencies(some_interdeps[1])

        source_dataset.mark_started()

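        # add ten rows; each result dict maps every parameter name to the
        # same value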
        for val in range(10):
            source_dataset.add_results(
                [{name: val for name in some_interdeps[1].names}])
        source_dataset.mark_completed()

    # make a new experiment with 1 run

    source_exp_2 = Experiment(conn=source_conn)
    ds = DataSet(conn=source_conn, exp_id=source_exp_2.exp_id, name="lala")
    exp_2_run_ids = [ds.run_id]

    ds.set_interdependencies(some_interdeps[1])

    ds.mark_started()

    for val in range(10):
        ds.add_results([{name: val for name in some_interdeps[1].names}])

    ds.mark_completed()

    source_path = path_to_dbfile(source_conn)
    target_path = path_to_dbfile(target_conn)

    # now copy 2 runs
    extract_runs_into_db(source_path, target_path, *exp_1_run_ids[:2])

    target_exp1 = Experiment(conn=target_conn, exp_id=1)

    assert len(target_exp1) == 2

    # copy two more runs, one of which is already in the target
    extract_runs_into_db(source_path, target_path, *exp_1_run_ids[1:3])

    assert len(target_exp1) == 3

    # insert a run from a different experiment
    extract_runs_into_db(source_path, target_path, ds.run_id)

    assert len(target_exp1) == 3

    target_exp2 = Experiment(conn=target_conn, exp_id=2)

    assert len(target_exp2) == 1

    # finally insert every single run from experiment 1

    extract_runs_into_db(source_path, target_path, *exp_1_run_ids)

    # check for idempotency once more by inserting all the runs again, this
    # time in reverse order
    with raise_if_file_changed(target_path):
        extract_runs_into_db(source_path, target_path, *exp_1_run_ids[::-1])

    target_exps = get_experiments(target_conn)

    assert len(target_exps) == 2
    assert len(target_exp1) == 5
    assert len(target_exp2) == 1

    # check that all the datasets match up
    for run_id in exp_1_run_ids + exp_2_run_ids:
        source_ds = DataSet(conn=source_conn, run_id=run_id)
        target_ds = load_by_guid(guid=source_ds.guid, conn=target_conn)

        assert source_ds.the_same_dataset_as(target_ds)

        source_data = source_ds.get_parameter_data(
            *source_ds.parameters.split(','))
        target_data = target_ds.get_parameter_data(
            *target_ds.parameters.split(','))

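        # get_parameter_data returns a nested dict:
        # {top-level parameter: {parameter name: array of values}},
        # so the comparison below walks both levels.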
        for outkey, outval in source_data.items():
            for inkey, inval in outval.items():
                np.testing.assert_array_equal(inval,
                                              target_data[outkey][inkey])
Example #4
def test_basic_extraction(two_empty_temp_db_connections, some_interdeps):
    source_conn, target_conn = two_empty_temp_db_connections

    source_path = path_to_dbfile(source_conn)
    target_path = path_to_dbfile(target_conn)

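    # cast the loop value below to the Python/NumPy type matching each
    # paramspec type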
    type_casters = {
        'numeric': float,
        'array': (lambda x: np.array(x)
                  if hasattr(x, '__iter__') else np.array([x])),
        'text': str
    }

    source_exp = Experiment(conn=source_conn)
    source_dataset = DataSet(conn=source_conn, name="basic_copy_paste_name")

    with pytest.raises(RuntimeError) as excinfo:
        extract_runs_into_db(source_path, target_path, source_dataset.run_id)

    assert error_caused_by(excinfo, ('Dataset not completed. An incomplete '
                                     'dataset can not be copied. The '
                                     'incomplete dataset has GUID: '
                                     f'{source_dataset.guid} and run_id: '
                                     f'{source_dataset.run_id}'))

    source_dataset.set_interdependencies(some_interdeps[0])

    source_dataset.parent_dataset_links = [
        Link(head=source_dataset.guid,
             tail=str(uuid.uuid4()),
             edge_type='test_link')
    ]
    source_dataset.mark_started()

    for value in range(10):
        result = {
            ps.name: type_casters[ps.type](value)
            for ps in some_interdeps[0].paramspecs
        }
        source_dataset.add_results([result])

    source_dataset.add_metadata('goodness', 'fair')
    source_dataset.add_metadata('test', True)

    source_dataset.mark_completed()

    assert source_dataset.run_id == source_dataset.captured_run_id

    extract_runs_into_db(source_path, target_path, source_dataset.run_id)

    target_exp = Experiment(conn=target_conn, exp_id=1)

    length1 = len(target_exp)
    assert length1 == 1

    # trying to insert the same run again should be a NOOP
    with raise_if_file_changed(target_path):
        extract_runs_into_db(source_path, target_path, source_dataset.run_id)

    assert len(target_exp) == length1

    target_dataset = DataSet(conn=target_conn, run_id=1)

    # Now make the interesting comparisons: are the target objects the same as
    # the source objects?

    assert source_dataset.the_same_dataset_as(target_dataset)

    source_data = source_dataset.get_parameter_data(
        *source_dataset.parameters.split(','))
    target_data = target_dataset.get_parameter_data(
        *target_dataset.parameters.split(','))

    for outkey, outval in source_data.items():
        for inkey, inval in outval.items():
            np.testing.assert_array_equal(inval, target_data[outkey][inkey])

    exp_attrs = [
        'name', 'sample_name', 'format_string', 'started_at', 'finished_at'
    ]

    for exp_attr in exp_attrs:
        assert getattr(source_exp, exp_attr) == getattr(target_exp, exp_attr)

    # trying to insert the same run again should be a NOOP
    with raise_if_file_changed(target_path):
        extract_runs_into_db(source_path, target_path, source_dataset.run_id)
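
Outside the test fixtures, the same extraction boils down to a single call.
A minimal sketch, assuming source.db already holds a completed run with
run_id 1; the import path is the one used by recent qcodes versions:

from qcodes.dataset.database_extract_runs import extract_runs_into_db

# copy run 1 from source.db into target.db
extract_runs_into_db('./source.db', './target.db', 1)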