Example 1
# Imports assumed by both examples; the module paths follow the QCoDeS
# layout these tests appear to target and may differ between versions.
# two_empty_temp_db_connections, empty_temp_db_connection and some_interdeps
# are pytest fixtures provided by the test suite; raise_if_file_changed is a
# local test helper (a sketch is given after Example 2).
import random
import re

import qcodes as qc
from qcodes.dataset.data_set import (
    DataSet,
    generate_dataset_table,
    load_by_guid,
    load_by_run_spec,
)
from qcodes.dataset.database_extract_runs import extract_runs_into_db
from qcodes.dataset.experiment_container import Experiment
from qcodes.dataset.sqlite.connection import path_to_dbfile
from qcodes.dataset.sqlite.queries import get_experiments

def test_combine_runs(two_empty_temp_db_connections, empty_temp_db_connection,
                      some_interdeps):
    """
    Test that datasets exported in random order from two source databases
    can be reloaded by their original captured_run_id or captured_counter
    together with the experiment name.
    """
    source_conn_1, source_conn_2 = two_empty_temp_db_connections
    target_conn = empty_temp_db_connection

    source_1_exp = Experiment(conn=source_conn_1,
                              name='exp1',
                              sample_name='no_sample')
    source_1_datasets = [
        DataSet(conn=source_conn_1, exp_id=source_1_exp.exp_id)
        for _ in range(10)
    ]

    source_2_exp = Experiment(conn=source_conn_2,
                              name='exp2',
                              sample_name='no_sample')

    source_2_datasets = [
        DataSet(conn=source_conn_2, exp_id=source_2_exp.exp_id)
        for _ in range(10)
    ]

    source_all_datasets = source_1_datasets + source_2_datasets

    shuffled_datasets = source_all_datasets.copy()
    random.shuffle(shuffled_datasets)

    for ds in source_all_datasets:
        ds.set_interdependencies(some_interdeps[1])
        ds.mark_started()
        ds.add_results([{name: 0.0 for name in some_interdeps[1].names}])
        ds.mark_completed()

    # now let's insert all datasets in random order
    for ds in shuffled_datasets:
        extract_runs_into_db(ds.conn.path_to_dbfile,
                             target_conn.path_to_dbfile, ds.run_id)

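    # each dataset must be reloadable by its original captured_run_id
    # together with the experiment name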
    for ds in source_all_datasets:
        loaded_ds = load_by_run_spec(captured_run_id=ds.captured_run_id,
                                     experiment_name=ds.exp_name,
                                     conn=target_conn)
        assert ds.the_same_dataset_as(loaded_ds)

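    # the same runs must also be reloadable by their captured_counter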
    for ds in source_all_datasets:
        loaded_ds = load_by_run_spec(captured_counter=ds.captured_counter,
                                     experiment_name=ds.exp_name,
                                     conn=target_conn)
        assert ds.the_same_dataset_as(loaded_ds)

    # Now test that we generate the correct table for the guids above
    # this could be split out into its own test
    # but the test above has the useful side effect of
    # setting up datasets for this test.
    guids = [ds.guid for ds in source_all_datasets]

    table = generate_dataset_table(guids, conn=target_conn)
    lines = table.split('\n')
    headers = re.split(r'\s+', lines[0].strip())

    cfg = qc.config
    guid_comp = cfg['GUID_components']

    # borrowed fallback logic from generate_guid
    sampleint = guid_comp['sample']
    if sampleint == 0:
        sampleint = int('a' * 8, base=16)

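    # rows 0 and 1 of the table are the header and separator lines,
    # so the data rows start at index 2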
    for i in range(2, len(lines)):
        split_line = re.split(r'\s+', lines[i].strip())
        mydict = {headers[j]: split_line[j] for j in range(len(split_line))}
        ds = load_by_guid(guids[i - 2], conn=target_conn)
        assert ds.captured_run_id == int(mydict['captured_run_id'])
        assert ds.captured_counter == int(mydict['captured_counter'])
        assert ds.exp_name == mydict['experiment_name']
        assert ds.sample_name == mydict['sample_name']
        assert int(mydict['sample_id']) == sampleint
        assert guid_comp['location'] == int(mydict['location'])
        assert guid_comp['work_station'] == int(mydict['work_station'])
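
The extract-and-reload pattern exercised above also works outside a test. Below is a minimal standalone sketch, assuming the QCoDeS public API used in the test; the file paths, the run ids 1 and 2, and the experiment name 'exp1' are placeholders, not values from the test.

from qcodes.dataset.data_set import load_by_run_spec
from qcodes.dataset.database_extract_runs import extract_runs_into_db
from qcodes.dataset.sqlite.database import connect, initialise_or_create_database_at

source_path = './source.db'  # placeholder: an existing database with runs
target_path = './target.db'  # placeholder: the database to extract into

# make sure the target database exists with an up-to-date schema
initialise_or_create_database_at(target_path)

# copy the runs with run_id 1 and 2; captured_run_id and captured_counter
# are preserved in the copies
extract_runs_into_db(source_path, target_path, 1, 2)

# reload one of the copied runs by its original (captured) identity
target_conn = connect(target_path)
loaded_ds = load_by_run_spec(captured_run_id=1,
                             experiment_name='exp1',
                             conn=target_conn)
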
Example 2
def test_correct_experiment_routing(two_empty_temp_db_connections,
                                    some_interdeps):
    """
    Test that existing experiments are correctly identified AND that multiple
    insertions of the same runs don't matter (run insertion is idempotent).
    """
    source_conn, target_conn = two_empty_temp_db_connections

    source_exp_1 = Experiment(conn=source_conn)

    # make 5 runs in first experiment

    exp_1_run_ids = []
    for _ in range(5):
        source_dataset = DataSet(conn=source_conn, exp_id=source_exp_1.exp_id)
        exp_1_run_ids.append(source_dataset.run_id)

        source_dataset.set_interdependencies(some_interdeps[1])
        source_dataset.mark_started()

        for val in range(10):
            source_dataset.add_results(
                [{name: val for name in some_interdeps[1].names}])
        source_dataset.mark_completed()

    # make a new experiment with 1 run

    source_exp_2 = Experiment(conn=source_conn)
    ds = DataSet(conn=source_conn, exp_id=source_exp_2.exp_id, name="lala")
    exp_2_run_ids = [ds.run_id]

    ds.set_interdependencies(some_interdeps[1])
    ds.mark_started()

    for val in range(10):
        ds.add_results([{name: val for name in some_interdeps[1].names}])
    ds.mark_completed()

    source_path = path_to_dbfile(source_conn)
    target_path = path_to_dbfile(target_conn)

    # now copy 2 runs
    extract_runs_into_db(source_path, target_path, *exp_1_run_ids[:2])

    target_exp1 = Experiment(conn=target_conn, exp_id=1)

    assert len(target_exp1) == 2

    # copy two other runs, one of them already in
    extract_runs_into_db(source_path, target_path, *exp_1_run_ids[1:3])

    assert len(target_exp1) == 3

    # insert run from different experiment
    extract_runs_into_db(source_path, target_path, ds.run_id)

    assert len(target_exp1) == 3

    target_exp2 = Experiment(conn=target_conn, exp_id=2)

    assert len(target_exp2) == 1

    # finally insert every single run from experiment 1

    extract_runs_into_db(source_path, target_path, *exp_1_run_ids)

    # check for idempotency once more by inserting all the runs again,
    # this time in reverse order
    with raise_if_file_changed(target_path):
        extract_runs_into_db(source_path, target_path, *exp_1_run_ids[::-1])

    target_exps = get_experiments(target_conn)

    assert len(target_exps) == 2
    assert len(target_exp1) == 5
    assert len(target_exp2) == 1

    # check that all the datasets match up
    for run_id in exp_1_run_ids + exp_2_run_ids:
        source_ds = DataSet(conn=source_conn, run_id=run_id)
        target_ds = load_by_guid(guid=source_ds.guid, conn=target_conn)

        assert source_ds.the_same_dataset_as(target_ds)

        source_data = source_ds.get_data(*source_ds.parameters.split(','))
        target_data = target_ds.get_data(*target_ds.parameters.split(','))

        assert source_data == target_data
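
Example 2 relies on a raise_if_file_changed helper that is not defined in the snippet. Below is a minimal sketch of such a context manager, assuming a modification-time check is sufficient to detect writes for the purposes of the test; the real helper in the test suite may be implemented differently.

import os
from contextlib import contextmanager

@contextmanager
def raise_if_file_changed(path_to_file):
    """Raise RuntimeError if the file is modified inside the with-block."""
    mtime_before = os.path.getmtime(path_to_file)
    # no try/except: an exception raised in the block should propagate as-is
    yield
    mtime_after = os.path.getmtime(path_to_file)
    if mtime_after != mtime_before:
        raise RuntimeError(f'File {path_to_file} was modified.')

Note that mtime resolution is coarse on some filesystems, so a very fast write could in principle go undetected; a content hash would be a stricter check.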