Exemple #1
0
def test_is_run_id_in_db(empty_temp_db):
    conn = mut_db.connect(get_DB_location())
    mut_queries.new_experiment(conn, 'test_exp', 'no_sample')

    for _ in range(5):
        ds = DataSet(conn=conn, run_id=None)

    # there should now be run_ids 1, 2, 3, 4, 5 in the database
    good_ids = [1, 2, 3, 4, 5]
    try_ids = [1, 3, 9999, 23, 0, 1, 1, 3, 34]

    sorted_try_ids = np.unique(try_ids)

    expected_dict = {tid: (tid in good_ids) for tid in sorted_try_ids}

    acquired_dict = mut_queries.is_run_id_in_database(conn, *try_ids)

    assert expected_dict == acquired_dict
Exemple #2
0
def extract_runs_into_db(source_db_path: str,
                         target_db_path: str, *run_ids: int,
                         upgrade_source_db: bool = False,
                         upgrade_target_db: bool = False) -> None:
    """
    Extract a selection of runs into another DB file. All runs must come from
    the same experiment. They will be added to an experiment with the same name
    and ``sample_name`` in the target db. If such an experiment does not exist, it
    will be created.

    Args:
        source_db_path: Path to the source DB file
        target_db_path: Path to the target DB file. The target DB file will be
          created if it does not exist.
        run_ids: The ``run_id``'s of the runs to copy into the target DB file
        upgrade_source_db: If the source DB is found to be in a version that is
          not the newest, should it be upgraded?
        upgrade_target_db: If the target DB is found to be in a version that is
          not the newest, should it be upgraded?
    """
    # Check for versions
    (s_v, new_v) = get_db_version_and_newest_available_version(source_db_path)
    if s_v < new_v and not upgrade_source_db:
        warn(f'Source DB version is {s_v}, but this function needs it to be'
             f' in version {new_v}. Run this function again with '
             'upgrade_source_db=True to auto-upgrade the source DB file.')
        return

    if os.path.exists(target_db_path):
        (t_v, new_v) = get_db_version_and_newest_available_version(target_db_path)
        if t_v < new_v and not upgrade_target_db:
            warn(f'Target DB version is {t_v}, but this function needs it to '
                 f'be in version {new_v}. Run this function again with '
                 'upgrade_target_db=True to auto-upgrade the target DB file.')
            return

    source_conn = connect(source_db_path)

    # Validate that all runs are in the source database
    do_runs_exist = is_run_id_in_database(source_conn, *run_ids)
    if False in do_runs_exist.values():
        source_conn.close()
        non_existing_ids = [rid for rid in run_ids if not do_runs_exist[rid]]
        err_mssg = ("Error: not all run_ids exist in the source database. "
                    "The following run(s) is/are not present: "
                    f"{non_existing_ids}")
        raise ValueError(err_mssg)

    # Validate that all runs are from the same experiment

    source_exp_ids = np.unique(get_exp_ids_from_run_ids(source_conn, run_ids))
    if len(source_exp_ids) != 1:
        source_conn.close()
        raise ValueError('Did not receive runs from a single experiment. '
                         f'Got runs from experiments {source_exp_ids}')

    # Fetch the attributes of the runs' experiment
    # hopefully, this is enough to uniquely identify the experiment
    exp_attrs = get_experiment_attributes_by_exp_id(source_conn, source_exp_ids[0])

    # Massage the target DB file to accommodate the runs
    # (create new experiment if needed)

    target_conn = connect(target_db_path)

    # this function raises if the target DB file has several experiments
    # matching both the name and sample_name

    try:
        with atomic(target_conn) as target_conn:

            target_exp_id = _create_exp_if_needed(target_conn,
                                                  exp_attrs['name'],
                                                  exp_attrs['sample_name'],
                                                  exp_attrs['format_string'],
                                                  exp_attrs['start_time'],
                                                  exp_attrs['end_time'])

            # Finally insert the runs
            for run_id in run_ids:
                _extract_single_dataset_into_db(DataSet(run_id=run_id,
                                                        conn=source_conn),
                                                target_conn,
                                                target_exp_id)
    finally:
        source_conn.close()
        target_conn.close()