예제 #1
0
def _update_convergence_plots(
    database,
    criterion_cds,
    param_cds,
    session_data,
    start_params,
    rollover,
    update_chunk,
    stride,
):
    """Callback to look up new entries in the database and plot them.

    Args:
        database (sqlalchemy.MetaData)
        criterion_cds (bokeh.ColumnDataSource)
        param_cds (bokeh.ColumnDataSource)
        session_data (dict):
            infos to be passed between and within apps.
            Keys of this app's entry are:
            - last_retrieved (int): last iteration currently in the ColumnDataSource
            - database_path
        start_params (pd.DataFrame)
        rollover (int): maximal number of points to show in the plot
        update_chunk (int): Number of values to add at each update.
        stride (int): Plot every stride_th database row in the dashboard. Note that
            some database rows only contain gradient evaluations, thus for some values
            of stride the convergence plot of the criterion function can be empty.

    """
    # clip criterion values to the largest finite float so extreme entries
    # cannot break the bokeh plot ranges
    clip_bound = np.finfo(float).max
    data, new_last = read_new_rows(
        database=database,
        table_name="optimization_iterations",
        last_retrieved=session_data["last_retrieved"],
        return_type="dict_of_lists",
        limit=update_chunk,
        stride=stride,
    )

    # update the criterion plot, skipping rows without a criterion value
    # (e.g. rows that only contain gradient evaluations).
    # a set makes the `i not in missing` membership tests below O(1).
    missing = {i for i, val in enumerate(data["value"]) if val is None}
    crit_data = {
        "iteration": [
            id_ for i, id_ in enumerate(data["rowid"]) if i not in missing
        ],
        "criterion": [
            np.clip(val, -clip_bound, clip_bound)
            for i, val in enumerate(data["value"])
            if i not in missing
        ],
    }
    _stream_data(cds=criterion_cds, data=crit_data, rollover=rollover)

    # update the parameter plots
    # Note: we need **all** parameter ids to correctly map them to the parameter entries
    # in the database. Only after can we restrict them to the entries we need.
    param_ids = start_params["id"].tolist()
    params_data = _create_params_data_for_update(data, param_ids, clip_bound)
    _stream_data(cds=param_cds, data=params_data, rollover=rollover)
    # remember how far we read so the next callback only fetches new rows
    session_data["last_retrieved"] = new_last
예제 #2
0
def test_steps_table(tmp_path):
    """Rows appended to the steps table can be read back, skipping old ones."""
    db_path = tmp_path / "test.db"
    database = load_database(path=db_path)
    make_steps_table(database)
    for current_status in ("scheduled", "running", "completed"):
        row = {
            "status": current_status,
            "n_iterations": 0,
            "type": "optimization",
            "name": "bla",
        }
        append_row(row, "steps", database, db_path, False)

    result, _ = read_new_rows(database, "steps", 1, "dict_of_lists")

    # the first row (rowid 1) was already retrieved and must be skipped
    assert result == {
        "rowid": [2, 3],
        "status": ["running", "completed"],
        "type": ["optimization", "optimization"],
        "name": ["bla", "bla"],
        "n_iterations": [0, 0],
    }
예제 #3
0
def _read_optimization_history(database, params_treedef, registry):
    """Read a history of values, parameters and other information.

    Args:
        database (sqlalchemy.MetaData)
        params_treedef: treedef used to unflatten the stored parameter vectors.
        registry: registry passed to ``tree_unflatten``.

    Returns:
        dict: with keys "params", "criterion" and "runtime". Runtimes are
            expressed relative to the first retained evaluation.

    """
    raw_res, _ = read_new_rows(
        database=database,
        table_name="optimization_iterations",
        last_retrieved=0,
        return_type="list_of_dicts",
    )

    history = {"params": [], "criterion": [], "runtime": []}
    for data in raw_res:
        # skip rows without a criterion value (e.g. pure gradient evaluations)
        if data["value"] is not None:
            params = tree_unflatten(params_treedef,
                                    data["params"],
                                    registry=registry)
            history["params"].append(params)
            history["criterion"].append(data["value"])
            history["runtime"].append(data["timestamp"])

    # express runtimes relative to the first evaluation; guard against an
    # empty history, where times[0] would raise an IndexError
    times = np.array(history["runtime"])
    if len(times) > 0:
        times -= times[0]
    history["runtime"] = times

    return history
def test_optimization_status_table(tmp_path):
    """Status rows after the last retrieved one are returned."""
    db_path = tmp_path / "test.db"
    database = load_database(path=db_path)
    make_optimization_status_table(database)
    for current in ("scheduled", "running", "success"):
        append_row(
            {"status": current}, "optimization_status", database, db_path, False
        )

    result, _ = read_new_rows(database, "optimization_status", 1, "dict_of_lists")

    assert result == {"rowid": [2, 3], "status": ["running", "success"]}
예제 #5
0
def test_read_new_rows_stride(tmp_path, iteration_data):
    """With stride=2 only every second database row is returned."""
    db_path = tmp_path / "test.db"
    database = load_database(path=db_path)
    make_optimization_iteration_table(database, first_eval={"output": 0.5})
    # sqlalchemy row ids start at 1
    for value in range(1, 11):
        iteration_data["value"] = value
        append_row(iteration_data, "optimization_iterations", database, db_path, False)

    data, _ = read_new_rows(
        database=database,
        table_name="optimization_iterations",
        last_retrieved=1,
        return_type="dict_of_lists",
        stride=2,
    )

    assert data["value"] == [2.0, 4.0, 6.0, 8.0, 10.0]
예제 #6
0
def test_read_new_rows_with_step(tmp_path, iteration_data):
    """Filtering by step returns only the rows belonging to that step."""
    db_path = tmp_path / "test.db"
    database = load_database(path=db_path)
    make_optimization_iteration_table(database, first_eval={"output": 0.5})
    # sqlalchemy row ids start at 1; even rows get step 0, odd rows step 1
    for value in range(1, 11):
        iteration_data["value"] = value
        iteration_data["step"] = value % 2
        append_row(iteration_data, "optimization_iterations", database, db_path, False)

    data, _ = read_new_rows(
        database=database,
        table_name="optimization_iterations",
        last_retrieved=0,
        return_type="dict_of_lists",
        step=0,
    )

    assert data["rowid"] == [2, 4, 6, 8, 10]
예제 #7
0
def test_update_row(tmp_path, iteration_data):
    """A single row can be overwritten in place and is read back updated."""
    db_path = tmp_path / "test.db"
    database = load_database(path=db_path)
    make_optimization_iteration_table(database, first_eval={"output": 0.5})
    # sqlalchemy row ids start at 1
    for value in range(1, 11):
        iteration_data["value"] = value
        append_row(iteration_data, "optimization_iterations", database, db_path, False)

    # overwrite the value of row 8
    update_row({"value": 20}, 8, "optimization_iterations", database, db_path, False)

    data, _ = read_new_rows(
        database=database,
        table_name="optimization_iterations",
        last_retrieved=3,
        return_type="dict_of_lists",
    )

    assert data["value"] == [4, 5, 6, 7, 20, 9, 10]
예제 #8
0
def read_optimization_histories(path_or_database):
    """Read histories of values, parameters and other information.

    Args:
        path_or_database (pathlib.Path, str or sqlalchemy.MetaData)

    Returns:
        dict: with keys "values", "params", "metadata" and, if the table
            contains contributions, "contributions".

    """
    database = load_database(**_process_path_or_database(path_or_database))

    start_params = read_start_params(path_or_database)

    raw_res, _ = read_new_rows(
        database=database,
        table_name="optimization_iterations",
        last_retrieved=0,
        return_type="dict_of_lists",
    )

    params_history = pd.DataFrame(raw_res["params"],
                                  columns=start_params.index)
    value_history = pd.Series(raw_res["value"])

    metadata = pd.DataFrame()
    metadata["timestamps"] = raw_res["timestamp"]
    metadata["valid"] = raw_res["valid"]
    metadata["has_value"] = value_history.notnull()
    metadata["has_derivative"] = [
        d is not None for d in raw_res["internal_derivative"]
    ]

    histories = {
        "values": value_history.dropna(),
        "params": params_history,
        "metadata": metadata,
    }

    # the truthiness check also guards against an empty table, where
    # raw_res["contributions"][0] would raise an IndexError
    if raw_res.get("contributions"):
        first_contrib = raw_res["contributions"][0]
        # a Series carries contribution labels; reuse them as column names
        if isinstance(first_contrib, pd.Series):
            columns = first_contrib.index
        else:
            columns = None
        contributions_history = pd.DataFrame(raw_res["contributions"],
                                             columns=columns).dropna()
        histories["contributions"] = contributions_history

    return histories
예제 #9
0
def read_optimization_problem_table(path_or_database):
    """Load the start parameters DataFrame.

    Args:
        path_or_database (pathlib.Path, str or sqlalchemy.MetaData)

    Returns:
        params (pd.DataFrame): see :ref:`params`.

    """
    database = _load_database(path_or_database)
    # fetch every row of the table by starting from position 0
    rows, _ = read_new_rows(
        database=database,
        table_name="optimization_problem",
        last_retrieved=0,
        return_type="list_of_dicts",
    )
    return pd.DataFrame(rows)
예제 #10
0
def read_steps_table(path_or_database):
    """Load the steps table.

    Args:
        path_or_database (pathlib.Path, str or sqlalchemy.MetaData)

    Returns:
        steps_df (pandas.DataFrame)

    """
    database = _load_database(path_or_database)
    # fetch every row of the table by starting from position 0
    rows, _ = read_new_rows(
        database=database,
        table_name="steps",
        last_retrieved=0,
        return_type="list_of_dicts",
    )
    return pd.DataFrame(rows)
예제 #11
0
def test_all_steps_occur_in_optimization_iterations_if_no_convergence(params):
    """With unlimited discoveries, every multistart step logs iterations."""
    multistart_options = {"convergence_max_discoveries": np.inf}

    minimize(
        criterion=sos_dict_criterion,
        params=params,
        algorithm="scipy_lbfgsb",
        multistart=True,
        multistart_options=multistart_options,
        logging="logging.db",
    )

    database = load_database(path="logging.db")
    iterations, _ = read_new_rows(
        database=database,
        table_name="optimization_iterations",
        last_retrieved=0,
        return_type="dict_of_lists",
    )

    assert set(iterations["step"]) == {1, 2, 3, 4, 5}
예제 #12
0
def _read_multistart_optimization_history(database, params_treedef, registry,
                                          direction):
    """Read multistart histories of values, parameters and other information.

    Args:
        database (sqlalchemy.MetaData)
        params_treedef: treedef used to unflatten the stored parameter vectors.
        registry: registry passed to ``tree_unflatten``.
        direction (str): either "minimize" or "maximize".

    Returns:
        tuple:
        - dict: history that led to lowest criterion
        - dict: all other histories
        - dict: exploration phase

    """
    # validate direction up front with an informative message, before any
    # expensive database reads
    if direction not in ("minimize", "maximize"):
        raise ValueError(
            f"direction must be 'minimize' or 'maximize', not {direction!r}."
        )

    # ==================================================================================
    # Process raw data
    # ==================================================================================
    steps = read_steps_table(database)

    raw_res, _ = read_new_rows(
        database=database,
        table_name="optimization_iterations",
        last_retrieved=0,
        return_type="list_of_dicts",
    )

    history = {"params": [], "criterion": [], "runtime": [], "step": []}
    for data in raw_res:
        # skip rows without a criterion value (e.g. pure gradient evaluations)
        if data["value"] is not None:
            params = tree_unflatten(params_treedef,
                                    data["params"],
                                    registry=registry)
            history["params"].append(params)
            history["criterion"].append(data["value"])
            history["runtime"].append(data["timestamp"])
            history["step"].append(data["step"])

    # express runtimes relative to the first evaluation
    times = np.array(history["runtime"])
    times -= times[0]
    history["runtime"] = times

    # ==================================================================================
    # Format data as data frames
    # ==================================================================================
    df = pd.DataFrame(history)
    # attach the step type (exploration / optimization) from the steps table
    df = df.merge(steps[["rowid", "type"]], left_on="step", right_on="rowid")
    df = df.drop(columns="rowid")

    # ==================================================================================
    # Extract data from df
    # ==================================================================================
    exploration = df.query("type == 'exploration'").drop(
        columns=["step", "type"])

    histories = df.query("type == 'optimization'")
    histories = histories.drop(columns="type")
    histories = histories.set_index("step", append=True)

    # ==================================================================================
    # The best history is given by the history that attains the global minimum or
    # maximum. All other histories are defined as local histories.

    if direction == "minimize":
        best_idx = histories["criterion"].groupby(level="step").min().idxmin()
        exploration = exploration.sort_values(by="criterion", ascending=True)
    else:
        best_idx = histories["criterion"].groupby(level="step").max().idxmax()
        exploration = exploration.sort_values(by="criterion", ascending=False)

    history = histories.xs(best_idx, level="step").to_dict(orient="list")

    exploration = None if len(exploration) == 0 else exploration
    if exploration is not None:
        exploration = exploration.to_dict(orient="list")

    # all optimization histories except the best one are "local" histories
    local_histories = []
    other_steps = histories.index.get_level_values("step").unique().difference(
        [best_idx])
    for idx in other_steps:
        _local_history = histories.xs(idx, level="step").to_dict(orient="list")
        local_histories.append(_local_history)

    local_histories = None if len(local_histories) == 0 else local_histories

    return history, local_histories, exploration