コード例 #1
0
def test_metrics(dim):
    """
    Check metric_lp between the all-ones and all-zeros vectors of size dim.

    For every p in 1..9 the test asserts (up to an absolute error of 1e-15)
    that the distance equals dim ** (1 / p) when all scaling entries are 1,
    and twice that value when all scaling entries are 0.5.
    """
    origin = np.zeros(dim)
    ones = np.ones(dim)
    unit_scaling = np.ones(dim)
    half_scaling = np.full(dim, 0.5)
    tol = 1e-15

    for p in range(1, 10):
        # Reference value shared by both scaling checks.
        expected = np.power(dim, 1.0 / p)

        err_unit = np.abs(metric_lp(ones, origin, p, unit_scaling) - expected)
        assert err_unit < tol

        err_half = np.abs(
            metric_lp(ones, origin, p, half_scaling) - 2 * expected
        )
        assert err_half < tol
コード例 #2
0
def map_to_representative(state,
                          lp_metric,
                          representative_states,
                          n_representatives,
                          min_dist,
                          scaling,
                          accept_new_repr):
    """
    Return the index of the representative state associated with `state`.

    The first `n_representatives` rows of `representative_states` are scanned
    and the one closest to `state` (according to `metric_lp` called with
    `lp_metric` and `scaling`) is selected.

    If that closest distance exceeds `min_dist`, there is still a free row
    in `representative_states`, and `accept_new_repr` is true, `state` is
    written in place to row `n_representatives` and that row index is
    returned with distance 0.0. The caller's `n_representatives` is not
    modified here.

    Returns
    -------
    (index, distance)
        Index of the chosen representative and its distance to `state`.
        When nothing was scanned and no row was added this is (-1, np.inf).
    """
    # Linear scan; the strict '<' keeps the earliest index on ties.
    best_idx = -1
    best_dist = np.inf
    for idx in range(n_representatives):
        candidate = representative_states[idx, :]
        d = metric_lp(state, candidate, lp_metric, scaling)
        if d < best_dist:
            best_idx, best_dist = idx, d

    capacity = representative_states.shape[0]
    too_far = best_dist > min_dist
    has_room = n_representatives < capacity
    if not (too_far and has_room and accept_new_repr):
        return best_idx, best_dist

    # Register `state` as a new representative in the first unused row.
    representative_states[n_representatives, :] = state
    return n_representatives, 0.0
コード例 #3
0
ファイル: rs_kernel_ucbvi.py プロジェクト: omardrwch/rlberry
def update_model(
    repr_state,
    action,
    repr_next_state,
    reward,
    n_representatives,
    repr_states,
    lp_metric,
    scaling,
    bandwidth,
    bonus_scale_factor,
    beta,
    v_max,
    bonus_type,
    kernel_type,
    N_sa,
    B_sa,
    P_hat,
    R_hat,
):
    """
    Update the model estimates in place after observing one transition.

    Everything is passed explicitly (no object state) so that the function
    can be JIT-compiled.

    For each of the first `n_representatives` representative states, a
    weight is obtained from `kernel_func` applied to the `metric_lp`
    distance between that state and `repr_state`, divided by `bandwidth`.
    The weight is then used to update, for the given `action`:

    * `N_sa[u, action]`: incremented by the weight;
    * `P_hat[u, action, :n_representatives]`: weighted average of its
      previous value and a one-hot vector at `repr_next_state`;
    * `R_hat[u, action]`: weighted average of its previous value and
      `reward`;
    * `B_sa[u, action]`: recomputed by `compute_bonus` from the updated
      `N_sa[u, action]`, `beta`, `bonus_scale_factor`, `v_max` and
      `bonus_type`.

    `beta` is added to the previous count as a regularizer in the averaging
    weights. Nothing is returned; the four arrays are modified in place.
    """
    # One-hot vector marking the observed next representative state.
    next_state_onehot = np.zeros(n_representatives)
    next_state_onehot[repr_next_state] = 1.0

    # Row of the visited representative; it does not change inside the loop.
    visited = repr_states[repr_state, :]

    for idx in range(n_representatives):
        # Weight of the new sample as seen from representative `idx`.
        distance = metric_lp(visited, repr_states[idx, :], lp_metric, scaling)
        weight = kernel_func(distance / bandwidth, kernel_type=kernel_type)

        # Regularized counts before and after adding the new sample.
        old_count = beta + N_sa[idx, action]
        new_count = old_count + weight

        # Accumulate the weight into the visit counts.
        N_sa[idx, action] += weight

        # Share of the previous estimate kept by both running averages.
        keep = old_count / new_count

        # Transition estimate.
        old_transitions = P_hat[idx, action, :n_representatives]
        P_hat[idx, action, :n_representatives] = (
            next_state_onehot * weight / new_count + keep * old_transitions
        )

        # Reward estimate.
        R_hat[idx, action] = (
            weight * reward / new_count + keep * R_hat[idx, action]
        )

        # Exploration bonus, based on the already-updated count.
        B_sa[idx, action] = compute_bonus(
            N_sa[idx, action], beta, bonus_scale_factor, v_max, bonus_type
        )