Esempio n. 1
0
def calculate_virtual_reward(
    observs: Tensor,
    rewards: Tensor,
    oobs: Tensor = None,
    dist_coef: float = 1.0,
    reward_coef: float = 1.0,
    other_reward: Tensor = 1.0,
    return_compas: bool = False,
    distance_function: Callable = l2_norm,
):
    """Calculate the virtual rewards given the required data."""

    compas = get_alive_indexes(oobs) if oobs is not None else judo.arange(
        len(rewards))
    compas = random_state.permutation(compas)
    flattened_observs = observs.reshape(len(oobs), -1)
    other_reward = other_reward.flatten() if dtype.is_tensor(
        other_reward) else other_reward
    distance = distance_function(flattened_observs, flattened_observs[compas])
    distance_norm = relativize(distance.flatten())
    rewards_norm = relativize(rewards)

    virtual_reward = distance_norm**dist_coef * rewards_norm**reward_coef * other_reward
    return virtual_reward.flatten() if not return_compas else (
        virtual_reward.flatten(), compas)
Esempio n. 2
0
def cross_virtual_reward(
    host_observs: Tensor,
    host_rewards: Tensor,
    ext_observs: Tensor,
    ext_rewards: Tensor,
    dist_coef: float = 1.0,
    reward_coef: float = 1.0,
    return_compas: bool = False,
    distance_function: Callable = l2_norm,
):
    """Calculate the virtual rewards between two cloud of points."""
    host_observs = host_observs.reshape(len(host_rewards), -1)
    ext_observs = ext_observs.reshape(len(ext_rewards), -1)
    compas_host = random_state.permutation(judo.arange(len(host_rewards)))
    compas_ext = random_state.permutation(judo.arange(len(ext_rewards)))

    # TODO: check if it's better for the distances to be the same for host and ext
    h_dist = distance_function(host_observs, ext_observs[compas_host])
    e_dist = distance_function(ext_observs, host_observs[compas_ext])
    host_distance = relativize(h_dist.flatten())
    ext_distance = relativize(e_dist.flatten())

    host_rewards = relativize(host_rewards)
    ext_rewards = relativize(ext_rewards)

    host_vr = host_distance**dist_coef * host_rewards**reward_coef
    ext_vr = ext_distance**dist_coef * ext_rewards**reward_coef
    if return_compas:
        return (host_vr, compas_host), (ext_vr, compas_ext)
    return host_vr, ext_vr
Esempio n. 3
0
def relativize(x: Tensor) -> Tensor:
    """Normalize the data using a custom smoothing technique."""
    orig = x
    x = judo.astype(x, dtype.float)
    std = x.std()
    if float(std) == 0:
        return judo.ones(len(x), dtype=orig.dtype)
    standard = (x - x.mean()) / std
    with numpy.errstate(invalid="ignore", divide="ignore"):
        res = judo.where(standard > 0.0,
                         judo.log(1.0 + standard) + 1.0, judo.exp(standard))
    return res
Esempio n. 4
0
def cross_clone(
    host_virtual_rewards: Tensor,
    ext_virtual_rewards: Tensor,
    host_oobs: Tensor = None,
    eps=1e-3,
):
    """Perform a clone operation between two different groups of points."""
    compas_ix = random_state.permutation(judo.arange(len(ext_virtual_rewards)))
    host_vr = judo.astype(host_virtual_rewards.flatten(), dtype=dtype.float32)
    ext_vr = judo.astype(ext_virtual_rewards.flatten(), dtype=dtype.float32)
    clone_probs = (ext_vr[compas_ix] - host_vr) / judo.where(
        ext_vr > eps, ext_vr, tensor(eps, dtype=dtype.float32))
    will_clone = clone_probs.flatten() > random_state.random(len(clone_probs))
    if host_oobs is not None:
        will_clone[host_oobs] = True
    return compas_ix, will_clone
Esempio n. 5
0
def calculate_clone(virtual_rewards: Tensor, oobs: Tensor = None, eps=1e-3):
    """Calculate the clone indexes and masks from the virtual rewards."""
    compas_ix = get_alive_indexes(oobs) if oobs is not None else judo.arange(
        len(virtual_rewards))
    compas_ix = random_state.permutation(compas_ix)
    vir_rew = virtual_rewards.flatten()
    clone_probs = (vir_rew[compas_ix] - vir_rew) / judo.where(
        vir_rew > eps, vir_rew, tensor(eps))
    will_clone = clone_probs.flatten() > random_state.random(len(clone_probs))
    return compas_ix, will_clone
Esempio n. 6
0
def calculate_distance(
    observs: Tensor,
    distance_function: Callable = l2_norm,
    return_compas: bool = False,
    oobs: Tensor = None,
    compas: Tensor = None,
):
    """Calculate a distance metric for each walker with respect to a random companion."""
    if compas is None:
        compas = get_alive_indexes(oobs) if oobs is not None else judo.arange(
            observs.shape[0])
        compas = random_state.permutation(compas)
    flattened_observs = observs.view(observs.shape[0], -1)
    distance = distance_function(flattened_observs, flattened_observs[compas])
    distance_norm = relativize(distance.flatten())
    return distance_norm if not return_compas else (distance_norm, compas)
Esempio n. 7
0
def resize_image(frame: Tensor,
                 width: int,
                 height: int,
                 mode: str = "RGB") -> Tensor:
    """
    Use PIL to resize an RGB frame to an specified height and width.

    Args:
        frame: Target numpy array representing the image that will be resized.
        width: Width of the resized image.
        height: Height of the resized image.
        mode: Passed to Image.convert.

    Returns:
        The resized frame that matches the provided width and height.

    """
    from PIL import Image

    frame = judo.to_numpy(frame)
    with judo.Backend.use_backend(name="numpy"):
        frame = Image.fromarray(frame)
        frame = judo.to_numpy(frame.convert(mode).resize(size=(width, height)))
    return judo.tensor(frame)