Ejemplo n.º 1
0
def get_mushroom_data():
    """Build a mock mushroom problem and sample it for simulation.

    Contexts are drawn uniformly from ``[0, 1)`` and each label is derived
    from the Euclidean norm of its context row, so the labels depend on the
    contexts instead of being independent noise.

    Returns:
        A tuple ``(n_samples, n_actions, context_dim, samples)`` where
        ``samples`` is whatever ``sample_mushroom`` returns for the mock data.
    """
    context_dim = 5
    n_samples = 5000
    n_actions = 2

    # mock mushroom data
    X = np.random.rand(n_samples, context_dim)

    # Relate y to X through the row norm.
    # Every coordinate lies in [0, 1), so a row norm can never reach
    # sqrt(context_dim) (about 2.24 for 5 dimensions). A fixed cut-off of 2.5
    # is therefore unreachable and would label every sample False. The
    # root-mean-square norm of a uniform sample, sqrt(context_dim / 3), is
    # used instead so that both labels occur with comparable frequency.
    norm_threshold = np.sqrt(context_dim / 3.0)
    y = np.linalg.norm(X, axis=1) > norm_threshold

    # sample the problem T steps for simulation

    # should always eat when good
    # avoid eating when bad
    samples = sample_mushroom(X,
                              y,
                              n_samples,
                              r_eat_good=5.0,
                              r_eat_bad_lucky=5.0,
                              r_eat_bad_unlucky=-35.0,
                              r_eat_bad_lucky_prob=0.1,
                              r_no_eat=0.0)
    return n_samples, n_actions, context_dim, samples
Ejemplo n.º 2
0
def test_sample_mushroom(mushroom_data):
    """Check the basic invariants of what ``sample_mushroom`` returns.

    Verifies the number and width of the sampled contexts, that the
    "do not eat" reward column averages to zero, and that the optimal
    action is truthy for every sample not flagged as poisonous.

    Args:
        mushroom_data: fixture yielding the ``(X, y)`` pair to sample from.
    """
    X, y = mushroom_data

    n_samples = 5 * (10 ** 4)
    reward_settings = dict(
        r_eat_good=5.0,
        r_eat_bad_lucky=5.0,
        r_eat_bad_unlucky=-35.0,
        r_eat_bad_lucky_prob=0.1,
        r_no_eat=0.0,
    )

    contexts, r_acts, opt_acts_hidden, is_poisonous_hidden = sample_mushroom(
        X, y, n_samples, **reward_settings
    )

    # Column 0 holds the reward for not eating, column 1 the reward for eating.
    r_no_eats = r_acts[:, 0]
    r_eats = r_acts[:, 1]  # not asserted on; indexing still requires 2 columns

    dim_context = 117

    n_rows, n_cols = contexts.shape[0], contexts.shape[1]
    assert n_rows == n_samples
    assert n_cols == dim_context

    assert np.mean(r_no_eats) == 0.0

    # good mush -> must eat
    poisonous_mask = is_poisonous_hidden.astype(bool)
    edible_mask = np.logical_not(poisonous_mask)
    assert np.all(opt_acts_hidden[edible_mask])