Example #1
def test_explore():
    assert EpsilonGreedy(eps=0.5).explore(1, 1) in [True, False]
    assert EpsilonGreedy(eps=1).explore(1, 1) is True
    assert EpsilonGreedy(eps=0).explore(1, 1) is False
    assert DecreasingEpsilonGreedy().explore(1, 1) in [True, False]
    assert Temperature().explore(1, 1) in [True, False]
    assert Temperature(beta=0).explore(1, 1) is True
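The assertions above, together with Example #11 below, pin down the epsilon-greedy contract: explore(t, n_periods) returns a boolean that is always True for eps=1 and always False for eps=0, and epsilon(t, n_periods) returns the configured rate regardless of the period. A minimal sketch that satisfies these assertions (illustrative only, not the library's actual implementation; the class name EpsilonGreedySketch is hypothetical):

import numpy as np


class EpsilonGreedySketch:
    def __init__(self, eps: float = 0.1):
        self.eps = eps

    def epsilon(self, t: int, n_periods: int) -> float:
        # Constant exploration rate, independent of the elapsed periods.
        return self.eps

    def explore(self, t: int, n_periods: int) -> bool:
        # Explore with probability eps: eps=1 always explores, eps=0 never does
        # (np.random.random() draws from [0, 1)).
        return np.random.random() < self.epsilon(t, n_periods)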
Example #2
def test_environment_prisoners():
    test_1 = DiscreteSynchronEnvironment(
        n_periods=10000,
        possible_prices=[2, 3],
        demand=PrisonersDilemmaDemand(),
        agents=[
            Qlearning(discount=0.95, learning_rate=0.3, decision=EpsilonGreedy(eps=0.1)),
            Qlearning(discount=0.95, learning_rate=0.3, decision=EpsilonGreedy(eps=0.1)),
        ],
    )

    test_2 = DiscreteSynchronEnvironment(
        n_periods=10,
        possible_prices=[1, 2],
        demand=PrisonersDilemmaDemand(),
        agents=[Qlearning(discount=0.95, learning_rate=0.5, decision=DecreasingEpsilonGreedy()), AlwaysDefectAgent()],
    )

    test_3 = DiscreteSynchronEnvironment(
        n_periods=10000,
        possible_prices=[1, 2],
        demand=PrisonersDilemmaDemand(),
        agents=[
            Qlearning(discount=0.95, learning_rate=0.5, decision=DecreasingEpsilonGreedy()),
            Qlearning(discount=0.5, learning_rate=0.1, decision=DecreasingEpsilonGreedy()),
        ],
    )

    assert test_1.play_game()
    assert test_2.play_game()
    assert test_3.play_game()
Example #3
def test_correct_init():
    env = DiscreteSynchronEnvironment(
        n_periods=100,
        n_prices=100,
        history_after=0,
        agents=[
            Qlearning(decision=EpsilonGreedy(eps=1.0)),
            Qlearning(decision=EpsilonGreedy(eps=1.0))
        ],
    )
    env.play_game()
    prices = np.array(env.price_history)
    # With eps=1.0 both agents price uniformly at random, so their price
    # paths should not coincide in every period.
    assert np.all(prices[:, 1] == prices[:, 0]) == False  # noqa E712
Example #4
def test_delayed_learning():
    possible_prices = [0.0, 1.0, 2.0, 3.0]
    agent = DQN(decision=EpsilonGreedy(eps=0.0),
                replay_memory=ReplayBuffer(50),
                batch_size=1)
    state = tuple(random.choices(possible_prices, k=2))
    agent.play_price(state, possible_prices, 0, 0)
    weights_before_learning = copy.deepcopy(agent.qnetwork_local.get_weights())
    # First learn call: the local network's weights are expected to remain unchanged.
    agent.learn(
        previous_reward=1.0,
        reward=10.0,
        previous_action=0.0,
        action=np.float64(1.0),
        action_space=possible_prices,
        previous_state=state,
        state=state,
        next_state=state,
    )
    assert np.equal(np.array(weights_before_learning[0]),
                    np.array(agent.qnetwork_local.get_weights()[0])).all()
    # Second learn call: the weights should now differ from the snapshot taken above.
    agent.learn(
        previous_reward=1.0,
        reward=10.0,
        previous_action=0.0,
        action=np.float64(1.0),
        action_space=possible_prices,
        previous_state=state,
        state=state,
        next_state=state,
    )
    assert (np.equal(np.array(weights_before_learning[0]),
                     np.array(agent.qnetwork_local.get_weights()[0])).all() ==
            False  # noqa E712
            )
Example #5
def test_play_optimal_action():
    possible_prices = [1.0, 2.0]
    agent = DQN(decision=EpsilonGreedy(eps=0.0),
                replay_memory=ReplayBuffer(50),
                batch_size=1)
    state = tuple(random.choices(possible_prices, k=2))
    agent.play_price(state, possible_prices, 0, 0)
    # Repeatedly reward playing 1.0 (+10) and punish playing 2.0 (-10).
    for _ in range(10):
        agent.learn(
            previous_reward=1.0,
            reward=10.0,
            previous_action=0.0,
            action=np.float64(1.0),
            action_space=possible_prices,
            previous_state=state,
            state=state,
            next_state=state,
        )
        agent.learn(
            previous_reward=1.0,
            reward=-10.0,
            previous_action=0.0,
            action=np.float64(2.0),
            action_space=possible_prices,
            previous_state=state,
            state=state,
            next_state=state,
        )
    # With eps=0.0 the agent should now greedily pick the better price.
    assert agent.play_price(state, possible_prices, 1, 1) == 1.0
Example #6
def test_factory_init():
    env = DiscreteSynchronEnvironment(
        n_periods=1,
        n_prices=100,
        agents=[
            DQN(decision=EpsilonGreedy(eps=1.0), marginal_cost=0.0),
            DQN(decision=EpsilonGreedy(eps=1.0), marginal_cost=2.0),
        ],
    )
    env.play_game()
    # Both agents observe the same states (indices 0 and 3 of the sampled transition),
    # while the entry at index 2 (presumably the reward) differs due to their
    # different marginal costs.
    assert np.array(env.agents[0].replay_memory.sample(1)[0] ==
                    env.agents[1].replay_memory.sample(1)[0]).all()
    assert np.array(env.agents[0].replay_memory.sample(1)[3] ==
                    env.agents[1].replay_memory.sample(1)[3]).all()
    assert np.array(env.agents[0].replay_memory.sample(1)[2] !=
                    env.agents[1].replay_memory.sample(1)[2]).all()
Example #7
def test_environment_advanced_qlearning():
    test_1 = DiscreteSynchronEnvironment(
        n_periods=10000,
        possible_prices=[2, 3],
        demand=LogitDemand(),
        agents=[
            Qlearning(discount=0.95, learning_rate=0.3, decision=EpsilonGreedy(eps=0.1)),
            Qlearning(
                discount=0.95, learning_rate=0.3, marginal_cost=4.0, quality=5.0, decision=EpsilonGreedy(eps=0.1)
            ),
            AlwaysDefectAgent(marginal_cost=0.1),
        ],
    )

    assert test_1.play_game()
Example #8
def test_update_network():
    possible_prices = [0.0, 1.0, 2.0, 3.0]
    agent = DQN(decision=EpsilonGreedy(eps=0.0),
                replay_memory=ReplayBuffer(50),
                batch_size=1,
                update_target_after=10)
    state = tuple(random.choices(possible_prices, k=2))
    agent.play_price(state, possible_prices, 0, 0)
    # Freshly initialised: local and target networks start with identical weights.
    assert np.isclose(np.array(agent.qnetwork_local.get_weights()[0]),
                      np.array(agent.qnetwork_target.get_weights()[0])).all()
    for _ in range(10):
        agent.learn(
            previous_reward=1.0,
            reward=10.0,
            previous_action=0.0,
            action=np.float64(1.0),
            action_space=possible_prices,
            previous_state=state,
            state=state,
            next_state=state,
        )
    # After ten learning steps the local network has drifted away from the target.
    assert (np.equal(np.array(agent.qnetwork_local.get_weights()[0]),
                     np.array(agent.qnetwork_target.get_weights()[0])).all() ==
            False  # noqa E712, W503
            )
    # One more learn call triggers the periodic target update (update_target_after=10),
    # bringing the two networks back in sync.
    agent.learn(
        previous_reward=1.0,
        reward=10.0,
        previous_action=0.0,
        action=np.float64(1.0),
        action_space=possible_prices,
        previous_state=state,
        state=state,
        next_state=state,
    )
    assert np.isclose(np.array(agent.qnetwork_local.get_weights()[0]),
                      np.array(agent.qnetwork_target.get_weights()[0])).all()
Example #9
def test_random_action_selection():
    np.random.seed(1)
    possible_prices = [1.0, 2.0]
    agent = DiffDQN(decision=EpsilonGreedy(eps=0.0),
                    replay_memory=ReplayBuffer(2),
                    batch_size=1)
    state = tuple(random.choices(possible_prices, k=2))
    agent.play_price(state, possible_prices, 0, 0)

    # same action values (all weights are zero)
    agent.play_price(state, possible_prices, 0, 0)
    weights = agent.qnetwork_local.get_weights()
    for w in weights:
        w[w != 0.0] = 0.0
    agent.qnetwork_local.set_weights(weights)
    played_prices = []
    for _ in range(10):
        played_prices.append(agent.play_price(state, possible_prices, 0, 0))
    assert len(set(played_prices)) == 2

    # learn that 1.0 is better
    for _ in range(10):
        agent.learn(
            previous_reward=1.0,
            reward=10.0,
            previous_action=0.0,
            action=np.float64(1.0),
            action_space=possible_prices,
            previous_state=state,
            state=state,
            next_state=state,
        )
    played_prices = []
    for _ in range(10):
        played_prices.append(agent.play_price(state, possible_prices, 0, 0))
    assert len(set(played_prices)) == 1
    assert list(set(played_prices))[0] == 1
Example #10
def test_play_price():
    agent = DQN(decision=EpsilonGreedy(eps=0.0))
    # Only one distinct price is on offer, so the agent must return it.
    assert agent.play_price((1.0, ), [1.0, 1.0], 0, 0) == 1.0

    agent = DiffDQN(decision=EpsilonGreedy(eps=0.0))
    assert agent.play_price((1.0, ), [1.0, 1.0], 0, 0) == 1.0
Example #11
def test_epsilon_greedy():
    for e in [0.00001, 0.5, 0.1, 0.999999]:
        assert EpsilonGreedy(eps=e).epsilon(100000, 100000) == e
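The fixed rule tested here returns the configured rate no matter how many periods have elapsed. DecreasingEpsilonGreedy (Examples #1 and #2) presumably lowers the exploration rate over time instead; its exact schedule is not visible in these tests, but a common choice is an exponential decay such as the hypothetical sketch below (decayed_epsilon and beta are illustrative names, not the library's API):

import math


def decayed_epsilon(t: int, n_periods: int, beta: float = 1e-4) -> float:
    # One common decreasing-epsilon schedule: start fully exploratory
    # and decay exponentially with the period index.
    return math.exp(-beta * t)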
Example #12
def test_play_price():
    agent = Qlearning(decision=EpsilonGreedy(eps=0.0))
    p = agent.play_price((1.0, 1.0), [1.0, 2.0], 0, 0)
    assert p == 1.0 or p == 2.0