Ejemplo n.º 1
0
def test_guarded_treasures_edgecases():
    """Check GuardedTreasures total rewards for three hand-written agents.

    * An agent that plays 1 only when the latest observation is 1 must
      never have a negative total and must reach a positive total in at
      least one of the runs of 10, 20, ..., 100 steps.
    * An agent that always plays 1 must reach a negative total in at least
      one of those runs.
    * An agent that always plays 0 must total exactly 0 over 10 steps.
    """
    from GuardedTreasures import GuardedTreasures

    def only_take_guarded_treasures(prompt, *meta):
        current_room_has_guard = prompt[-1]
        return 1 if current_room_has_guard == 1 else 0

    # Retry with progressively longer runs.  ``for ... else`` is used so that
    # success on the final (tenth) attempt still counts; the previous
    # ``while``/``assert i < 10`` form treated it as a failure.
    for attempt in range(1, 11):
        result = run_environment(GuardedTreasures, only_take_guarded_treasures,
                                 attempt * 10)
        assert result['total_reward'] >= 0
        if result['total_reward'] > 0:
            break
    else:
        raise AssertionError(
            "only_take_guarded_treasures never earned a positive total reward"
        )

    def always_take_treasure(prompt, *meta):
        return 1

    for attempt in range(1, 11):
        result = run_environment(GuardedTreasures, always_take_treasure,
                                 attempt * 10)
        if result['total_reward'] < 0:
            break
    else:
        raise AssertionError(
            "always_take_treasure never ended with a negative total reward"
        )

    def never_take_treasure(prompt, *meta):
        return 0

    result = run_environment(GuardedTreasures, never_take_treasure, 10)
    assert result['total_reward'] == 0
Ejemplo n.º 2
0
def test_incentivize_zero_edgecases():
    """Run three fixed agents against IncentivizeZero for 10 steps each.

    Expected totals: always playing 0 gives 9, always playing 1 gives -9,
    and the agent keyed on the previous reward gives 9.
    """
    from IncentivizeZero import IncentivizeZero

    def always_zero(prompt, *meta):
        return 0

    def always_1(prompt, *meta):
        return 1

    def play_zero_if_last_reward_was_5(prompt, *meta):
        # prompt[-2] is the most recent reward entry.
        return 0 if prompt[-2] == 5 else 5

    # (agent, expected total reward), evaluated in this order.
    scenarios = [
        (always_zero, 9),
        (always_1, -9),
        (play_zero_if_last_reward_was_5, 9),
    ]
    for agent, expected_total in scenarios:
        outcome = run_environment(IncentivizeZero, agent, 10)
        assert outcome['total_reward'] == expected_total
Ejemplo n.º 3
0
def test_binocular_vision_edgecases():
    """Check BinocularVision with stub game/camera functions.

    The stub game always returns 0 and the two cameras return the constants
    1 and 2.  The ``repetitive`` agent must total 9 over 10 steps, and an
    agent that answers differently for the two observations it may see must
    total -9.
    """
    from abstract.BinocularVision import BinocularVision
    from util import cantor_pairing_fnc

    def Game3D(action_sequence):
        return 0

    def LeftCamera(matrix3D):
        return 1

    def RightCamera(matrix3D):
        return 2

    left_view = LeftCamera(0)
    right_view = RightCamera(0)
    expected_obs = cantor_pairing_fnc(left_view, right_view)

    env = BinocularVision(Game3D, LeftCamera, RightCamera)

    outcome = run_environment(env, repetitive, 10)
    assert outcome['total_reward'] == 9

    def zero_checker(prompt, *meta):
        latest = prompt[-1]
        # Anything other than 0 or the paired observation is an error.
        if latest != 0 and latest != expected_obs:
            raise ValueError("Zero_checker saw an unexpected observation")
        return 1 if latest == 0 else 2

    outcome = run_environment(env, zero_checker, 10)
    assert outcome['total_reward'] == -9
Ejemplo n.º 4
0
def test_self_insert_edgecases():
    """Check the ``self_insert`` wrapper around a trivial dummy environment.

    The ``repetitive`` agent must total 9 over 10 steps; an agent that
    returns 1 whenever any prompt entry exposes ``__iter__`` must total -9.
    """
    from abstract.SelfInsert import self_insert

    def dummy_env(T, play):
        return 0, 0

    class Dummy_Env:
        def __init__(self):
            self.num_legal_actions = 100
            self.num_possible_obs = -1
            self.fnc = dummy_env

    env = self_insert(Dummy_Env)

    outcome = run_environment(env, repetitive, 10)
    assert outcome['total_reward'] == 9

    def tuple_detector(prompt, *meta):
        # Report 1 as soon as any prompt entry is iterable.
        found_iterable = any('__iter__' in dir(entry) for entry in prompt)
        return 1 if found_iterable else 0

    outcome = run_environment(env, tuple_detector, 10)
    assert outcome['total_reward'] == -9
Ejemplo n.º 5
0
def test_crying_baby_edgecases():
    """Run three fixed agents against CryingBaby for 10 steps each.

    Expected totals: always CRY gives -8, always LAUGH gives 10, and the
    agent that branches on the initial observation gives 10.
    """
    from CryingBaby import CryingBaby, LAUGH, CRY, FEED, DONTFEED

    def always_cries(prompt, *meta):
        return CRY

    def always_laughs(prompt, *meta):
        return LAUGH

    def self_aware(prompt, *meta):
        # prompt[1] is the first observation of the run.
        am_i_adult = (prompt[1] == LAUGH)
        return DONTFEED if am_i_adult else LAUGH

    outcome = run_environment(CryingBaby, always_cries, 10)
    # Baby is hardcoded to initially laugh
    assert outcome['total_reward'] == -8

    outcome = run_environment(CryingBaby, always_laughs, 10)
    assert outcome['total_reward'] == 10

    outcome = run_environment(CryingBaby, self_aware, 10)
    assert outcome['total_reward'] == 10
Ejemplo n.º 6
0
def test_tic_tac_toe():
    """Check that a blind, cycling agent eventually scores in each TicTacToe env.

    The agent is run for 100 steps; if its total reward is exactly 0 it is
    run again for 1000 steps, and that longer run must have a positive
    total.
    """
    from vanilla.TicTacToe import TicTacToe1, TicTacToe2, TicTacToe3

    def plays_blindly(prompt, *meta):
        # Floor division keeps the action an int on Python 3 as well; true
        # division would hand the environment a float such as 4.0.
        return ((1 + len(prompt)) // 3) % 9

    for env in [TicTacToe1, TicTacToe2, TicTacToe3]:
        result = run_environment(env, plays_blindly, 100)
        if result['total_reward'] == 0:
            result = run_environment(env, plays_blindly, 1000)
            assert result['total_reward'] > 0
Ejemplo n.º 7
0
def test_false_memories_edgecases():
    """Check FalseMemories totals for several agents and run lengths.

    ``repetitive`` must total 9 and ``non_repetitive`` -9 over 10 steps.
    Two agents that switch their answer on prompt length are then checked
    against fixed expected totals.
    """
    from FalseMemories import FalseMemories

    outcome = run_environment(FalseMemories, repetitive, 10)
    assert outcome['total_reward'] == 9

    outcome = run_environment(FalseMemories, non_repetitive, 10)
    assert outcome['total_reward'] == -9

    def lengthchecker(prompt, *meta):
        if len(prompt) > 5:
            return 1
        return 0

    # (number of steps, expected total reward) for lengthchecker.
    lengthchecker_cases = [(2, -1), (3, -2), (4, -1), (5, 0), (6, 1)]
    for num_steps, expected_total in lengthchecker_cases:
        outcome = run_environment(FalseMemories, lengthchecker, num_steps)
        assert outcome['total_reward'] == expected_total

    def impatient(prompt, *meta):
        if len(prompt) < 5:
            return 1
        return 0

    outcome = run_environment(FalseMemories, impatient, 10)
    assert outcome['total_reward'] == 7
Ejemplo n.º 8
0
def test_dejavu_edgecases():
    """Check DejaVu totals for repetitive, non-repetitive and alternating agents.

    ``repetitive`` must total 9 and ``non_repetitive`` -9 over 10 steps.
    The ``parity`` agent alternates its action with the turn number and
    must total -1 over 10 steps and 0 over 11 steps.
    """
    from DejaVu import DejaVu

    result = run_environment(DejaVu, repetitive, 10)
    assert result['total_reward'] == 9

    result = run_environment(DejaVu, non_repetitive, 10)
    assert result['total_reward'] == -9

    def parity(prompt, *meta):
        # Floor division keeps the action an int (0 or 1) on Python 3 too;
        # true division would produce floats such as 1.0.
        return ((len(prompt) + 1) // 3) % 2

    result = run_environment(DejaVu, parity, 10)
    assert result['total_reward'] == -1
    result = run_environment(DejaVu, parity, 11)
    assert result['total_reward'] == 0
Ejemplo n.º 9
0
def test_runtime_inspector_edgecases():
    """Check PunishFastAgent and PunishSlowAgent with a cheap and a costly agent.

    Over 10 steps the ``repetitive`` agent must total -9 against
    PunishFastAgent and 9 against PunishSlowAgent; the busy-looping
    ``timewaster`` agent must get the opposite totals.
    """
    from RuntimeInspector import PunishFastAgent, PunishSlowAgent

    result1 = run_environment(PunishFastAgent, repetitive, 10)
    result2 = run_environment(PunishSlowAgent, repetitive, 10)
    assert result1['total_reward'] == -9
    assert result2['total_reward'] == 9

    # Always returns 0, but first counts down from 25 * len(prompt), so the
    # work done per call grows with the prompt length.
    # NOTE(review): presumably the environments judge the agent by how much
    # work it performs per call -- confirm in RuntimeInspector before
    # changing the shape of this loop.
    def timewaster(prompt, *meta):
        x = 25 * len(prompt)
        while x > 0:
            x = x - 1
        return 0

    result1 = run_environment(PunishFastAgent, timewaster, 10)
    result2 = run_environment(PunishSlowAgent, timewaster, 10)
    assert result1['total_reward'] == 9
    assert result2['total_reward'] == -9
Ejemplo n.º 10
0
def test_bandits():
    """Check that an action-incrementing agent earns positive reward on each Bandit.

    Each of Bandit1..Bandit5 is run for 10 steps and must yield a positive
    total reward.
    """
    from vanilla.Bandit import Bandit1, Bandit2, Bandit3, Bandit4, Bandit5

    def incrementer(prompt, *meta):
        # Floor division keeps the action an int on Python 3 as well; true
        # division would return floats such as 2.0.
        return (1 + len(prompt)) // 3

    for bandit in [Bandit1, Bandit2, Bandit3, Bandit4, Bandit5]:
        result = run_environment(bandit, incrementer, 10)
        assert result['total_reward'] > 0
Ejemplo n.º 11
0
def test_mazes():
    """Check the Maze environments with a learning agent and a fixed agent.

    * ``learns_about_bad_moves`` must earn a positive total on Maze1..Maze4
      within 50 steps, or else within a 250-step retry; on Maze5 within
      100 steps, or else within a 500-step retry.
    * ``always_goes_north`` must total exactly 0 over 50 steps on every maze.
    """
    from vanilla.Maze import Maze1, Maze2, Maze3, Maze4, Maze5

    def learns_about_bad_moves(prompt, *meta):
        """Pick a random door, avoiding doors recorded as bad for this room."""
        if prompt[-2] > 0:
            assert prompt[-1] == 1  # Rewards are always accompanied by reset

        bad_moves = {room: [] for room in range(10)}
        # The indexing below (obs at i, reward at i-1, action at i-2, previous
        # obs at i-3) means observations sit at indices 1, 4, 7, ...  Start at
        # 4 so every visited observation has a full preceding turn.  (The
        # previous ``i % 2 == 1`` test also visited rewards and actions.)
        for i in range(4, len(prompt), 3):
            obs = prompt[i]
            prev_obs = prompt[i-3]
            prev_action = prompt[i-2]
            prev_reward = prompt[i-1]
            # A move is bad if the room did not change, or if the agent ended
            # up in room 1 without having been rewarded.
            if (obs == prev_obs) or (obs == 1 and prev_reward == 0):
                if prev_action not in bad_moves[prev_obs]:
                    bad_moves[prev_obs].append(prev_action)

        curr_room = prompt[-1]
        while True:
            door = int(random()*4)
            # If every door is marked bad there is nothing left to avoid.
            if len(bad_moves[curr_room]) == 4:
                return door
            if door not in bad_moves[curr_room]:
                return door

    for maze in [Maze1, Maze2, Maze3, Maze4]:
        result = run_environment(maze, learns_about_bad_moves, 50)
        if result['total_reward'] == 0:
            result = run_environment(maze, learns_about_bad_moves, 250)
            assert result['total_reward'] > 0

    result = run_environment(Maze5, learns_about_bad_moves, 100)
    if result['total_reward'] == 0:
        # Retry on Maze5 itself; the previous code reused the stale loop
        # variable ``maze`` and so re-ran Maze4 here.
        result = run_environment(Maze5, learns_about_bad_moves, 500)
        assert result['total_reward'] > 0

    def always_goes_north(prompt, *meta):
        return 0

    for maze in [Maze1, Maze2, Maze3, Maze4, Maze5]:
        result = run_environment(maze, always_goes_north, 50)
        assert result['total_reward'] == 0
Ejemplo n.º 12
0
def test_determinism_inspector_edgecases():
    """Check the two DeterminismInspector environments with two agents.

    Over 10 steps ``repetitive`` must total -9 against
    PunishDeterministicAgent and 9 against PunishNondeterministicAgent; an
    agent that returns a fresh, increasing action on every call must get
    the opposite totals.
    """
    from DeterminismInspector import PunishDeterministicAgent
    from DeterminismInspector import PunishNondeterministicAgent

    punish_det = run_environment(PunishDeterministicAgent, repetitive, 10)
    punish_nondet = run_environment(PunishNondeterministicAgent, repetitive, 10)
    assert punish_det['total_reward'] == -9
    assert punish_nondet['total_reward'] == 9

    # One-element list so the nested agent can mutate the shared counter.
    # The counter is deliberately NOT reset between the two runs below.
    call_counter = [0]

    def never_repeater(prompt, *meta):
        call_counter[0] += 1
        return call_counter[0] - 1

    punish_det = run_environment(PunishDeterministicAgent, never_repeater, 10)
    punish_nondet = run_environment(PunishNondeterministicAgent, never_repeater, 10)
    assert punish_det['total_reward'] == 9
    assert punish_nondet['total_reward'] == -9
def awareness_benchmark(T, num_steps, include_slow_envs=False):
    """Run agent ``T`` on every environment in ``envs`` and collect scores.

    Args:
        T: the agent passed through to ``run_environment``.
        num_steps: number of steps per environment; also the divisor used
            to average the normalized reward.
        include_slow_envs: when false, environments whose name contains any
            entry of ``slow_envs`` are skipped.

    Returns:
        A dict mapping environment name to
        ``total_normalized_reward / num_steps`` for that environment.
    """
    scores = {}
    for env_name, environment in envs.items():
        # Skip slow environments unless the caller asked for them.
        if not include_slow_envs and any(
            slow_tag in env_name for slow_tag in slow_envs
        ):
            continue

        outcome = run_environment(environment, T, num_steps)
        scores[env_name] = outcome['total_normalized_reward'] / num_steps

    return scores
Ejemplo n.º 14
0
def test_ignore_rewards_edgecases():
    """Check IgnoreRewards totals for three agents over 10 steps.

    Both ``repetitive`` and ``non_repetitive`` must total 9; an agent whose
    action is the number of positive rewards seen so far must total -7.
    """
    from IgnoreRewards import IgnoreRewards

    outcome = run_environment(IgnoreRewards, repetitive, 10)
    assert outcome['total_reward'] == 9

    outcome = run_environment(IgnoreRewards, non_repetitive, 10)
    assert outcome['total_reward'] == 9

    def count_positive_rewards(prompt, *meta):
        # Entries at indices 0, 3, 6, ... are the ones inspected.
        positives = 0
        for idx in range(0, len(prompt), 3):
            if prompt[idx] > 0:
                positives += 1
        return positives

    outcome = run_environment(IgnoreRewards, count_positive_rewards, 10)
    assert outcome['total_reward'] == -7
Ejemplo n.º 15
0
def test_backward_consciousness_edgecases():
    """Check BackwardConsciousness totals for two agents.

    ``repetitive`` must total 9 over 10 steps.  ``stubborn_agent`` must
    total 0, -5 and -10 over 5, 10 and 15 steps respectively.
    """
    from BackwardConsciousness import BackwardConsciousness

    outcome = run_environment(BackwardConsciousness, repetitive, 10)
    assert outcome['total_reward'] == 9

    def stubborn_agent(prompt, *meta):
        prompt_len = len(prompt)

        # Take first two actions 1,2 no matter what
        if prompt_len < 3:
            return 1
        if prompt_len < 6:
            return 2

        # Choose later actions entirely based on first two actions
        first_two_actions = (prompt[2], prompt[5])
        if first_two_actions == (1, 2):
            return 3
        return 4

    # (number of steps, expected total reward) for stubborn_agent.
    for num_steps, expected_total in [(5, 0), (10, -5), (15, -10)]:
        outcome = run_environment(BackwardConsciousness, stubborn_agent,
                                  num_steps)
        assert outcome['total_reward'] == expected_total
Ejemplo n.º 16
0
def test_guess_the_number():
    """Check that GuessTheNumber rewards line up with blank (0) observations.

    For each environment a cycling agent is run for 100 steps while
    recording every observation equal to 0; the total reward must equal
    the number of recorded blanks minus one.
    """
    from vanilla.GuessTheNumber import GuessTheNumber1
    from vanilla.GuessTheNumber import GuessTheNumber2
    from vanilla.GuessTheNumber import GuessTheNumber3

    for env in [GuessTheNumber1, GuessTheNumber2, GuessTheNumber3]:
        blank_observations = []

        # The default argument deliberately binds this iteration's list, so
        # each environment's agent records into its own list.
        def blank_obs_observer(prompt, num_legal_actions, num_possible_obs, blank_obs=blank_observations):
            obs = prompt[-1]
            if obs == 0:
                blank_obs += [obs]

            # Cycle through guesses 1..10.  Floor division keeps the guess an
            # int on Python 3; true division would return floats like 3.0.
            return (((1 + len(prompt)) // 3) % 10) + 1

        result = run_environment(env, blank_obs_observer, 100)
        assert result['total_reward'] == len(blank_observations)-1
Ejemplo n.º 17
0
def test_paper_rock_scissors():
    """Check reward/observation consistency for an always-PAPER agent.

    On every call with a nonzero observation the agent asserts the latest
    reward is 1 when the observation is PAPER, 2 when it is ROCK, and 0
    otherwise.  Each environment must yield a positive total over 50 steps.
    """
    from vanilla.PaperRockScissors import PaperRockScissors1
    from vanilla.PaperRockScissors import PaperRockScissors2
    from vanilla.PaperRockScissors import PaperRockScissors3
    from vanilla.PaperRockScissors import PAPER, ROCK, SCISSORS

    def always_plays_paper(prompt, *meta):
        last_reward = prompt[-2]
        last_obs = prompt[-1]
        if last_obs != 0:
            # Work out which reward the latest observation should carry.
            if last_obs == PAPER:
                expected_reward = 1
            elif last_obs == ROCK:
                expected_reward = 2
            else:
                expected_reward = 0
            assert last_reward == expected_reward

        return PAPER

    environments = [PaperRockScissors1, PaperRockScissors2, PaperRockScissors3]
    for environment in environments:
        outcome = run_environment(environment, always_plays_paper, 50)
        assert outcome['total_reward'] > 0