예제 #1
0
def test_save_load():
    """Build a FeedForward policy, save and reload it, and print its
    output on an all-ones input before and after the round trip."""
    config = {"rng": np.random}
    in_dim = 8
    out_dim = 8
    build_spec = {
        "init_std": 0.05,
        "layers": [64, 64],
        "layer_functions": ['relu', 'relu'],
        "layer_extras": ['bn', 'bn'],
        "output_function": 'linear',
        "output_extras": 'bn',
    }
    policy = FeedForward(in_dim, out_dim, None, config)
    policy.build_model(build_spec)

    print("BUILT POLICY LAYERS:")
    for component in policy.model:
        print(component)

    ones_input = np.ones(in_dim)
    result = policy.activate(ones_input)
    print("\nOUTPUT ON ONES BEFORE SAVING:", result)

    checkpoint = "data/experiments/exp_name/epochs/epoch_0/policy"
    policy.save(checkpoint)
    policy.load(checkpoint)

    result = policy.activate(ones_input)
    print("OUTPUT ON ONES AFTER SAVING:", result)
예제 #2
0
def test_bn():
    """Exercise a FeedForward policy built from a JSON config, then run
    every action parser against the same all-ones input.

    Prints the built layers, checks the output shape, and for each parser
    prints the raw policy output, its sum, and the resulting action.
    """
    cfg = ConfigLoader.load_config(file_name="test_config.json")
    input_shape = 8
    output_shape = 8
    instructions = cfg["policy"]
    policy = FeedForward(input_shape, output_shape, None, cfg)
    policy.build_model(instructions)

    print("BUILT POLICY LAYERS:")
    for layer in policy.model:
        print(layer)

    inp = np.ones(input_shape)
    output = policy.activate(inp)

    print("POLICY OUTPUT ON ONES OF SHAPE", inp.shape, "=", output.shape,
          "EXPECTED", output_shape)
    action_parsers = {
        "linear": parsers.linear_parse,
        "random sample": parsers.random_sample,
        "arg max": parsers.argmax_sample
    }

    for name, parser in action_parsers.items():
        policy.action_parser = parser
        print("ATTEMPTING PARSER", name)
        # Activate once and reuse the result so the printed raw output and
        # its sum come from the same forward pass (the original activated
        # twice, which can disagree if activation is stochastic or stateful).
        raw = policy.activate(inp)[0]
        print("RAW POLICY OUTPUT:", raw)
        print("SUM:", sum(raw))
        action = policy.get_action(inp)
        print("POLICY ACTION FROM PARSER", name, "=", action)

    print()
예제 #3
0
def run_test():
    """Build a tiny FeedForward policy and verify that a flat parameter
    vector round-trips through set/get_trainable_flat (printed, not asserted)."""
    config = {"rng": np.random}
    n_in, n_out = 2, 2
    spec = {
        "init_std": 0.05,
        "layers": [1],
        "layer_functions": ['relu'],
        "layer_extras": ['bn'],
        "output_function": 'linear',
        "output_extras": 'bn',
    }
    policy = FeedForward(n_in, n_out, None, config)
    policy.build_model(spec)

    print("BUILT POLICY LAYERS:")
    for component in policy.model:
        print(component)

    new_flat = np.random.randn(policy.num_params)

    print("POLICY FLAT BEFORE SETTING:", policy.get_trainable_flat())
    policy.set_trainable_flat(new_flat)
    print("POLICY FLAT AFTER SETTING:", policy.get_trainable_flat())

    print("FLAT SHOULD NOW BE:", new_flat)
예제 #4
0
def test_batch_bn():
    """Push a batch of 500 identical all-ones inputs through a FeedForward
    policy loaded from config, then fetch batch actions under each parser."""
    cfg = ConfigLoader.load_config(file_name="test_config.json")
    n_inputs, n_outputs = 8, 8
    policy_spec = cfg["policy"]
    action_fn = MiscParsingFunctions.parse_policy_action_function(
        cfg["policy"]["action_parser"])
    policy = FeedForward(n_inputs, n_outputs, action_fn, cfg)
    policy.build_model(policy_spec)

    print("BUILT POLICY LAYERS:")
    for component in policy.model:
        print(component)

    batch = [np.ones(n_inputs) for _ in range(500)]
    batch_out = policy.activate_batch(batch)

    print("OUTPUT ON ONES BATCH OF 500:", batch_out.shape, "EXPECTED 500 OF",
          n_outputs)

    parser_table = {
        "linear": parsers.linear_parse,
        "random sample": parsers.random_sample,
        "arg max": parsers.argmax_sample
    }

    for parser_name, parser_fn in parser_table.items():
        policy.action_parser = parser_fn
        actions = policy.get_actions_on_batch(batch)
        print("POLICY ACTIONS FROM PARSER", parser_name, "=", actions)
    print()
예제 #5
0
def run_test():
    """Build a two-hidden-layer FeedForward policy with batch norm and
    print every layer of the constructed model."""
    config = {"rng": np.random}
    dims_in, dims_out = 8, 8
    spec = {
        "init_std": 0.05,
        "layers": [64, 64],
        "layer_functions": ['relu', 'relu'],
        "layer_extras": ['bn', 'bn'],
        "output_function": 'linear',
        "output_extras": 'bn',
    }
    policy = FeedForward(dims_in, dims_out, None, config)
    policy.build_model(spec)

    print("BUILT POLICY LAYERS:")
    for component in policy.model:
        print(component)
예제 #6
0
def run_test():
    """End-to-end check of virtual batch normalization (VBN) and save/load
    for a FeedForward policy built from a JSON config.

    Prints the policy's output on an all-ones input at each stage: before
    VBN, after VBN, after saving, after reloading into a fresh policy, and
    after recomputing VBN on the reloaded policy.
    """
    # (Removed a commented-out hard-coded config block; this test builds
    # everything from test_config.json instead.)
    cfg = ConfigLoader.load_config(file_name="test_config.json")
    env = EnvironmentFactory.get_from_config(cfg)
    input_shape = env.get_policy_input_shape()
    output_shape = env.get_policy_output_shape()
    instructions = cfg["policy"]
    # Seed the RNG from config so the run is reproducible.
    cfg["rng"] = np.random.RandomState(cfg["seed"])

    policy = FeedForward(input_shape, output_shape, None, cfg)
    policy.build_model(instructions)

    print("BUILT POLICY LAYERS:")
    for layer in policy.model:
        print(layer)

    num = np.prod(input_shape)
    # Reference batch for virtual batch normalization.
    vbn = [np.random.randn(num) for _ in range(1000)]
    inp = np.ones(num)
    out = policy.activate(inp)

    print("\nOUTPUT ON ONES BEFORE VBN:", out)
    policy.compute_virtual_normalization(vbn)
    out = policy.activate(inp)
    print("OUTPUT ON ONES AFTER VBN:", out)

    policy.save("data/test")
    out = policy.activate(inp)
    print("OUTPUT ON ONES AFTER SAVE:", out)
    del policy

    # Rebuild from scratch and reload to confirm save/load preserves output.
    policy = FeedForward(input_shape, output_shape, None, cfg)
    policy.build_model(instructions)
    policy.load("data/test")
    out = policy.activate(inp)
    print("OUTPUT ON ONES AFTER LOAD:", out)
    policy.compute_virtual_normalization(vbn)
    out = policy.activate(inp)
    print("OUTPUT ON ONES AFTER LOAD AND VBN:", out)
예제 #7
0
def test_save_load_vbn():
    """Check that loading restores a VBN-normalized policy: save, perturb
    the trainable parameters, then reload and compare printed outputs."""
    config = {"rng": np.random}
    n_in, n_out = 8, 8
    spec = {
        "init_std": 0.05,
        "layers": [64, 64],
        "layer_functions": ['relu', 'relu'],
        "layer_extras": ['bn', 'bn'],
        "output_function": 'linear',
        "output_extras": 'bn',
    }
    policy = FeedForward(n_in, n_out, None, config)
    policy.build_model(spec)

    print("BUILT POLICY LAYERS:")
    for component in policy.model:
        print(component)

    # Reference batch for virtual batch normalization.
    reference_batch = [np.random.randn(n_in) for _ in range(1000)]
    policy.compute_virtual_normalization(reference_batch)

    ones_input = np.ones(n_in)
    out = policy.activate(ones_input)
    print("\nOUTPUT ON ONES WITH VBN BEFORE SAVING:", out)

    checkpoint = "data/experiments/exp_name/epochs/epoch_0/policy"
    policy.save(checkpoint)

    out = policy.activate(ones_input)
    print("\nOUTPUT ON ONES WITH VBN AFTER SAVING:", out)

    # Perturb ("jiggle") the parameters so the reload has a visible effect.
    noise = np.random.randn(policy.num_params)
    policy.set_trainable_flat(policy.get_trainable_flat() + noise)
    out = policy.activate(ones_input)
    print("\nJIGGLED OUTPUT ON ONES WITH VBN BEFORE LOADING", out)

    policy.load(checkpoint)
    out = policy.activate(ones_input)
    print("OUTPUT ON ONES WITH VBN AFTER LOADING:", out)