Code example #1
File: test_experiment.py Project: rldotai/varcompfa
    def test_run_with_callbacks(self):
        # Set up environment and policy
        env = gym.make('MountainCar-v0')
        pol = vcf.DiscreteRandomControl(env.action_space.n)

        # Define an agent
        phi_1 = vcf.BiasUnit()
        td_1 = vcf.TD(len(phi_1))
        params_1 = {
            'alpha': 0.01,
            'gm': 0.999,
            'gm_p': 0.999,
            'lm': 0.1,
        }
        agent_1 = vcf.Agent(td_1, phi_1, params_1)
        agents = [agent_1]

        # Set up the experiment
        experiment = vcf.LiveExperiment(env, pol, agents)

        # Set up testing callbacks
        cbk = _CheckCallback()

        # Try running the experiment
        num_eps = 10
        max_steps = 10
        experiment.run(num_eps, max_steps, callbacks=[cbk])

        # Check that the callbacks ran properly
        assert (cbk.experiment_begin > 0)
        assert (cbk.experiment_end > 0)
        assert (cbk.episode_begin > 0)
        assert (cbk.episode_end > 0)
        assert (cbk.step_begin > 0)
        assert (cbk.step_end > 0)
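The test above relies on a helper class `_CheckCallback` that is not shown on this page. A minimal sketch of such a counting callback is given below; only the counter attributes asserted on in the test are taken from the source, while the hook method names (on_experiment_begin, on_step_end, and so on) are assumptions about the experiment's callback interface rather than the actual varcompfa API.

class _CheckCallback:
    """Hypothetical callback that counts how often each hook fires."""
    def __init__(self):
        # Counter attributes matching the assertions in the test above
        self.experiment_begin = 0
        self.experiment_end = 0
        self.episode_begin = 0
        self.episode_end = 0
        self.step_begin = 0
        self.step_end = 0

    # NOTE: the hook names below are assumed, not taken from the library
    def on_experiment_begin(self, *args, **kwargs):
        self.experiment_begin += 1

    def on_experiment_end(self, *args, **kwargs):
        self.experiment_end += 1

    def on_episode_begin(self, *args, **kwargs):
        self.episode_begin += 1

    def on_episode_end(self, *args, **kwargs):
        self.episode_end += 1

    def on_step_begin(self, *args, **kwargs):
        self.step_begin += 1

    def on_step_end(self, *args, **kwargs):
        self.step_end += 1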
Code example #2
File: test_experiment.py Project: rldotai/varcompfa
    def test_run(self):
        # Set up environment and policy
        env = gym.make('MountainCar-v0')
        pol = vcf.DiscreteRandomControl(env.action_space.n)

        # Define an agent
        phi_1 = vcf.BiasUnit()
        td_1 = vcf.TD(len(phi_1))
        params_1 = {
            'alpha': 0.01,
            'gm': 0.999,
            'gm_p': 0.999,
            'lm': 0.1,
        }
        agent_1 = vcf.Agent(td_1, phi_1, params_1)
        agents = [agent_1]

        # Set up the experiment
        experiment = vcf.LiveExperiment(env, pol, agents)

        # Try running the experiment
        num_eps = 10
        max_steps = 10

        experiment.run(num_eps, max_steps, callbacks=[])
Code example #3
    def test_terminal_context(self):
        # Set up the agent
        param_funcs = {
            'alpha': 0.05,
            'gm': vcf.Constant(0.9999, 0),
            'gm_p': vcf.Constant(0.9999, 0),
            'lm': 0.1
        }
        phi = vcf.BinaryVector(10)
        algo = vcf.TD(len(phi))
        agent = vcf.Agent(algo, phi, param_funcs)

        # No base context
        base_ctx = {}
        term_ctx = agent.terminal_context(base_ctx)
        assert (isinstance(term_ctx, dict))
        assert (term_ctx['done'] == True)
        assert (term_ctx['r'] == 0)
        assert (all(term_ctx['xp'] == 0))

        # Nonsense base context (its entries should still be present in the result)
        base_ctx = {'__' + str(i): i**2 for i in range(10)}
        term_ctx = agent.terminal_context(base_ctx)
        assert (isinstance(term_ctx, dict))
        assert (term_ctx['done'] == True)
        assert (term_ctx['r'] == 0)
        assert (all(term_ctx['xp'] == 0))
        assert (all(key in term_ctx for key in base_ctx.keys()))
        assert all(term_ctx[key] == val for key, val in base_ctx.items())
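The assertions above spell out the contract expected of `Agent.terminal_context`: it returns a dict that preserves every entry of the base context and adds a terminal flag, a zero reward, and an all-zero next feature vector. The sketch below restates that behavior in plain Python/numpy as an illustration of the expected output, not as the library's actual implementation.

import numpy as np

def terminal_context_sketch(base_ctx, num_features):
    """Illustrative only: merge a base context with terminal-state entries."""
    ctx = dict(base_ctx)                 # keep every key from the base context
    ctx['done'] = True                   # the episode has terminated
    ctx['r'] = 0                         # no further reward is received
    ctx['xp'] = np.zeros(num_features)   # next feature vector is all zeros
    return ctx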
Code example #4
    def test_setup(self):
        # Set up the agent
        param_funcs = {
            'alpha': 0.05,
            'gm': vcf.Constant(0.9999, 0),
            'gm_p': vcf.Constant(0.9999, 0),
            'lm': 0.1
        }
        phi = vcf.BinaryVector(10)
        algo = vcf.TD(len(phi))
        agent = vcf.Agent(algo, phi, param_funcs)
Code example #5
        'gm_p': vcf.Constant(0.999, 0),
        'lm': vcf.Constant(0.01, 0),
    }
    q_phi = vcf.BinaryVector(ns)
    q_algo = vcf.DiscreteQ(len(q_phi), na, epsilon=0.05)
    control = vcf.Agent(q_algo, q_phi, q_params)

    # Define some other agents that simply learn the value function
    phi1 = vcf.BinaryVector(ns)
    td_params = {
        'alpha': vcf.Constant(0.01),
        'gm': vcf.Constant(0.999, 0),
        'gm_p': vcf.Constant(0.999, 0),
        'lm': vcf.Constant(0.01, 0),
    }
    td_agent1 = vcf.Agent(vcf.TD(len(phi1)), phi1, td_params)

    phi2 = vcf.BiasUnit()
    td_params2 = {
        'alpha': 0.01,
        'gm': 0.9,
        'gm_p': 0.9,
        'lm': 0.9,
    }
    td_agent2 = vcf.Agent(vcf.TD(len(phi2)), phi2, td_params2)

    # Define the agents to update
    agents = [control, td_agent1, td_agent2]

    # Set up the experiment
    experiment = vcf.PolicyEvaluation(env, control, agents=agents)
Code example #6
    tiling_3 = vcf.UniformTiling(env.observation_space, 11)
    tiling_4 = vcf.UniformTiling(env.observation_space, 19)
    # Convert tile indices to binary vector
    bvec_1 = vcf.BinaryVector(tiling_1.high, tiling_1)
    bvec_2 = vcf.BinaryVector(tiling_2.high, tiling_2)
    bvec_3 = vcf.BinaryVector(tiling_3.high, tiling_3)
    bvec_4 = vcf.BinaryVector(tiling_4.high, tiling_4)
    # Concatenate binary vectors
    phi = vcf.Union(bias_unit, bvec_1, bvec_2, bvec_3, bvec_4)

    # Set up agents
    nf = len(phi)
    na = env.action_space.n
    # Control agent, value function learning, delta agent, delta-squared agent
    control_agent = vcf.DiscreteQ(nf, na, epsilon=0.05)
    value_agent = vcf.TD(nf)
    delta_agent = vcf.TD(nf)
    square_agent = vcf.TD(nf)

    # Initialize the control agent's weights: zero them, then add Gaussian noise
    control_agent.w *= 0
    control_agent.w += np.random.normal(0, 1, control_agent.w.shape)

    # Fixed parameters
    alpha_0 = 0.05
    gamma = 0.999
    lmbda = 0.0

    # Set up tracking
    episodes = []
    stepcount = []
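Since `phi` above is formed by concatenating the bias unit with the four tile-coded binary vectors, the feature length `nf = len(phi)` used to size the learners should equal the sum of the component lengths. The check below is a hypothetical sanity test under that assumption; it reuses names defined in the snippet, some of which are cut off above.

    # Hypothetical check: the concatenated feature vector is as long as its parts combined
    components = [bias_unit, bvec_1, bvec_2, bvec_3, bvec_4]
    assert len(phi) == sum(len(c) for c in components)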