Example #1
                i += 1
            else:
                # take action 2 and add the returned reward to the running score
                self.total_score += env.act(2)


# `env`, `args`, and EmpiricalTreatmentAgent are provided by the surrounding project script.
import numpy as np

from deer.agent import NeuralAgent
from deer.learning_algos.q_net_keras import MyQNetwork
from deer.learning_algos.AC_net_keras import MyACNetwork
from deer.policies import EpsilonGreedyPolicy

rng = np.random.RandomState(123456)

# TODO : best algorithm, hyperparameter tuning
if args.network == 'DQN':
    network = MyQNetwork(environment=env,
                         batch_size=32,
                         double_Q=True,
                         random_state=rng)
elif args.network == 'DDPG':
    network = MyACNetwork(environment=env, batch_size=32, random_state=rng)

agent = NeuralAgent(env,
                    network,
                    train_policy=EpsilonGreedyPolicy(network, env.nActions(),
                                                     rng, 0.0),
                    replay_memory_size=1000,
                    batch_size=32,
                    random_state=rng)

#agent.attach(bc.VerboseController())
if args.fname == 'baseline':
    agent = EmpiricalTreatmentAgent(env)
Example #2
import sys
import logging

import numpy as np

from deer.agent import NeuralAgent
from deer.learning_algos.q_net_keras import MyQNetwork
from deer.policies import EpsilonGreedyPolicy
from Toy_env import MyEnv as Toy_env

# `process_args` and the `Defaults` class are provided by the original launcher script.

if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # --- Parse parameters ---
    parameters = process_args(sys.argv[1:], Defaults)
    if parameters.deterministic:
        rng = np.random.RandomState(123456)
    else:
        rng = np.random.RandomState()

    # --- Instantiate environment ---
    env = Toy_env(rng)

    # --- Instantiate qnetwork ---
    qnetwork = MyQNetwork(env, parameters.rms_decay, parameters.rms_epsilon,
                          parameters.momentum, parameters.clip_norm,
                          parameters.freeze_interval, parameters.batch_size,
                          parameters.update_rule, rng)

    train_policy = EpsilonGreedyPolicy(qnetwork, env.nActions(), rng, 0.1)
    test_policy = EpsilonGreedyPolicy(qnetwork, env.nActions(), rng, 0.)

    # --- Instantiate agent ---
    agent = NeuralAgent(env,
                        qnetwork,
                        parameters.replay_memory_size,
                        # the longest history length among the environment's inputs
                        max(env.inputDimensions()[i][0]
                            for i in range(len(env.inputDimensions()))),
                        parameters.batch_size,
                        rng,
                        train_policy=train_policy,
                        test_policy=test_policy)
Example #3
import numpy as np

from deer.agent import NeuralAgent
from deer.learning_algos.q_net_keras import MyQNetwork
from deer.learning_algos.AC_net_keras import MyACNetwork
import deer.experiment.base_controllers as bc
from deer.policies import EpsilonGreedyPolicy
from misc.other_controllers import GaussianNoiseController
from misc.GaussianNoiseExplorationPolicy import GaussianNoiseExplorationPolicy

# CellEnvironment and `args` are provided by the surrounding project script.
env = CellEnvironment(args.obs_type, args.resize, args.reward, args.network,
                      args.special)

rng = np.random.RandomState(777)

# TODO : best algorithm, hyperparameter tuning
if args.network == 'DQN':
    network = MyQNetwork(environment=env,
                         batch_size=32,
                         freeze_interval=args.epochs[1],
                         double_Q=True,
                         random_state=rng)
    agent = NeuralAgent(env,
                        network,
                        replay_memory_size=min(
                            int(args.epochs[0] * args.epochs[1] * 1.1),
                            100000),
                        batch_size=32,
                        random_state=rng)
    agent.setDiscountFactor(0.95)
    agent.attach(bc.FindBestController(validationID=0,
                                       unique_fname=args.fname))
    agent.attach(bc.VerboseController())
    agent.attach(bc.TrainerController())
    agent.attach(
Example #4
"""

import numpy as np

from deer.agent import NeuralAgent
from deer.learning_algos.q_net_keras import MyQNetwork
from Toy_env import MyEnv as Toy_env
import deer.experiment.base_controllers as bc

rng = np.random.RandomState(123456)

# --- Instantiate environment ---
env = Toy_env(rng)

# --- Instantiate qnetwork ---
qnetwork = MyQNetwork(environment=env, random_state=rng)

# --- Instantiate agent ---
agent = NeuralAgent(env, qnetwork, random_state=rng)

# --- Bind controllers to the agent ---
# Before every training epoch, we want to print a summary of the agent's epsilon, discount factor and
# learning rate, as well as the training epoch number.
agent.attach(bc.VerboseController())

# During training epochs, we want to train the agent after every action it takes.
# Plus, we also want to display, after each training episode (not after every single training step), the average
# Bellman residual and the average of the V values obtained during the last episode.
agent.attach(bc.TrainerController())

# We also want to interleave a "test epoch" between each training epoch.
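# A minimal sketch of how this is typically attached with deer's base controllers; the exact
# constructor arguments used here (id, epoch_length, periodicity) are assumptions and may differ.
agent.attach(bc.InterleavedTestEpochController(
    id=0,
    epoch_length=500,
    periodicity=2))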