Exemple #1
0
    def reinforce_train_cem(self,
                            steps=60000,
                            visualize=False,
                            verbose=1,
                            nb_steps_warmup=10000,
                            save_path=r"D:\Data\markets\weights",
                            save_weights_name="cem_CADJPY_weights.h5f",
                            log_interval=1000):
        """Train a CEM agent on ``self.env`` and write its weights to disk.

        Args:
            steps: Passed to ``CEMAgent.fit`` as ``nb_steps``.
            visualize: Forwarded to ``fit``.
            verbose: Forwarded to ``fit``.
            nb_steps_warmup: Forwarded to the ``CEMAgent`` constructor.
            save_path: Directory for the weights file; it is created
                (including parents) when missing.
            save_weights_name: Name of the weights file inside ``save_path``.
            log_interval: Forwarded to ``fit``.
        """
        episode_memory = EpisodeParameterMemory(limit=200, window_length=1)
        action_count = self.env.action_space.n

        # The processor is sized to the number of input tensors of the model.
        cem_agent = CEMAgent(
            model=self.model,
            nb_actions=action_count,
            memory=episode_memory,
            nb_steps_warmup=nb_steps_warmup,
            processor=MultiInputProcessor(nb_inputs=len(self.model.inputs)))
        cem_agent.compile()

        fit_options = dict(nb_steps=steps,
                           visualize=visualize,
                           verbose=verbose,
                           log_interval=log_interval)
        cem_agent.fit(self.env, **fit_options)

        # Ensure the target directory exists before the weights are written.
        target_dir = pathlib.Path(save_path)
        target_dir.mkdir(parents=True, exist_ok=True)
        weights_file = os.path.join(save_path, save_weights_name)
        cem_agent.save_weights(filepath=weights_file, overwrite=True)
Exemple #2
0
def main(env_name, nb_steps):
    """Train a CEM agent on a gym environment, save it and report test rewards.

    Args:
        env_name: Gym environment id; also embedded in the weights file name.
        nb_steps: Number of training steps passed to ``CEMAgent.fit``.
    """
    # Create the environment, then seed NumPy and the environment.
    env = gym.make(env_name)
    np.random.seed(123)
    env.seed(123)

    action_count = env.action_space.n
    observation_shape = (1, ) + env.observation_space.shape
    network = create_nn_model(observation_shape, action_count)

    # Configure and compile the agent.
    episode_memory = EpisodeParameterMemory(limit=450, window_length=1)
    agent = CEMAgent(model=network, nb_actions=action_count,
                     memory=episode_memory, batch_size=50,
                     nb_steps_warmup=2000, train_interval=50,
                     elite_frac=0.05)
    agent.compile()
    agent.fit(env, nb_steps=nb_steps, visualize=False, verbose=1)

    # Persist the trained weights.
    weights_file = 'cem_{}_params.h5f'.format(env_name)
    agent.save_weights(weights_file, overwrite=True)

    # Evaluate the agent and print summary statistics of the episode rewards.
    test_history = agent.test(env, nb_episodes=100, visualize=False)
    rewards = np.array(test_history.history['episode_reward'])
    report = ("Test rewards (#episodes={}): mean={:>5.2f}, std={:>5.2f}, "
              "min={:>5.2f}, max={:>5.2f}")
    print(report.format(len(rewards), rewards.mean(), rewards.std(),
                        rewards.min(), rewards.max()))
Exemple #3
0
def main(env_name, nb_steps):
    """Fit, save and evaluate a CEM agent on the gym environment ``env_name``.

    Args:
        env_name: Gym environment id; it is also part of the weights file name.
        nb_steps: Training step budget handed to ``CEMAgent.fit``.
    """
    # Environment first, then the seeds for NumPy and the environment.
    env = gym.make(env_name)
    np.random.seed(123)
    env.seed(123)

    n_actions = env.action_space.n
    model = create_nn_model((1,) + env.observation_space.shape, n_actions)

    # Agent configuration.
    agent_settings = dict(
        model=model,
        nb_actions=n_actions,
        memory=EpisodeParameterMemory(limit=450, window_length=1),
        batch_size=50,
        nb_steps_warmup=2000,
        train_interval=50,
        elite_frac=0.05,
    )
    agent = CEMAgent(**agent_settings)
    agent.compile()
    agent.fit(env, nb_steps=nb_steps, visualize=False, verbose=1)

    # Save the weights once training has finished.
    agent.save_weights('cem_{}_params.h5f'.format(env_name), overwrite=True)

    # Run 100 test episodes and summarize their rewards.
    evaluation = agent.test(env, nb_episodes=100, visualize=False)
    rewards = np.array(evaluation.history['episode_reward'])
    stats = (len(rewards), rewards.mean(), rewards.std(), rewards.min(),
             rewards.max())
    template = ("Test rewards (#episodes={}): mean={:>5.2f}, std={:>5.2f}, "
                "min={:>5.2f}, max={:>5.2f}")
    print(template.format(*stats))
class KerasCEMAgent(object):
    '''
    The cross-entropy method Learning Agent as described in http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.81.6579&rep=rep1&type=pdf

    Wraps a ``CEMAgent``: ``configure`` builds the network and the agent, the
    remaining methods forward to that agent.
    '''

    def __init__(self, opts):
        """Store the options object; ``opts.model_type`` is read by ``configure``."""
        self.metadata = {
            'discrete_actions': True,
        }

        self.opts = opts

    def _build_model(self, observation_space_shape, nb_actions):
        """Return the softmax policy network selected by ``opts.model_type``.

        Raises:
            ValueError: If ``opts.model_type`` is neither 1 nor 2.
        """
        model_type = self.opts.model_type
        if model_type == 1:
            # Option 1: a single dense layer on the flattened observation.
            hidden_units = ()
        elif model_type == 2:
            # Option 2: three hidden ReLU layers with 16 units each.
            hidden_units = (16, 16, 16)
        else:
            # Fail early with a clear message instead of an UnboundLocalError
            # further down when the model variable was never assigned.
            raise ValueError(
                'Unsupported model_type {!r}; expected 1 or 2'.format(model_type))

        model = Sequential()
        model.add(Flatten(input_shape=(1,) + tuple(observation_space_shape)))
        for units in hidden_units:
            model.add(Dense(units))
            model.add(Activation('relu'))
        model.add(Dense(nb_actions))
        model.add(Activation('softmax'))
        # summary() prints on its own; wrapping it in print() only adds "None".
        model.summary()
        return model

    def configure(self, observation_space_shape, nb_actions):
        """Build the model and the compiled CEM agent (stored in ``self.agent``).

        Args:
            observation_space_shape: Shape of one observation (tuple or list).
            nb_actions: Number of discrete actions.

        Raises:
            ValueError: If ``opts.model_type`` is neither 1 nor 2.
        """
        model = self._build_model(observation_space_shape, nb_actions)

        memory = EpisodeParameterMemory(limit=1000, window_length=1)

        self.agent = CEMAgent(model=model, nb_actions=nb_actions, memory=memory,
                              batch_size=50, nb_steps_warmup=2000,
                              train_interval=50, elite_frac=0.05)
        self.agent.compile()

    def train(self, env, nb_steps, visualize, verbosity):
        """Fit the agent on ``env`` and return whatever ``agent.fit`` returns.

        Visualizing slows training down considerably.
        """
        return self.agent.fit(env, nb_steps=nb_steps, visualize=visualize,
                              verbose=verbosity)

    def test(self, env, nb_episodes, visualize):
        """Evaluate the agent for ``nb_episodes`` episodes and return the result of ``agent.test``."""
        return self.agent.test(env, nb_episodes=nb_episodes, visualize=visualize)

    def load_weights(self, load_file):
        """Load agent weights from ``load_file``."""
        self.agent.load_weights(load_file)

    def save_weights(self, save_file, overwrite):
        """Save the agent weights to ``save_file``."""
        self.agent.save_weights(save_file, overwrite=overwrite)
Exemple #5
0
def test_single_cem_input():
    """Smoke test: a CEM agent fits on a model with a single input."""
    network = Sequential([
        Flatten(input_shape=(2, 3)),
        Dense(2),
    ])

    episode_memory = EpisodeParameterMemory(limit=10, window_length=2)
    cem = CEMAgent(network,
                   memory=episode_memory,
                   nb_actions=2,
                   nb_steps_warmup=5,
                   batch_size=4,
                   train_interval=50)
    cem.compile()

    env = MultiInputTestEnv((3,))
    cem.fit(env, nb_steps=100)
Exemple #6
0
def main():
    """Build a softmax policy model and train a CEM agent on a market environment.

    Parameter and training logs as well as the final weights are written to
    files whose names contain the instrument symbol of the environment.
    """
    env = MarketEnv(("ES", "FUT", "GLOBEX", "USD"),
                    obs_xform=xform.BinaryDelta(3),
                    episode_steps=STEPS_PER_EPISODE,
                    client_id=3)
    # Alternative instrument:
    # env = MarketEnv(("AAPL", "STK", "SMART", "USD"), obs_xform=xform.BinaryDelta(3), episode_steps=STEPS_PER_EPISODE, client_id=4)
    nb_actions = 3      # Keras-RL CEM is a discrete agent

    # Option 1: simple model, one dense softmax layer on the flattened input.
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(Dense(nb_actions))
    model.add(Activation('softmax'))

    # Option 2 (disabled): deep network with three hidden ReLU layers of
    # `hidden_nodes` units each, where
    #     hidden_nodes = reduce(operator.imul, env.observation_space.shape, 1)
    # followed by Dense(nb_actions) and a softmax activation.

    print(model.summary())

    symbol = env.instrument.symbol
    param_logger = CEMParamLogger('cem_{}_params.json'.format(symbol))
    file_logger = FileLogger('cem_{}_log.json'.format(symbol),
                             interval=STEPS_PER_EPISODE)
    callbacks = [param_logger, file_logger]

    # Start with the last saved parameters if present.
    theta_init = param_logger.read_params()
    if theta_init is not None:
        print('Starting with parameters from {}:\n{}'.format(param_logger.params_filename, theta_init))

    # memory:          remembers the parameters and rewards for the last
    #                  `limit` episodes.
    # batch_size:      randomly sample this many episode parameters from memory
    #                  before taking the top `elite_frac` to construct the next
    #                  generation of parameters from.
    # nb_steps_warmup: run for this many steps (total) to fill memory before
    #                  training.
    # train_interval:  train (update parameters) every this many episodes.
    # elite_frac:      top fraction of the `batch_size` sampled parameters used
    #                  to construct new parameters.
    memory = EpisodeParameterMemory(limit=EPISODES, window_length=1)
    cem = CEMAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   batch_size=EPISODES,
                   nb_steps_warmup=WARMUMP_EPISODES * STEPS_PER_EPISODE,
                   train_interval=TRAIN_INTERVAL_EPISODES,
                   elite_frac=0.2,
                   theta_init=theta_init,
                   processor=DiscreteProcessor(),
                   noise_decay_const=0,
                   noise_ampl=0)
    cem.compile()
    cem.fit(env,
            nb_steps=STEPS_PER_EPISODE * EPISODES,
            visualize=True,
            verbose=2,
            callbacks=callbacks)
    cem.save_weights('cem_{}_weights.h5f'.format(symbol), overwrite=True)
Exemple #7
0
def test_multi_cem_input():
    """Smoke test: a CEM agent fits on a model with two inputs."""
    model_inputs = [Input(shape=(2, 3)), Input(shape=(2, 4))]
    merged = Concatenate()(model_inputs)
    flattened = Flatten()(merged)
    outputs = Dense(2)(flattened)
    network = Model(inputs=model_inputs, outputs=outputs)

    episode_memory = EpisodeParameterMemory(limit=10, window_length=2)
    # One processor slot per model input.
    input_processor = MultiInputProcessor(nb_inputs=2)
    cem = CEMAgent(network,
                   memory=episode_memory,
                   nb_actions=2,
                   nb_steps_warmup=5,
                   batch_size=4,
                   processor=input_processor,
                   train_interval=50)
    cem.compile()

    env = MultiInputTestEnv([(3,), (4,)])
    cem.fit(env, nb_steps=100)
Exemple #8
0
def create_cem_agent(env):
    ''' Create a compiled CEM agent for ``env``.

    Args:
        env: Environment the agent's model is sized for. When ``None``, a new
            environment is obtained from ``create_environment()``.

    Returns:
        The compiled ``CEMAgent``.
    '''
    # The argument used to be overwritten unconditionally, so the caller's
    # environment was ignored; only fall back to a fresh one when none is given.
    if env is None:
        env = create_environment()
    model = create_deep_model(env)
    nb_actions = env.action_space.n
    memory = EpisodeParameterMemory(limit=1000, window_length=1)
    cem = CEMAgent(
        model=model,
        nb_actions=nb_actions,
        memory=memory,
        batch_size=50,
        nb_steps_warmup=2000,
        train_interval=50,
        elite_frac=0.05)
    cem.compile()
    return cem
Exemple #9
0
def create(env):
    """Seed NumPy and ``env``, then return a compiled CEM agent for ``env``.

    The policy network flattens the observation, applies three hidden ReLU
    layers with 16 units each and ends in a softmax over the actions.

    Args:
        env: Environment providing ``seed``, ``action_space.n`` and
            ``observation_space.shape``.

    Returns:
        The compiled ``CEMAgent``.
    """
    seed = config.current.domain_seed
    np.random.seed(seed)
    env.seed(seed)
    nb_actions = env.action_space.n

    model = Sequential()
    model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
    for _ in range(3):
        model.add(Dense(16))
        model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('softmax'))

    memory = EpisodeParameterMemory(limit=1000, window_length=1)

    cem = CEMAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   batch_size=50,
                   nb_steps_warmup=2000,
                   train_interval=50,
                   elite_frac=0.05)
    cem.compile()
    return cem
Exemple #10
0
def _make_seeded_env():
    """Create a fresh ``ENV_NAME`` environment; seed NumPy and the env with 123."""
    env = gym.make(ENV_NAME)
    np.random.seed(123)
    env.seed(123)
    return env


def main(options):
    """Train a CEM agent and a PROPS agent, save both, and plot their rewards.

    Each agent is trained on its own freshly created and seeded environment.
    The weights go to ``cem_dumps/`` and ``props_dumps/``, the comparison plot
    of the rolling mean episode reward goes to ``plots/``.

    Args:
        options: Object with the attributes ``model_type``,
            ``train_interval_cem``, ``batch_size_cem``, ``steps_cem``,
            ``batch_size_props``, ``steps_props``, ``trunc_thres``, ``Lmax``
            and ``delta``.
    """
    import os  # local import: only needed to create the output directories

    # store args
    model_type = options.model_type
    train_interval_cem = options.train_interval_cem
    batch_size_cem = options.batch_size_cem
    steps_cem = options.steps_cem
    batch_size_props = options.batch_size_props
    steps_props = options.steps_props
    trunc_thres = options.trunc_thres
    Lmax = options.Lmax
    delta = options.delta

    # The output directories were assumed to exist; create them up front so a
    # long training run cannot fail at the very end when saving.
    for directory in ('cem_dumps', 'props_dumps', 'plots'):
        os.makedirs(directory, exist_ok=True)

    # CEM
    env = _make_seeded_env()
    nb_actions = env.action_space.n

    model = initModel(model_type, nb_actions, env.observation_space.shape)
    memory = initMemory()

    cem = CEMAgent(model=model,
                   nb_actions=nb_actions,
                   memory=memory,
                   batch_size=batch_size_cem,
                   nb_steps_warmup=1000,
                   train_interval=train_interval_cem,
                   elite_frac=0.05)
    cem.compile()
    callback_cem = cem.fit(env, nb_steps=steps_cem, visualize=False, verbose=0)
    cem.save_weights('cem_dumps/cem_{}_{}_ti_{}_bs_{}_steps_{}.h5f'.format(
        ENV_NAME, model_type, train_interval_cem, batch_size_cem, steps_cem),
                     overwrite=True)

    # PROPS
    env = _make_seeded_env()
    nb_actions = env.action_space.n

    model = initModel(model_type, nb_actions, env.observation_space.shape)
    memory = initMemory()

    bound_opts = {
        'analytic_jac': True,
        'normalize_weights': True,
        'truncate_weights': True,
        'truncate_thresh': trunc_thres
    }

    props = PROPSAgent(model=model,
                       nb_actions=nb_actions,
                       memory=memory,
                       Lmax=Lmax,
                       delta=delta,
                       bound_opts=bound_opts,
                       batch_size=batch_size_props)
    props.compile()
    callback_props = props.fit(env,
                               nb_steps=steps_props,
                               visualize=False,
                               verbose=0)
    props.save_weights(
        'props_dumps/props_{}_{}_bs_{}_steps_{}_thres_{}_Lmax_{}_delta_{}.h5f'.
        format(ENV_NAME, model_type, batch_size_props, steps_props,
               trunc_thres, Lmax, delta),
        overwrite=True)

    # Plot the rolling mean of the episode rewards; the window is the CEM
    # train interval for CEM and the batch size for PROPS.
    df_cem = pd.DataFrame({'data': callback_cem.history['episode_reward']})
    plt.plot(df_cem.rolling(window=train_interval_cem).mean())

    df_props = pd.DataFrame({'data': callback_props.history['episode_reward']})
    plt.plot(df_props.rolling(window=batch_size_props).mean())

    plt.legend(['cem', 'props'], loc='upper left')
    plt.savefig('plots/{}_{}_bs_{}_thres_{}_Lmax_{}_delta_{}.jpeg'.format(
        ENV_NAME, model_type, batch_size_props, trunc_thres, Lmax, delta))
Exemple #11
0
    model.add(Flatten(input_shape=(1, 10)))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(16))
    model.add(Activation('relu'))
    model.add(Dense(n_action))
    model.add(Activation('softmax'))
​

    agent = CEMAgent(model=model, nb_actions=n_action, memory=memory,
                   batch_size=50, nb_steps_warmup=1000, train_interval=50, elite_frac=0.05, noise_ampl=4)

​
    agent.compile()
​
    # Training
    env_player.play_against(
        env_algorithm=agent_training,
        opponent=random_opponent,
                            env_algorithm_kwargs={"agent": agent, "nb_steps": NB_TRAINING_STEPS, "filename": TRAINING_OPPONENT},
    )
    model.save("model_%d" % NB_TRAINING_STEPS)
​
    # Evaluation
    print("Results against random player:")
    env_player.play_against(
        env_algorithm=agent_evaluation,
        opponent=random_opponent,
        env_algorithm_kwargs={"agent": agent, "nb_episodes": NB_EVALUATION_EPISODES, "filename": f'({TRAINING_OPPONENT}_{NB_TRAINING_STEPS})RandomPlayer'},
Exemple #12
0
# Number of discrete actions and first dimension of the observation shape.
nb_actions = env.action_space.n
obs_dim = env.observation_space.shape[0]

# Policy network: flattened observation -> dense layer -> softmax.
model = Sequential([
    Flatten(input_shape=(1,) + env.observation_space.shape, name='Duda-flatten'),
    Dense(nb_actions, name='Duda-dense'),
    Activation('softmax', name='Duda-relu'),
])

print(model.summary())

memory = EpisodeParameterMemory(limit=1000, window_length=1)

cem = CEMAgent(model=model,
               nb_actions=nb_actions,
               memory=memory,
               batch_size=50,
               nb_steps_warmup=2000,
               train_interval=50,
               elite_frac=0.05)
cem.compile()
print('\033[93m' + "Model is compiled" + '\033[0m')
cem.fit(env, nb_steps=1000, visualize=True, verbose=2)
# Note: this message is printed once fit() has returned.
print('\033[93m' + "Training" + '\033[0m')

cem.save_weights('cem_{}_params.h5f'.format(ENV_NAME), overwrite=True)

# Evaluate the algorithm for 5 episodes.
cem.test(env, nb_episodes=5, visualize=True)

# Collect ten initial observations by resetting the environment repeatedly.
observations = np.asarray([deepcopy(env.reset()) for _ in range(10)])
def train():
    """Train, save and evaluate a CEM agent for the reward mode in ``REWARD``.

    ``REWARD`` selects the setup:

    * ``"normal"``: agent without a processor.
    * ``"noisy"``: agent with ``CartpoleProcessor(surrogate=False)``.
    * ``"surrogate"``: agent with ``CartpoleProcessor(surrogate=True)``.

    For the two processor modes ``SMOOTH`` is forwarded as ``smooth`` and adds
    a ``_smooth`` suffix to the output names. Weights and the training history
    (CSV) are written to ``LOG_DIR``; the history is written before the final
    5-episode test so it is kept even if the evaluation fails.

    Raises:
        NotImplementedError: If ``REWARD`` is none of the modes above.
    """
    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    np.random.seed(123)
    env.seed(123)

    nb_actions = env.action_space.n

    # Use a TensorFlow session with GPU memory growth enabled for Keras.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    from keras import backend as K
    K.set_session(sess)

    # Deep network: three hidden ReLU layers of 16 units, softmax output.
    model = Sequential()
    model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    for _ in range(3):
        model.add(Dense(16))
        model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('softmax'))

    model.summary()

    memory = EpisodeParameterMemory(limit=1000, window_length=1)

    agent_kwargs = dict(model=model, nb_actions=nb_actions, memory=memory,
                        batch_size=50, nb_steps_warmup=2000, train_interval=50,
                        elite_frac=0.05)

    # Pick the processor (if any) and the tag used in the output file names.
    if REWARD == "normal":
        run_tag = "normal"
    elif REWARD in ("noisy", "surrogate"):
        agent_kwargs["processor"] = CartpoleProcessor(
            e_=ERR_N, e=ERR_P, smooth=bool(SMOOTH),
            surrogate=(REWARD == "surrogate"))
        run_tag = (REWARD + "_smooth") if SMOOTH else REWARD
    else:
        raise NotImplementedError

    cem = CEMAgent(**agent_kwargs)
    cem.compile()
    history = cem.fit(env, nb_steps=100000, visualize=False, verbose=2)

    weights_name = 'cem_{}_{}_params.h5f'.format(run_tag, ENV_NAME)
    cem.save_weights(os.path.join(LOG_DIR, weights_name), overwrite=True)
    pandas.DataFrame(history.history).to_csv(
        os.path.join(LOG_DIR, run_tag + ".csv"))

    cem.test(env, nb_episodes=5, visualize=False)
Exemple #14
0
class KerasRlCemAgent(KerasRlAgent):
    """Keras-rl implementation of the cross-entropy method algorithm.

        see "https://learning.mpi-sws.org/mlss2016/slides/2016-MLSS-RL.pdf" and
            "https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.81.6579&rep=rep1&type=pdf"
    """
    class CemCallback(rl.callbacks.Callback):
        """Callback registered with keras rl agents to propagate iteration and episode updates."""
        def __init__(self, cem_agent: KerasRlAgent,
                     cem_context: core.CemTrainContext, nb_steps: int):
            """
            Args:
                cem_agent: the agent to propagate iteration begin/end events to.
                cem_context: the train_context containing the iteration definitions
                nb_steps: value set in the keras cem agent.
            """
            assert cem_agent
            assert cem_context
            assert nb_steps
            self._cem_agent: KerasRlAgent = cem_agent
            self._cem_context: core.CemTrainContext = cem_context
            self._nb_steps = nb_steps
            super().__init__()

        def on_episode_end(self, episode, logs=None):
            """Signals the owning agent the end / begin of a training iteration."""
            context: core.CemTrainContext = self._cem_context
            finished_episodes = episode + 1
            # Only react when an iteration's worth of episodes is complete.
            if finished_episodes % context.num_episodes_per_iteration != 0:
                return
            owner = self._cem_agent
            owner.on_train_iteration_end(math.nan)
            if context.training_done:
                # Move the keras-rl agent's step counter to the configured
                # nb_steps (presumably to make fit() stop - confirm).
                owner._agent.step = self._nb_steps
            else:
                owner.on_train_iteration_begin()

    def train_implementation(self, train_context: core.CemTrainContext):
        """Create env, model and keras-rl CEMAgent, then fit it.

        Args:
            train_context: supplies the iteration / episode counts, the number
                of warmup steps and the elite fraction.
        """
        assert train_context
        context: core.CemTrainContext = train_context
        train_env = self._create_env()
        keras_model = self._create_model(gym_env=train_env,
                                         activation='softmax')

        # The parameter memory holds five iterations worth of episodes.
        policy_buffer_size = 5 * context.num_episodes_per_iteration
        self.log_api('EpisodeParameterMemory',
                     f'(limit={policy_buffer_size}, window_length=1)')
        memory = EpisodeParameterMemory(limit=policy_buffer_size,
                                        window_length=1)

        num_actions = train_env.action_space.n
        agent_settings = dict(
            nb_steps_warmup=context.num_steps_buffer_preload,
            train_interval=context.num_episodes_per_iteration,
            batch_size=context.num_episodes_per_iteration,
            elite_frac=context.elite_set_fraction,
        )
        self.log_api(
            'CEMAgent',
            f'(model=..., nb_actions={num_actions}, memory=..., '
            f'nb_steps_warmup={agent_settings["nb_steps_warmup"]}, '
            f'train_interval={agent_settings["train_interval"]}, '
            f'batch_size={agent_settings["batch_size"]}, '
            f'elite_frac={agent_settings["elite_frac"]})')
        self._agent = CEMAgent(model=keras_model,
                               nb_actions=num_actions,
                               memory=memory,
                               **agent_settings)
        self.log_api('agent.compile', '()')
        self._agent.compile()

        nb_steps = (context.num_iterations
                    * context.num_episodes_per_iteration
                    * context.max_steps_per_episode)
        iteration_callback = KerasRlCemAgent.CemCallback(self, context, nb_steps)
        self.on_train_iteration_begin()
        self.log_api('agent.fit', f'(train_env, nb_steps={nb_steps})')
        self._agent.fit(train_env,
                        nb_steps=nb_steps,
                        visualize=False,
                        verbose=0,
                        callbacks=[iteration_callback])
        # Close the last iteration if the callback did not finish training.
        if not context.training_done:
            self.on_train_iteration_end(math.nan)
Exemple #15
0
def run_agent(agent):
    """Build a convolutional CEM agent for the Tron environment and train it.

    Args:
        agent: Passed as first argument to ``RestrictedViewTronEnv``.
    """
    print("started new process")

    import tensorflow as tf
    from keras.backend.tensorflow_backend import set_session

    # TensorFlow session with GPU memory growth enabled.
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True
    set_session(tf.Session(config=session_config))

    WINDOW_LENGTH = 1

    num_actions = 3
    view_shape = (21, 21)
    input_shape = (WINDOW_LENGTH, ) + view_shape

    env = RestrictedViewTronEnv(agent, 10)

    # Two conv layers followed by a dense layer and a softmax over actions.
    # Permute moves the window axis to the last position.
    model = Sequential([
        Permute((2, 3, 1), input_shape=input_shape),
        Conv2D(16, (3, 3), padding="same"),
        Activation("relu"),
        Conv2D(32, (3, 3), padding="same"),
        Activation("relu"),
        Flatten(),
        Dense(256),
        Activation("relu"),
        Dense(num_actions),
        Activation('softmax'),
    ])
    np.random.seed(2363)

    # NOTE(review): this processor is created but never handed to CEMAgent
    # below - confirm whether `processor=processor` was intended.
    processor = TronProcessor()

    memory = EpisodeParameterMemory(limit=1000000, window_length=WINDOW_LENGTH)

    cem = CEMAgent(model,
                   nb_actions=num_actions,
                   memory=memory,
                   nb_steps_warmup=50000 // 5,
                   train_interval=4)
    cem.compile()

    # NOTE(review): file names say "dqn" although the agent is a CEMAgent.
    weights_filename = 'tmp/dqn_test_weights.h5f'
    checkpoint_weights_filename = 'tmp/dqn_test_weights_{step}.h5f'
    log_filename = 'tmp/dqn_test_log.json'
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename,
                                interval=250000 // 10),
        FileLogger(log_filename, interval=10000),
    ]

    def train(transfer=False):
        """Fit, save and test the agent; optionally start from saved weights."""
        print(cem.get_config())  # todo save to file

        if transfer:
            cem.load_weights(weights_filename)

        cem.fit(env,
                callbacks=callbacks,
                nb_steps=1750000 // 10,
                log_interval=10000)
        cem.save_weights(weights_filename, overwrite=True)
        cem.test(env, nb_episodes=20, visualize=True)

    def opponent():
        """Load saved weights and run a long, non-visualized test."""
        cem.load_weights('tmp/dqn_test_weights.h5f')
        cem.test(env, nb_episodes=200000, visualize=False)

    def test():
        """Load saved weights and run a short, visualized test."""
        cem.load_weights('tmp/dqn_test_weights.h5f')
        cem.test(env, nb_episodes=20, visualize=True)

    # Only training from scratch is run; opponent() and test() are unused here.
    train()
class KerasCEMAgent(AbstractAgent):
    """Agent that trains a softmax policy network with keras-rl's ``CEMAgent``."""

    def __init__(self, env, timesteps_per_episode=10001):
        """Build the policy network and the (not yet compiled) CEM agent."""
        super().__init__(env, timesteps_per_episode)
        self.evaluating = False
        self.action_size = env.action_space.n
        self.state_size = env.num_states
        self.model = self._build_compile_model()
        episode_memory = EpisodeParameterMemory(limit=1000, window_length=1)
        self.agent = CEMAgent(model=self.model,
                              nb_actions=self.action_size,
                              memory=episode_memory,
                              batch_size=50,
                              nb_steps_warmup=2000,
                              train_interval=50,
                              elite_frac=0.05)

    def run(self) -> {str: float}:
        """
        The agent's training method.
        Returns: a dictionary keyed by the EPISODE_* constants. The reward and
        step entries hold the per-episode arrays taken from the fit history;
        the min / max / variance entries are uninitialized placeholder arrays.
        """
        self.agent.compile()
        fit_history = self.agent.fit(self.env, nb_steps=ITER_NUM, visualize=False, verbose=1)
        recorded = fit_history.history
        if recorded:
            episode_reward = recorded["episode_reward"]
            nb_episode_steps = recorded["nb_episode_steps"]
        else:
            # TODO - placeholder values when nothing was recorded
            episode_reward, nb_episode_steps = [0], [0]
        return {
            EPISODE_REWARD_MEAN: np.array(episode_reward),
            EPISODE_STEP_NUM_MEAN: np.array(nb_episode_steps),
            EPISODE_REWARD_MIN: np.empty([]),
            EPISODE_REWARD_MAX: np.empty([]),
            EPISODE_VARIANCE: np.empty([]),
        }

    def _build_compile_model(self):
        """Return the policy network: three ReLU layers of 16 units and a softmax output.

        A simpler alternative is Flatten -> Dense(action_size) -> softmax.
        """
        network = Sequential()
        network.add(Flatten(input_shape=(1,) + self.env.observation_space.shape))
        for _ in range(3):
            network.add(Dense(16))
            network.add(Activation('relu'))
        network.add(Dense(self.action_size))
        network.add(Activation('softmax'))
        return network

    def compute_action(self, state) -> int:
        """
        Computes the best action from a given state.
        Returns: an int, the index of the largest value in the model prediction.
        """
        prediction = self.model.predict(state)
        best_action = np.argmax(prediction)
        return int(best_action)

    def stop_episode(self):
        """No-op."""
        pass

    def episode_callback(self, state, action, reward, next_state, terminated):
        """No-op."""
        pass

    def evaluate(self, visualize=False):
        """Test the agent for 5 episodes of at most 60 steps each."""
        self.agent.test(self.env, nb_episodes=5, visualize=visualize, nb_max_episode_steps=60)

    def replay_experiences(self):
        """No-op."""
        pass
Exemple #17
0
import numpy as np
import gym
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam
from rl.agents.cem import CEMAgent
from rl.memory import EpisodeParameterMemory

ENV_NAME = 'CartPole-v0'

# Create the environment, then seed NumPy and the environment.
env = gym.make(ENV_NAME)
np.random.seed(123)
env.seed(123)

nb_actions = env.action_space.n
obs_dim = env.observation_space.shape[0]

# Policy network: flattened observation -> dense layer -> softmax.
model = Sequential([
    Flatten(input_shape=(1, ) + env.observation_space.shape),
    Dense(nb_actions),
    Activation('softmax'),
])
print(model.summary())

memory = EpisodeParameterMemory(limit=1000, window_length=1)
cem = CEMAgent(model=model, nb_actions=nb_actions, memory=memory,
               batch_size=50, nb_steps_warmup=2000, train_interval=50,
               elite_frac=0.05)
cem.compile()

# Train, save the weights, then watch 5 test episodes.
cem.fit(env, nb_steps=100000, visualize=False, verbose=2)
cem.save_weights('cem_{}_params.h5f'.format(ENV_NAME), overwrite=True)
cem.test(env, nb_episodes=5, visualize=True)
nb_actions = env.action_space.n
obs_dim = env.observation_space.shape[0]

# LSTM variant: the recurrent layer emits one value per action, then softmax.
modelL = Sequential([
    LSTM(nb_actions, input_shape=(1, ) + env.observation_space.shape),
    Activation('softmax'),
])
memoryL = EpisodeParameterMemory(limit=1000, window_length=1)
cemL = CEMAgent(model=modelL, nb_actions=nb_actions, memory=memoryL,
                batch_size=50, nb_steps_warmup=2000, train_interval=50,
                elite_frac=0.05)
cemL.compile()

# Train (keeping the history), save the weights, then watch 5 test episodes.
histL = cemL.fit(env, nb_steps=50000, visualize=False, verbose=1)
cemL.save_weights('cemL_{}_params.h5f'.format(ENV_NAME), overwrite=True)
cemL.test(env, nb_episodes=5, visualize=True)

modelD = Sequential()
modelD.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
modelD.add(Dense(nb_actions))
modelD.add(Activation('softmax'))
memoryD = EpisodeParameterMemory(limit=1000, window_length=1)
cemD = CEMAgent(model=modelD,
                nb_actions=nb_actions,
                memory=memoryD,
                batch_size=50,
                nb_steps_warmup=2000,
                train_interval=50,