Example #1
def generate_insurance_model(env=None,
                             lr=.0001,
                             memory_len=100,
                             target_model_update=.09):
    ins_actor = Sequential()
    ins_actor.add(Flatten(input_shape=(1, ) + (env.NUM_INSURANCES, 21)))
    ins_actor.add(Dense(NUM_HIDDEN_UNITS))
    ins_actor.add(Activation('relu'))
    ins_actor.add(Dense(NUM_HIDDEN_UNITS))
    ins_actor.add(Activation('relu'))
    ins_actor.add(Dense(NUM_HIDDEN_UNITS))
    ins_actor.add(Activation('relu'))
    ins_actor.add(Dense(1))
    ins_actor.add(Activation('softsign'))
    # print(ins_actor.summary())
    # print(ins_actor.layers[-1].activation)

    action_input = Input(shape=(1, ), name='action_input')
    observation_input = Input(shape=(1, ) + (env.NUM_INSURANCES, 21),
                              name='observation_input')
    flattened_observation = Flatten()(observation_input)
    x = Concatenate()([action_input, flattened_observation])
    x = Dense(NUM_HIDDEN_UNITS)(x)
    x = Activation('relu')(x)
    x = Dense(NUM_HIDDEN_UNITS)(x)
    x = Activation('relu')(x)
    x = Dense(NUM_HIDDEN_UNITS)(x)
    x = Activation('relu')(x)
    x = Dense(1)(x)
    x = Activation('softsign')(x)
    ins_critic = Model(inputs=[action_input, observation_input], outputs=x)
    # print(ins_critic.summary())

    ins_memory = SequentialMemory(limit=memory_len, window_length=1)
    # ins_random_process = OrnsteinUhlenbeckProcess(size=1, theta=.15, mu=0, sigma=.3)
    ins_random_process = GaussianWhiteNoiseProcess(mu=0,
                                                   sigma=0.2,
                                                   sigma_min=0.005,
                                                   n_steps_annealing=5000)
    # ins_random_process = None
    ins_agent = DDPGAgent(nb_actions=1,
                          actor=ins_actor,
                          critic=ins_critic,
                          critic_action_input=action_input,
                          memory=ins_memory,
                          nb_steps_warmup_critic=100,
                          nb_steps_warmup_actor=100,
                          random_process=ins_random_process,
                          gamma=.99,
                          target_model_update=target_model_update)
    # ins_agent.processor = MultiInputProcessor(3)
    ins_agent.compile(Adam(lr=lr, clipnorm=1.), metrics=['mae'])

    print(type(ins_agent))

    return ins_agent
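The exploration noise above anneals its standard deviation linearly from sigma=0.2 down to sigma_min=0.005 over 5000 steps. A minimal sketch of that schedule, written as a standalone helper for illustration (it mirrors the behaviour of keras-rl's annealed Gaussian process, but it is not the library code):

# Illustration only: simplified version of the linear sigma annealing performed by
# GaussianWhiteNoiseProcess in keras-rl (not the library's actual implementation).
import numpy as np

def annealed_gaussian_noise(step, mu=0.0, sigma=0.2, sigma_min=0.005,
                            n_steps_annealing=5000, size=1):
    # sigma decays linearly from `sigma` to `sigma_min`, then stays at `sigma_min`
    current_sigma = max(sigma_min, sigma - (sigma - sigma_min) * step / n_steps_annealing)
    return np.random.normal(mu, current_sigma, size)

# annealed_gaussian_noise(step=0)    -> noise with std ~0.2 (wide exploration)
# annealed_gaussian_noise(step=5000) -> noise with std ~0.005 (almost deterministic)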
def train_model(seed=1):
    np.random.seed(seed)
    env = CameraControlEnvCont()
    env.seed(seed)

    actor, critic, action_input = define_actor_critic_models(actions=3)

    memory = SequentialMemory(limit=10000, window_length=1)

    random_process = GaussianWhiteNoiseProcess(mu=0,
                                               sigma=0.1,
                                               sigma_min=0.01,
                                               n_steps_annealing=49000,
                                               size=3)

    agent = DDPGAgent(nb_actions=3,
                      actor=actor,
                      critic=critic,
                      critic_action_input=action_input,
                      memory=memory,
                      nb_steps_warmup_critic=500,
                      nb_steps_warmup_actor=500,
                      random_process=random_process,
                      gamma=.1,
                      target_model_update=1e-3,
                      batch_size=32)
    agent.compile([RMSprop(lr=.0001), RMSprop(lr=.01)], metrics=['mae'])

    log_filename = 'results/drone_camera_cont_control_log.json'
    model_checkpoint_filename = 'results/drone_camera_cont_cnn_weights_{step}.model'
    callbacks = [
        ModelIntervalCheckpoint(model_checkpoint_filename, interval=5000)
    ]
    callbacks += [FileLogger(log_filename, interval=1)]

    agent.fit(env,
              nb_steps=50000,
              nb_max_episode_steps=100,
              verbose=2,
              visualize=False,
              log_interval=1,
              callbacks=callbacks)
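train_model above (and evaluate_model further below) rely on define_actor_critic_models, which is not part of this excerpt. A minimal sketch of what such a factory could return for a 3-action continuous-control agent is shown here; the observation shape, layer sizes and dense-only architecture are assumptions for illustration (the checkpoint name suggests the original uses a CNN), not the project's implementation:

# Hypothetical sketch of define_actor_critic_models; shapes and layer sizes are assumed.
from keras.layers import Concatenate, Dense, Flatten, Input
from keras.models import Model, Sequential

def define_actor_critic_models(actions=3, observation_shape=(64, 64, 1)):
    # Actor: maps a window of 1 observation to `actions` continuous outputs in [-1, 1]
    actor = Sequential()
    actor.add(Flatten(input_shape=(1,) + observation_shape))
    actor.add(Dense(64, activation='relu'))
    actor.add(Dense(64, activation='relu'))
    actor.add(Dense(actions, activation='tanh'))

    # Critic: maps an (action, observation) pair to a scalar Q-value
    action_input = Input(shape=(actions,), name='action_input')
    observation_input = Input(shape=(1,) + observation_shape, name='observation_input')
    x = Concatenate()([action_input, Flatten()(observation_input)])
    x = Dense(64, activation='relu')(x)
    x = Dense(64, activation='relu')(x)
    x = Dense(1, activation='linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)

    return actor, critic, action_input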
Example #3
    env = LinearEnv(disturbance=disturbance, nb_tracking=future_steps_tracing[count], Q=Q, R=R, path_dist=path_dist,
                    reward_shaping=constraints)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    K.set_session(sess)

    nb_actions = env.action_space.shape[0]
    nb_observations = env.observation_space.shape[0]
    nb_disturbance = 3 * future_steps_dist[count]  # dim d x future steps

    # define exploration noise
    random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=theta, mu=mu, sigma=sigma)
    if constraints == 'SafetyLayer':
        random_process = GaussianWhiteNoiseProcess(size=nb_actions, mu=mu, sigma=sigma)
    # create Actor
    actor = ActorModel(sess, nb_observations, nb_actions, nb_disturbance+future_steps_tracing[count])
    # create Critic
    critic = CriticModel(sess, nb_observations, nb_actions, env, nb_disturbance+future_steps_tracing[count])

    """----FIT DDPG--------------------------------------------------------------------------------------------------"""
    # generate an DDPG object
    agent = DDPG_Agent(actor=actor, critic=critic, critic_action_input=critic.InputActions,
                       constraints_net=constraints_all, memory=replayBuffer, random_process=random_process, constraint=constraints,
                       nb_disturbance=nb_disturbance, nb_tracing=future_steps_tracing[count])

    print("DDPG object generated for run", count+1, "\nTraining starts...")

    # load weights of actor and critic if existing
    if os.path.isfile('ddpg_{}_weights.h5f'.format(ENV_NAME)):
        # assumed continuation (the excerpt is cut off here): restore the saved weights
        agent.load_weights('ddpg_{}_weights.h5f'.format(ENV_NAME))

    def create(self):
        """Create the agent"""
        assert len(self.agent_helper.env.action_space.shape) == 1
        nb_actions = int(self.agent_helper.env.action_space.shape[0])

        # set #nodes and #sfs based on env limits. used for splitting the output layer and action processor
        num_nodes = self.agent_helper.env.env_limits.MAX_NODE_COUNT
        num_sfcs = self.agent_helper.env.env_limits.MAX_SF_CHAIN_COUNT
        num_sfs = self.agent_helper.env.env_limits.MAX_SERVICE_FUNCTION_COUNT

        # create the actor NN
        observation_input = Input(shape=(1,) + self.agent_helper.env.observation_space.shape, name='observation_input')
        flattened_observation = Flatten()(observation_input)
        prev_layer = flattened_observation
        # create hidden layers according to config
        for num_hidden in self.agent_helper.config['actor_hidden_layer_nodes']:
            hidden_layer = Dense(num_hidden,
                                 activation=self.agent_helper.config['actor_hidden_layer_activation'])(prev_layer)
            prev_layer = hidden_layer
        # split output layer into separate parts for each node and SF and apply softmax individually
        out_parts = [Dense(num_nodes, activation='softmax')(prev_layer) for _ in range(num_nodes * num_sfs)]
        out = Concatenate()(out_parts)
        actor = Model(inputs=observation_input, outputs=out)

        # create the critic NN
        action_input = Input(shape=(nb_actions,), name='action_input')
        observation_input = Input(shape=(1,) + self.agent_helper.env.observation_space.shape, name='observation_input')
        flattened_observation = Flatten()(observation_input)
        prev_layer = Concatenate()([action_input, flattened_observation])
        # create hidden layers according to config
        for num_hidden in self.agent_helper.config['critic_hidden_layer_nodes']:
            hidden_layer = Dense(num_hidden,
                                 activation=self.agent_helper.config['critic_hidden_layer_activation'])(prev_layer)
            prev_layer = hidden_layer
        out_critic = Dense(1, activation='linear')(prev_layer)
        critic = Model(inputs=[action_input, observation_input], outputs=out_critic)

        # write NN summary to string
        actor_summary_lst = []
        actor.summary(print_fn=actor_summary_lst.append)
        actor_summary = "\n".join(actor_summary_lst)
        actor.summary(print_fn=logger.debug)

        # write NN summary to string
        critic_summary_lst = []
        critic.summary(print_fn=critic_summary_lst.append)
        critic_summary = "\n".join(critic_summary_lst)
        critic.summary(print_fn=logger.debug)

        # The following line is causing aliasing issues, e.g. 'nb_observation' is added to agent_config
        self.agent_helper.result.agent_config = copy.copy(self.agent_helper.config)  # Set agent params in result file
        self.agent_helper.result.agent_config['nb_observation'] = self.agent_helper.env.observation_space.shape[0]
        self.agent_helper.result.agent_config['nb_actions'] = nb_actions

        self.agent_helper.result.agent_config['actor'] = {}
        self.agent_helper.result.agent_config['actor']['summary'] = actor_summary

        self.agent_helper.result.agent_config['critic'] = {}
        self.agent_helper.result.agent_config['critic']['summary'] = critic_summary
        self.agent_helper.result.agent_config['metrics'] = ['mae']

        # creating the Agent
        processor = ActionScheduleProcessor(num_nodes=num_nodes, num_sfcs=num_sfcs, num_sfs=num_sfs)
        memory = SequentialMemory(limit=self.agent_helper.config['mem_limit'],
                                  window_length=self.agent_helper.config['mem_window_length'])
        random_process = GaussianWhiteNoiseProcess(sigma=self.agent_helper.config['rand_sigma'],
                                                   mu=self.agent_helper.config['rand_mu'], size=nb_actions)

        agent = DDPGAgent(nb_actions=nb_actions,
                          actor=actor,
                          critic=critic,
                          critic_action_input=action_input,
                          memory=memory,
                          nb_steps_warmup_critic=self.agent_helper.config['nb_steps_warmup_critic'],
                          nb_steps_warmup_actor=self.agent_helper.config['nb_steps_warmup_actor'],
                          random_process=random_process,
                          gamma=self.agent_helper.config['gamma'],
                          target_model_update=self.agent_helper.config['target_model_update'],
                          processor=processor,
                          batch_size=64)
        agent.compile(Adam(lr=self.agent_helper.config['learning_rate'],
                           decay=self.agent_helper.config['learning_rate_decay']), metrics=['mae'])
        self.agent = agent
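Every hyperparameter used by create() comes from self.agent_helper.config. For reference, a hypothetical config dictionary covering exactly the keys the method reads; the values are placeholders, not the project's defaults:

# Hypothetical example config; the keys are the ones accessed by create(), the values are placeholders.
agent_config = {
    'actor_hidden_layer_nodes': [64, 64],
    'actor_hidden_layer_activation': 'relu',
    'critic_hidden_layer_nodes': [64, 64],
    'critic_hidden_layer_activation': 'relu',
    'mem_limit': 10000,
    'mem_window_length': 1,
    'rand_mu': 0.0,
    'rand_sigma': 0.2,
    'nb_steps_warmup_critic': 100,
    'nb_steps_warmup_actor': 100,
    'gamma': 0.99,
    'target_model_update': 1e-3,
    'learning_rate': 1e-4,
    'learning_rate_decay': 0.0,
}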
def evaluate_model(model_path=None, interactive=False, seed=12345):
    np.random.seed(seed)

    actor, critic, action_input = define_actor_critic_models(actions=3)
    memory = SequentialMemory(limit=10000, window_length=1)
    random_process = GaussianWhiteNoiseProcess(mu=0,
                                               sigma=0,
                                               sigma_min=0,
                                               n_steps_annealing=1)

    agent = DDPGAgent(nb_actions=3,
                      actor=actor,
                      critic=critic,
                      critic_action_input=action_input,
                      memory=memory,
                      nb_steps_warmup_critic=500,
                      nb_steps_warmup_actor=100,
                      random_process=random_process,
                      gamma=.95,
                      target_model_update=0.0001,
                      batch_size=32)
    agent.compile([RMSprop(lr=.0001), RMSprop(lr=.01)], metrics=['mae'])

    if model_path is not None:
        agent.load_weights(model_path)

    # Train Evaluation
    env = CameraControlEnvCont(dataset_pickle_path='data/dataset.pickle',
                               testing=False,
                               interactive=interactive)
    env.seed(seed)
    res = agent.test(env,
                     nb_episodes=500,
                     nb_max_episode_steps=100,
                     verbose=0,
                     visualize=False)
    train_mean_reward = np.mean(res.history['episode_reward'])
    before_train_position_error = np.mean(
        np.abs(env.init_position_error_pixels))
    before_train_zoom_error = np.mean(np.abs(env.init_zoom_error_pixels))
    after_train_position_error = np.mean(
        np.abs(env.final_position_error_pixels))
    after_train_zoom_error = np.mean(np.abs(env.final_zoom_error_pixels))
    print("Training evaluation: ")
    print("Mean reward: ", train_mean_reward)
    print("Position: ", before_train_position_error, " -> ",
          after_train_position_error)
    print("Zoom: ", before_train_zoom_error, " -> ", after_train_zoom_error)

    # Test Evaluation
    env = CameraControlEnvCont(dataset_pickle_path='data/dataset.pickle',
                               testing=True,
                               interactive=interactive)
    env.seed(seed)
    res = agent.test(env,
                     nb_episodes=500,
                     nb_max_episode_steps=100,
                     verbose=0,
                     visualize=False)
    test_mean_reward = np.mean(res.history['episode_reward'])
    before_test_position_error = np.mean(
        np.abs(env.init_position_error_pixels))
    before_test_zoom_error = np.mean(np.abs(env.init_zoom_error_pixels))
    after_test_position_error = np.mean(
        np.abs(env.final_position_error_pixels))
    after_test_zoom_error = np.mean(np.abs(env.final_zoom_error_pixels))
    print("Testing evaluation: ")
    print("Mean reward: ", test_mean_reward)
    print("Position: ", before_test_position_error, " -> ",
          after_test_position_error)
    print("Zoom: ", before_test_zoom_error, " -> ", after_test_zoom_error)
Example #6
    def __init__(self, name, env, grayscale, width, height):
        super(DDPGLearner, self).__init__(name=name, env=env)

        self.nb_actions = env.available_actions
        self.abs_max_reward = env.abs_max_reward
        self.mission_name = env.mission_name

        self.grayscale = grayscale
        self.width = width
        self.height = height

        self.recurrent = False  # Use LSTM
        self.batch_size = 32
        self.window_length = 4

        if tf:
            config = tf.ConfigProto()
            config.gpu_options.allow_growth = True
            sess = tf.Session(config=config)
            tensorflow_backend.set_session(session=sess)

        if not self.recurrent:
            self.actor, self.critic, self.action_input = Minecraft_DDPG(
                self.window_length, self.grayscale, self.width, self.height,
                self.nb_actions)
        else:
            self.actor, self.critic, self.action_input = Minecraft_DDPG_LSTM(
                self.window_length, self.grayscale, self.width, self.height,
                self.nb_actions)

        # Replay memory
        self.memory = SequentialMemory(limit=1000000,
                                       window_length=self.window_length)

        # Add random noise for exploration
        self.random_process = GaussianWhiteNoiseProcess(mu=0.0,
                                                        sigma=0.5,
                                                        size=self.nb_actions)
        '''
        # We can also generate exploration noise with different parameters for each action, e.g. if we want the agent
        # to be more likely to explore moving forward than backward. In that case, a list or tuple of random
        # processes, one for each action, must be passed to the agent.
        # For example:

        self.random_process = []
        self.random_process.append(GaussianWhiteNoiseProcess(mu=1.5, sigma=1.0))  # For moving
        self.random_process.append(GaussianWhiteNoiseProcess(mu=0.0, sigma=1.0))  # For turning
        '''

        self.processor = MalmoProcessor(self.grayscale, self.window_length,
                                        self.recurrent, self.abs_max_reward)
        self.agent = DDPGAgent(actor=self.actor,
                               critic=self.critic,
                               critic_action_input=self.action_input,
                               nb_actions=self.nb_actions,
                               memory=self.memory,
                               batch_size=self.batch_size,
                               processor=self.processor,
                               random_process=self.random_process,
                               gamma=0.99,
                               nb_steps_warmup_actor=10000,
                               nb_steps_warmup_critic=10000,
                               target_model_update=1e-3)
        self.agent.compile([Adam(lr=1e-4), Adam(lr=1e-3)], metrics=['mae'])

x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=(action_input, observation_input), outputs=x)
print(critic.summary())

# Create a replay memory
memory = SequentialMemory(limit=150000,
                          window_length=1,
                          ignore_episode_boundaries=True)

# Create a random process for exploration during training
random_process = GaussianWhiteNoiseProcess(mu=0.0,
                                           sigma=0.8,
                                           sigma_min=0.05,
                                           n_steps_annealing=650000)

# Create the agent
agent = DDPGAgent(nb_actions=nb_actions,
                  actor=actor,
                  critic=critic,
                  critic_action_input=action_input,
                  memory=memory,
                  random_process=random_process,
                  nb_steps_warmup_actor=32,
                  nb_steps_warmup_critic=32,
                  target_model_update=1e-4,
                  gamma=0.9,
                  batch_size=32)
agent.compile(Adam(lr=1e-4), metrics=['mae'])

    print(critic.summary())

    # Create a replay memory
    memory = SequentialMemory(limit=5000, window_length=window_length)

    # Create a random process for exploration during training
    random_process = OrnsteinUhlenbeckProcess(theta=0.5,
                                              mu=0.0,
                                              sigma=0.1,
                                              dt=env.physical_system.tau,
                                              sigma_min=0.05,
                                              n_steps_annealing=85000,
                                              size=2)

    gauss_random_process = GaussianWhiteNoiseProcess(sigma=0.1,
                                                     sigma_min=0.05,
                                                     n_steps_annealing=85000,
                                                     size=2)

    # Create the agent
    agent = DDPGAgent(nb_actions=nb_actions,
                      actor=actor,
                      critic=critic,
                      critic_action_input=action_input,
                      memory=memory,
                      random_process=random_process,
                      nb_steps_warmup_actor=2048,
                      nb_steps_warmup_critic=1024,
                      target_model_update=1000,
                      gamma=0.9,
                      batch_size=128,
                      memory_interval=1)
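This excerpt stops before the agent is compiled. As in the other examples on this page, a compile call (with a single optimizer, or an [actor_optimizer, critic_optimizer] pair) is still required before training; a hedged continuation with placeholder values:

    # Assumed continuation (not shown in the excerpt): compile, then train on the env created above.
    agent.compile(Adam(lr=1e-4), metrics=['mae'])
    agent.fit(env, nb_steps=100000, visualize=False, verbose=1)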
Example #9
                model = DRQN_Model(window_length=opt.dqn_window_length,
                                   num_actions=env.available_actions)
            else:
                model = DQN_Model(window_length=opt.dqn_window_length,
                                  num_actions=env.available_actions)
            # Setup DQN agent
            agent = DQN(model=model,
                        num_actions=env.available_actions,
                        policy=policy,
                        test_policy=policy,
                        processor=processor)
    else:
        assert not opt.recurrent
        # Setup random process for exploration
        random_process = [
            GaussianWhiteNoiseProcess(sigma=0.0, mu=1.0),
            GaussianWhiteNoiseProcess(sigma=1.0, mu=0.0)
        ]
        # Setup DDPG agent model
        actor, critic, action_input = DDPG_Model(
            window_length=opt.ddpg_window_length,
            num_actions=env.available_actions)
        # Setup DDPG agent
        agent = DDPG(actor=actor,
                     critic=critic,
                     critic_action_input=action_input,
                     num_actions=env.available_actions,
                     processor=processor,
                     random_process=random_process)

    print(mission_name + ' initialized.')
Example #10
x = Activation('relu')(x)
# x = Dense(16)(x)
# x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)
print(critic.summary())

memory = SequentialMemory(limit=100000, window_length=1)
# The Ornstein-Uhlenbeck options below are mean-reverting processes with a momentum effect that
# limits the difference between consecutive actions (mu = 0 -> no net effect on average).

# random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=[0.25, 0.25, 0.25], mu=[0.1, 0.0, 0.1], sigma=0.25)

# random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=0.2, mu=0.0, sigma=0.2)
random_process = GaussianWhiteNoiseProcess(size=nb_actions, mu=0.0, sigma=0.20,
                                           sigma_min=0.01, n_steps_annealing=5000)

agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=200, nb_steps_warmup_actor=500,
                  random_process=random_process, gamma=.99, target_model_update=1E-3)

# random_process = GaussianWhiteNoiseProcess(size=nb_actions, mu = 0.0, sigma = 0.1, sigma_min = 0.01, n_steps_annealing = 5000)
#
# agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
#                   memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
#                   random_process=random_process, gamma=.99, target_model_update=0.1)

agent.compile(Adam(lr=0.001, clipnorm=1.))

# agent.load_weights('ddpg_{}_SimpleG_ExplodeOkay_weights.h5f'.format(ENV_NAME))
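As a hedged continuation of this last example (not shown in the original excerpt): a typical keras-rl run follows the compile call with fit and then persists the weights, reusing the naming pattern of the commented load_weights line above. env, ENV_NAME and the step counts are assumed to come from the elided part of the snippet:

# Assumed continuation: train, then save weights (step counts are placeholders).
agent.fit(env, nb_steps=50000, nb_max_episode_steps=200, visualize=False, verbose=1)
agent.save_weights('ddpg_{}_SimpleG_ExplodeOkay_weights.h5f'.format(ENV_NAME), overwrite=True)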