def train(env, name, steps=25000, pretrained_path=None):
    agent = build_agent(env)
    # Optionally load pre-trained weights
    if pretrained_path is not None:
        agent.load_weights(pretrained_path)

    save_path = os.path.join("models", name)
    os.makedirs(save_path, exist_ok=False)
    os.makedirs(os.path.join(save_path, "checkpoints"), exist_ok=False)
    h = agent.fit(env,
                  nb_steps=steps,
                  visualize=False,
                  verbose=2,
                  callbacks=[
                      ModelIntervalCheckpoint(os.path.join(
                          save_path, "checkpoints", "chkpt_{step}.h5f"),
                                              interval=int(steps / 20),
                                              verbose=1),
                      TensorBoard(log_dir=os.path.join("logs", name))
                  ])

    pickle.dump(h.history, open(os.path.join(save_path, "history.pkl"), "wb"))

    agent.save_weights(os.path.join(save_path, "last_weights.h5f"),
                       overwrite=True)
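
A minimal invocation sketch for the train() helper above, assuming build_agent and the imports used in the snippet are in scope; the environment name and step count below are placeholders.

import gym

env = gym.make("CartPole-v1")  # placeholder environment
train(env, name="cartpole_dqn", steps=50000)

# To resume from earlier weights (path shown is illustrative only):
# train(env, name="cartpole_dqn_run2", steps=50000,
#       pretrained_path=os.path.join("models", "cartpole_dqn", "last_weights.h5f"))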
Example #2
def run_agent(queue_agent, model_name, mode="test", version=None):
    print("started new process")

    dqn = get_rl_agent(queue_agent)

    model_iteration = get_newest_version(model_name) if version is None else version

    transfer_weights_filename = Path(f"tmp/{model_name}-{model_iteration}/") / "dqn_test_weights.h5f"

    if mode == "train":
        model_path = Path(f"tmp/{model_name}-{model_iteration + 1}/")
        model_path.mkdir(parents=True)
        with open(model_path / "model_cfg.json", "w+") as fp:
            json.dump(dqn.get_config(), fp)
    else:
        model_path = Path(f"tmp/{model_name}-{model_iteration}/")
        if not model_path.exists():
            raise ValueError("Model does not exist")

    print(f"Using model {model_path}")

    view_distance = view_distance_from_dqn(dqn)
    env = RestrictedViewTronEnv(queue_agent, view_distance)

    weights_filename = str(model_path / 'dqn_test_weights.h5f')
    checkpoint_weights_filename = str(model_path / "dqn_test_weights_{step}.h5f")

    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=500000 // TRAIN_DIV)]

    def train():
        if transfer_weights_filename.exists():
            dqn.load_weights(transfer_weights_filename)
            print(f"Transfer learning from old model loaded from '{transfer_weights_filename}'")

        dqn.fit(env, callbacks=callbacks, nb_steps=2000000 // TRAIN_DIV, log_interval=10000)
        dqn.save_weights(weights_filename, overwrite=True)
        dqn.save_weights("tmp/dqn_test_weights.h5f", overwrite=True)
        # dqn.test(env, nb_episodes=20, visualize=True)

    def opponent():
        while True:
            dqn.load_weights(weights_filename)
            dqn.test(env, nb_episodes=2000000, visualize=False, verbose=0)

    def test(steps=20, visualize=True):
        dqn.load_weights(weights_filename)
        dqn.test(env, nb_episodes=steps, visualize=visualize)

    if mode == "train":
        train()
    elif mode == "test":
        test(20, True)
    elif mode == "evaluate":
        test(1000, False)
    elif mode == "opponent":
        opponent()
    else:
        raise ValueError("Invalid mode")

    queue_agent.quit()
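
The function above is written to run in its own process (note the "started new process" message). A hedged launch sketch, where make_queue_agent stands in for whatever factory the surrounding project uses to create the queue-based agent interface:

from multiprocessing import Process

# make_queue_agent and the model name are assumed placeholders, not part of the snippet above.
p = Process(target=run_agent, args=(make_queue_agent(), "tron_dqn"), kwargs={"mode": "train"})
p.start()
p.join()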
Example #3
    def start(self):
        if self.train:
            weights_filename = 'dqn_{}_weights.h5f'.format(self.env_id)
            if self.weights:
                self.agent.load_weights(weights_filename)
                print('...loading weights for {}'.format(self.env_id))

            checkpoint_weights_filename = 'dqn_' + self.env_id + '_weights_{step}.h5f'
            log_filename = 'dqn_{}_log.json'.format(self.env_id)

            callbacks = [
                ModelIntervalCheckpoint(checkpoint_weights_filename,
                                        interval=250000)
            ]
            callbacks += [FileLogger(log_filename, interval=100)]

            self.agent.fit(self,
                           callbacks=callbacks,
                           nb_steps=self.training_steps,
                           log_interval=10000)
            self.agent.save_weights(weights_filename, overwrite=True)
            self.agent.test(self, nb_episodes=2, visualize=False)
            self.render()
        else:
            weights_filename = 'dqn_{}_weights.h5f'.format(self.env_id)
            self.agent.load_weights(weights_filename)
            self.agent.test(self, nb_episodes=2, visualize=False)
            self.render()
Example #4
    def start(self):
        weights_filename = '{}/dqn_weights/dqn_{}_weights.h5f'.format(
            self.cwd, self.env_name)
        if self.weights:
            self.agent.load_weights(weights_filename)
            print('...loading weights for {}'.format(self.env_name))

        if self.train:
            checkpoint_weights_filename = 'dqn_' + self.env_name + \
                                          '_weights_{step}.h5f'
            checkpoint_weights_filename = '{}/dqn_weights/'.format(self.cwd) + \
                                          checkpoint_weights_filename
            log_filename = '{}/dqn_weights/dqn_{}_log.json'.format(
                self.cwd, self.env_name)
            print('FileLogger: {}'.format(log_filename))

            callbacks = [
                ModelIntervalCheckpoint(checkpoint_weights_filename,
                                        interval=250000)
            ]
            callbacks += [FileLogger(log_filename, interval=100)]

            print('Starting training...')
            self.agent.fit(self.env,
                           callbacks=callbacks,
                           nb_steps=self.number_of_training_steps,
                           log_interval=10000,
                           verbose=0,
                           visualize=self.visualize)
            print('Saving AGENT weights...')
            self.agent.save_weights(weights_filename, overwrite=True)
        else:
            print('Starting TEST...')
            self.agent.test(self.env, nb_episodes=2, visualize=self.visualize)
def train_model(seed=1, setup=0):
    np.random.seed(seed)

    if setup == 0:
        env = CameraControlEnv(a_p=0, a_r=0, e_thres=0)
    elif setup == 1:
        env = CameraControlEnv(a_p=0, a_r=0, e_thres=0.8)
    else:
        env = CameraControlEnv(a_p=0.5, a_r=0.2, e_thres=0.8)

    env.seed(seed)

    model = define_model(actions=7)

    memory = SequentialMemory(limit=10000, window_length=1)

    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1.0, value_min=0.1, value_test=0.05,
                                  nb_steps=95000)
    dqn = DQNAgent(model=model, nb_actions=7, policy=policy, memory=memory, processor=None,
                   nb_steps_warmup=500, gamma=0.95, delta_clip=1, target_model_update=0.001, batch_size=32)
    dqn.compile(RMSprop(lr=.0001), metrics=['mae'])

    log_filename = 'results/drone_camera_control_log_' + str(setup) + '.json'
    model_checkpoint_filename = 'results/drone_camera_cnn_weights_' + str(setup) + '_{step}.model'
    callbacks = [ModelIntervalCheckpoint(model_checkpoint_filename, interval=5000)]
    callbacks += [FileLogger(log_filename, interval=1)]

    dqn.fit(env, nb_steps=100000, nb_max_episode_steps=100, verbose=2, visualize=False, log_interval=1,
            callbacks=callbacks)

    # After training is done, save the final weights.
    model_filename = 'models/drone_camera_cnn_' + str(setup) + '.model'
    dqn.save_weights(model_filename, overwrite=True)
Example #6
    def train(self):

        # Okay, now it's time to learn something! We capture the interrupt exception so that training
        # can be prematurely aborted. Notice that now you can use the built-in Keras callbacks!
        weights_filename = self.get_weight_path(self.stock)
        checkpoint_weights_filename = self.get_weight_path(
            self.stock) + '_{step}.h5f'
        log_filename = 'dqn_{}_log.json'.format(self.stock)

        callbacks = [
            ModelIntervalCheckpoint(checkpoint_weights_filename,
                                    interval=250000)
        ]
        callbacks += [FileLogger(log_filename, interval=100)]
        # callbacks += [WandbLogger(
        #     project = "stock-bot-v0"
        # )]

        self.dqn.fit(self.env,
                     callbacks=callbacks,
                     nb_steps=1750000,
                     log_interval=10000)

        # After training is done, we save the final weights one more time.
        self.dqn.save_weights(weights_filename, overwrite=True)

        # Finally, evaluate our algorithm for 10 episodes.
        self.dqn.test(self.env, nb_episodes=10, visualize=False)
def rl_learn(a=None):
    keras.backend.clear_session()
    actor = actor_net(env)
    critic = critic_net(env)

    # build actor/critic network
    model_path = 'save_model/{}_weights.h5f'.format(agent_name)
    memory = SequentialMemory(limit=50000, window_length=1)
    histcallback = TrainHistoryLogCallback(file_path='save_model/',
                                           plot_interval=1000)
    chkpoint = ModelIntervalCheckpoint(filepath=model_path, interval=10000)

    # define policy
    policy_minigame = starcraft_multiagent_eGreedyPolicy(
        nb_agents=env.nb_agents, nb_actions=env.nb_actions)
    policy = LinearAnnealedPolicy(policy_minigame,
                                  attr='eps',
                                  value_max=1.,
                                  value_min=.1,
                                  value_test=.05,
                                  nb_steps=100000)
    test_policy = starcraft_multiagent_eGreedyPolicy(nb_agents=env.nb_agents,
                                                     nb_actions=env.nb_actions,
                                                     eps=0.05)

    agent = MA_DDPGAgent(nb_agents=env.nb_agents,
                         nb_actions=env.nb_actions,
                         actor=actor,
                         critic=critic,
                         action_type='discrete',
                         critic_action_input=critic.inputs[2:4],
                         train_interval=10,
                         batch_size=128,
                         memory=memory,
                         nb_steps_warmup_critic=5000,
                         reward_factor=0.1,
                         nb_steps_warmup_actor=5000,
                         policy=policy,
                         test_policy=test_policy,
                         gamma=.99,
                         target_model_update=1e-3)

    agent.compile([Adam(lr=5e-5), Adam(lr=5e-5)], metrics=['mae'])

    actor.summary()
    critic.summary()

    time.sleep(1)
    hist_train = agent.fit(env,
                           nb_steps=6000000,
                           nb_max_episode_steps=2000,
                           visualize=False,
                           verbose=2,
                           callbacks=[histcallback, chkpoint])

    np.save('save_model/hist_train.npy', hist_train.history)
    # After training is done, we save the final weights.
    agent.save_weights(model_path, overwrite=True)
Example #8
def build_callbacks(env_name):
    checkpoint_weights_filename = 'dqn_' + env_name + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(env_name)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=5000)
    ]
    callbacks += [FileLogger(log_filename, interval=100)]
    return callbacks
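
A sketch of how such a callbacks list is typically wired into training, assuming dqn is an already compiled keras-rl DQNAgent and env a matching Gym environment; the env name and step counts are illustrative.

env_name = 'CartPole-v1'  # placeholder
callbacks = build_callbacks(env_name)

# ModelIntervalCheckpoint writes weights every 5000 steps,
# FileLogger appends metrics to the JSON log every 100 steps.
dqn.fit(env, callbacks=callbacks, nb_steps=50000, log_interval=10000)
dqn.save_weights('dqn_{}_weights.h5f'.format(env_name), overwrite=True)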
Example #9
def build_callbacks(env_name):
    checkpoint_weights_filename = 'results/Swimmer/ddpg_' + env_name + '_weights_{step}.h5f'
    log_filename = 'results/Swimmer6/exp_5/ddpg_{}_log.json'.format(env_name)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=5000)
    ]
    callbacks += [FileLogger(log_filename, interval=5000)]

    return callbacks
def train(index, policy_nb_steps, fit_nb_steps):

    # Get the environment and extract the number of actions.
    print("Using environment", environment_name)
    environment = gym.make(environment_name)
    np.random.seed(666)
    nb_actions = environment.action_space.shape[0]

    # Build the model.
    v_model, mu_model, l_model = build_models((WINDOW_LENGTH, ) + INPUT_SHAPE,
                                              nb_actions)
    v_model.summary()
    mu_model.summary()
    l_model.summary()

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)
    processor = CarRacingProcessor()
    random_process = OrnsteinUhlenbeckProcess(theta=.15,
                                              mu=0.,
                                              sigma=.3,
                                              size=nb_actions)

    agent = NAFAgent(nb_actions=nb_actions,
                     V_model=v_model,
                     L_model=l_model,
                     mu_model=mu_model,
                     memory=memory,
                     nb_steps_warmup=100,
                     random_process=random_process,
                     gamma=.99,
                     target_model_update=1e-3,
                     processor=processor)
    agent.compile(optimizers.Adam(lr=.001, clipnorm=1.), metrics=['mae'])

    weights_filename = 'naf_{}_{}_weights.h5f'.format(environment_name, index)

    # Okay, now it's time to learn something! We capture the interrupt exception so that training
    # can be prematurely aborted. Notice that now you can use the built-in Keras callbacks!
    checkpoint_weights_filename = 'naf_' + environment_name + '_weights_{step}.h5f'
    log_filename = 'naf_{}_log.json'.format(environment_name)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)
    ]
    callbacks += [TensorboardCallback()]
    callbacks += [FileLogger(log_filename, interval=100)]
    agent.fit(
        environment,
        callbacks=callbacks,
        #nb_steps=1750000,
        nb_steps=fit_nb_steps,
        log_interval=10000,
        visualize="visualize" in sys.argv)

    # After training is done, we save the final weights one more time.
    agent.save_weights(weights_filename, overwrite=True)
Example #11
def build_callbacks():
    checkpoint_weights_filename = PREFIX + '_weights_{step}.h5f'
    checkpoint_memory_filename = args.memory
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=5000)]
    callbacks += [MemoryIntervalCheckpoint(checkpoint_memory_filename, interval=1000)]
    callbacks += [TensorBoard(log_dir='./logs', histogram_freq=0,
                              write_graph=False, write_grads=False, write_images=False,
                              embeddings_freq=0, update_freq='epoch')]
    return callbacks
Example #12
def build_callbacks(env_name, filename_pre, filename_exp):
    checkpoint_weights_filename = filename_pre + env_name + '_weights_{step}.h5f'
    log_filename = filename_pre + filename_exp + '/ddpg_{}_log.json'.format(
        env_name)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=50000)
    ]
    callbacks += [FileLogger(log_filename, interval=50000)]

    return callbacks
Example #13
    def train(self):
        callbacks = [
            ModelIntervalCheckpoint(checkpoint_weights_filename,
                                    interval=INTERVAL_CALLBACK),
            FileLogger(log_filename, interval=FILE_LOGGER_INTERVAL)
        ]
        self.dqn.fit(self.env,
                     callbacks=callbacks,
                     nb_steps=NB_STEPS,
                     log_interval=FIT_LOG_INTERVAL)
    def init_save_model_manager(self, outdir, interval):
        if interval is not None:
            model_dir = os.path.join(outdir, "models")
            if not os.path.exists(model_dir):  # Create if necessary
                os.makedirs(model_dir, exist_ok=True)
            fp = os.path.join(model_dir, 'weights.{step:02d}.hdf5')
            return ModelIntervalCheckpoint(filepath=fp,
                                           interval=interval,
                                           verbose=1)
        return None
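
A hedged companion sketch (not part of the original class) showing how the returned checkpoint callback could be passed to a keras-rl agent; self.agent, self.env, and the step counts are assumptions.

    def fit_with_checkpoints(self, outdir, nb_steps, interval=10000):
        # Hypothetical helper: reuse init_save_model_manager() above and
        # feed its callback into agent.fit(). All names here are assumed.
        checkpoint_cb = self.init_save_model_manager(outdir, interval)
        callbacks = [checkpoint_cb] if checkpoint_cb is not None else []
        self.agent.fit(self.env,
                       callbacks=callbacks,
                       nb_steps=nb_steps,
                       log_interval=10000)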
Example #15
    def start(self) -> None:
        """
        Entry point for agent training and testing

        :return: (void)
        """
        output_directory = os.path.join(self.cwd, 'dqn_weights')
        if not os.path.exists(output_directory):
            LOGGER.info('{} does not exist. Creating Directory.'.format(
                output_directory))
            os.mkdir(output_directory)

        weight_name = 'dqn_{}_{}_weights.h5f'.format(self.env_name,
                                                     self.neural_network_type)
        weights_filename = os.path.join(output_directory, weight_name)
        LOGGER.info("weights_filename: {}".format(weights_filename))

        if self.load_weights:
            LOGGER.info('...loading weights for {} from\n{}'.format(
                self.env_name, weights_filename))
            self.agent.load_weights(weights_filename)

        if self.train:
            step_chkpt = '{step}.h5f'
            step_chkpt = 'dqn_{}_weights_{}'.format(self.env_name, step_chkpt)
            checkpoint_weights_filename = os.path.join(self.cwd, 'dqn_weights',
                                                       step_chkpt)
            LOGGER.info("checkpoint_weights_filename: {}".format(
                checkpoint_weights_filename))
            log_filename = os.path.join(
                self.cwd, 'dqn_weights',
                'dqn_{}_log.json'.format(self.env_name))
            LOGGER.info('log_filename: {}'.format(log_filename))

            callbacks = [
                ModelIntervalCheckpoint(checkpoint_weights_filename,
                                        interval=250000)
            ]
            callbacks += [FileLogger(log_filename, interval=100)]

            LOGGER.info('Starting training...')
            self.agent.fit(self.env,
                           callbacks=callbacks,
                           nb_steps=self.number_of_training_steps,
                           log_interval=10000,
                           verbose=0,
                           visualize=self.visualize)
            LOGGER.info("training over.")
            LOGGER.info('Saving AGENT weights...')
            self.agent.save_weights(weights_filename, overwrite=True)
            LOGGER.info("AGENT weights saved.")
        else:
            LOGGER.info('Starting TEST...')
            self.agent.test(self.env, nb_episodes=2, visualize=self.visualize)
def build_callbacks():
    """
    callbacks for the deep q agent
    """
    checkpoint_weights_filename = 'model_checkpoints/dqn_weights_.h5f'
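    # Note: without a '{step}' placeholder in this path, ModelIntervalCheckpoint
    # overwrites the same file at every checkpoint interval.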
    log_filename = 'dqn_log.json'
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=500)
    ]
    callbacks += [FileLogger(log_filename, interval=100)]
    return callbacks
Example #17
def fit_dqn(fit_steps: int, cont: bool):
    training_env = gym.make('adver-v0', target_label=1, test_mode=False)

    callbacks = [ModelIntervalCheckpoint(DQN_WEIGHTS_FILENAME, interval=WEIGHTS_CHECKPOINT)]
    callbacks += [FileLogger(DQN_LOG_FILENAME, interval=LOG_CHECKPOINT)]
    dqn = init_dqn(fit_steps)
    if cont:
        dqn.load_weights(DQN_WEIGHTS_FILENAME.format(step="final"))
        print("continue fitting from last checkpoint")

    dqn.fit(training_env, callbacks=callbacks, nb_steps=fit_steps, start_step_policy=start_policy, nb_max_start_steps=3)

    dqn.save_weights(DQN_WEIGHTS_FILENAME.format(step="final"), overwrite=True)
Example #18
def build_callbacks(env_name):
    log_dir = 'logs'
    if not exists(log_dir):
        os.makedirs(log_dir)

    checkpoint_weights_filename = join(
        log_dir, 'dqn_' + env_name + '_weights_{step}.h5f')
    log_filename = join(log_dir, 'dqn_{}_log.json'.format(env_name))
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=25000)
    ]
    callbacks += [FileLogger(log_filename, interval=100)]
    return callbacks
Example #19
    def train(self, steps=50000, interval=100, visualise=False):
        callbacks = [
            ModelIntervalCheckpoint(self.checkpoint_path, interval=10),
            FileLogger(self.log_path, interval=100),
        ]

        self.dqn.fit(env=self.env,
                     nb_steps=steps,
                     visualize=visualise,
                     callbacks=callbacks,
                     log_interval=interval,
                     nb_max_episode_steps=self.env.total_steps - 1)

        self.dqn.save_weights(self.model_path, overwrite=True)
Example #20
def trainDQN(cfg, env, dqn):
    # Okay, now it's time to learn something! We capture the interrupt exception so that training
    # can be prematurely aborted. Notice that now you can use the built-in Keras callbacks!
    callbacks = [
        ModelIntervalCheckpoint(cfg.checkpoint_weights_filename,
                                interval=250000)
    ]
    callbacks += [FileLogger(cfg.log_filename, interval=100)]
    dqn.fit(env,
            callbacks=callbacks,
            nb_steps=cfg.nb_steps_dqn_fit,
            log_interval=10000)

    # After training is done, we save the final weights one more time.
    dqn.save_weights(cfg.weights_filename, overwrite=True)
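
A sketch of the configuration object trainDQN() expects, assuming only the cfg attributes referenced above; the field values are placeholders, and env/dqn must already be a Gym environment and a compiled keras-rl DQNAgent.

from types import SimpleNamespace

cfg = SimpleNamespace(
    checkpoint_weights_filename='dqn_weights_{step}.h5f',  # placeholder paths
    log_filename='dqn_log.json',
    weights_filename='dqn_weights_final.h5f',
    nb_steps_dqn_fit=1000000,                              # placeholder step count
)
trainDQN(cfg, env, dqn)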
Example #21
    def fit(self, env, num_steps, weights_path=None, visualize=False):
        callbacks = []
        if weights_path is not None:
            callbacks += [ModelIntervalCheckpoint(weights_path, interval=50000, verbose=1)]
        self.agent.fit(env=env,
                       nb_steps=num_steps,
                       action_repetition=opt.dqn_action_repetition,
                       callbacks=callbacks,
                       log_interval=opt.log_interval,
                       test_interval=opt.test_interval,
                       test_nb_episodes=opt.test_nb_episodes,
                       test_action_repetition=opt.dqn_action_repetition,
                       visualize=visualize,
                       test_visualize=visualize,
                       verbose=1)
Example #22
    def train(self, env):
        policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1.,
                                      value_min=.1, value_test=.05, nb_steps=1000)
        amount_memory = 10000000
        memory = SequentialMemory(limit=amount_memory, window_length=WINDOW_LENGTH)
        processor = EmptyProcessor()
        self.dqn = DQNAgent(model=self.model, nb_actions=nb_actions, policy=policy,
                            memory=memory, processor=processor, nb_steps_warmup=50,
                            gamma=.99, target_model_update=10000, train_interval=4,
                            delta_clip=1., enable_double_dqn=False)
        self.dqn.compile(Adam(lr=0.01), metrics=['mae'])

        weights_filename = 'dqn_{}_weights.h5f'.format(ENV_NAME)
        checkpoint_weights_filename = 'dqn_' + ENV_NAME + '_weights_{step}.h5f'
        log_filename = 'dqn_{}_log.json'.format(ENV_NAME)
        callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=2500)]
        callbacks += [FileLogger(log_filename, interval=100)]
        # self.dqn.load_weights(ENV_NAME + 'weights.h5f')
        self.dqn.fit(env, callbacks=callbacks, nb_steps=1750000, log_interval=10000,
                     visualize=True, verbose=2)
        # After training is done, we save the final weights one more time.
        self.dqn.save_weights(weights_filename, overwrite=True)
Example #23
    def train(self, env, nb_actions):
        weights_filename = 'dqn_{}_weights.h5f'.format(ENV_NAME)
        checkpoint_weights_filename = 'dqn_' + ENV_NAME + '_weights_{step}.h5f'
        log_filename = 'dqn_{}_log.json'.format(ENV_NAME)
        callbacks = [
            ModelIntervalCheckpoint(checkpoint_weights_filename,
                                    interval=10000)
        ]
        callbacks += [FileLogger(log_filename, interval=100)]
        self.dqn.fit(env,
                     callbacks=callbacks,
                     nb_steps=1750000,
                     log_interval=10000,
                     visualize=True,
                     verbose=3)
        # After training is done, we save the final weights one more time.
        self.dqn.save_weights(weights_filename, overwrite=True)
Example #24
def train_dqn(agent,
              optimizer,
              train_episodes,
              episode_len,
              logdir,
              checkpoint,
              verbose_flag=1):
    tb_callback = [SubTensorBoard(logdir=logdir)]
    tb_callback += [ModelIntervalCheckpoint(checkpoint, 10000)]
    agent.compile(optimizer)
    env = gym.make('Tutankham-v4')
    agent.fit(env,
              visualize=False,
              nb_steps=train_episodes,
              verbose=verbose_flag,
              nb_max_episode_steps=episode_len,
              callbacks=tb_callback)
Example #25
    def fit(self, env, nb_steps):
        weights_dir = 'weights/{}'.format(self.mission_name)
        if not os.path.exists(weights_dir):
            os.makedirs(weights_dir)
        weights_path = os.path.join(weights_dir, '{}'.format(self.name))
        callbacks = [
            ModelIntervalCheckpoint(weights_path, interval=10000, verbose=1)
        ]
        self.agent.fit(env,
                       nb_steps,
                       action_repetition=4,
                       callbacks=callbacks,
                       verbose=1,
                       log_interval=10000,
                       test_interval=10000,
                       test_nb_episodes=10,
                       test_action_repetition=4,
                       test_visualize=False)
Example #26
def Run_DQL():
    model = build_model(14, 4)
    num_actions = 4
    policy = EpsGreedyQPolicy(0.1)
    env = BusEnv("DQL")
    env.seed(123)
    memory = SequentialMemory(limit=5000, window_length=1)

    dqn = DQNAgent(model=model, nb_actions=num_actions, memory=memory, nb_steps_warmup=10,
                   target_model_update=1e-3, policy=policy, gamma=0.9, memory_interval=1)
    files = open("testDQL.csv", "w")
    files.write("kq\n")
    # Create callbacks
    callbacks = CustomerTrainEpisodeLogger("DQL_5phut.csv")
    callback2 = ModelIntervalCheckpoint("weight_DQL.h5f", interval=50000)
    callback3 = TestLogger11(files)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])
    dqn.fit(env, nb_steps=94151, visualize=False, verbose=2, callbacks=[callbacks, callback2])
Example #27
def main():

    # Get the environment and extract the number of actions.
    environment_name = "lawnmower-medium-obstacles-v0"
    environment = gym.make(environment_name)
    environment.print_description()
    nb_actions = environment.action_space.n

    # Build the model.
    model = build_model_cnn((WINDOW_LENGTH,) + INPUT_SHAPE, nb_actions)
    print(model.summary())

    # Create sequential memory for memory replay.
    memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)

    # Process environment inputs and outputs.
    processor = LawnmowerProcessor()

    # Use epsilon-greedy as our policy.
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=.05,
                                  nb_steps=int(STEPS * 0.8))

    # Instantiate and compile our agent.
    dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy, memory=memory,
                   processor=processor, nb_steps_warmup=50000, gamma=.99, target_model_update=10000,
                   train_interval=4, delta_clip=1.)
    dqn.compile(optimizers.Adam(lr=.00025), metrics=['mae'])

    # Set up some callbacks for training.
    checkpoint_weights_filename = 'dqn_' + environment_name + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(environment_name)
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)]
    callbacks += [TensorboardCallback(os.path.join("tensorboard", datetime_string))]
    callbacks += [FileLogger(log_filename, interval=100)]

    # Train the agent.
    dqn.fit(environment, callbacks=callbacks, nb_steps=STEPS, log_interval=10000)

    # Save the final network after training.
    weights_filename = 'dqn_{}_weights.h5f'.format(environment_name)
    dqn.save_weights(weights_filename, overwrite=True)

    # Run the agent.
    dqn.test(environment, nb_episodes=10, visualize=False)
def train_model(seed=1):
    np.random.seed(seed)
    env = CameraControlEnvCont()
    env.seed(seed)

    actor, critic, action_input = define_actor_critic_models(actions=3)

    memory = SequentialMemory(limit=10000, window_length=1)

    random_process = GaussianWhiteNoiseProcess(mu=0,
                                               sigma=0.1,
                                               sigma_min=0.01,
                                               n_steps_annealing=49000,
                                               size=3)

    agent = DDPGAgent(nb_actions=3,
                      actor=actor,
                      critic=critic,
                      critic_action_input=action_input,
                      memory=memory,
                      nb_steps_warmup_critic=500,
                      nb_steps_warmup_actor=500,
                      random_process=random_process,
                      gamma=.1,
                      target_model_update=1e-3,
                      batch_size=32)
    agent.compile([RMSprop(lr=.0001), RMSprop(lr=.01)], metrics=['mae'])

    log_filename = 'results/drone_camera_cont_control_log.json'
    model_checkpoint_filename = 'results/drone_camera_cont_cnn_weights_{step}.model'
    callbacks = [
        ModelIntervalCheckpoint(model_checkpoint_filename, interval=5000)
    ]
    callbacks += [FileLogger(log_filename, interval=1)]

    agent.fit(env,
              nb_steps=50000,
              nb_max_episode_steps=100,
              verbose=2,
              visualize=False,
              log_interval=1,
              callbacks=callbacks)
Example #29
def Run_FDQO():
    FDQO_method = Model_Deep_Q_Learning(14, 4)
    model = FDQO_method.build_model()
    # Create the FDQO environment
    env = BusEnv("FDQO")
    env.seed(123)
    # Create replay memory
    memory = SequentialMemory(limit=5000, window_length=1)
    # Create policy
    policy = EpsGreedyQPolicy(0.0)
    # Open output file
    files = open("testFDQO.csv", "w")
    files.write("kq\n")
    # Create callbacks
    callbacks = CustomerTrainEpisodeLogger("FDQO_5phut.csv")
    callback2 = ModelIntervalCheckpoint("weight_FDQO.h5f", interval=50000)
    callback3 = TestLogger11(files)
    model.compile(Adam(lr=1e-3), metrics=['mae'])
    model.fit(env, nb_steps=94151, visualize=False, verbose=2, callbacks=[callbacks, callback2])
    files.close()
def create_dqn(model,
               log_interval=50000,
               model_name='dqn_agent_checkpoint',
               file_log_path='./logs/log.txt',
               tensorboard_path='./logs/tensorboard/'):
    model_path = './models/' + model_name + '.h5'
    file_logger = FileLogger(file_log_path, interval=log_interval)
    checkpoint = ModelIntervalCheckpoint(model_path, interval=log_interval)
    tensorboard = TensorboardLogger(tensorboard_path)
    callbacks = [file_logger, checkpoint, tensorboard]

    # Use the last 4 observations (history_length = 4)
    memory = SequentialMemory(limit=500000, window_length=history_length)

    # MaxBoltzmannQPolicy combines BoltzmannQPolicy and EpsGreedyQPolicy
    policy = MaxBoltzmannQPolicy()

    # Start epsilon at 1.0 and anneal it down every step, so random actions taper off once the map is explored
    policy = LinearAnnealedPolicy(inner_policy=policy,
                                  attr='eps',
                                  value_max=1.0,
                                  value_min=0.1,
                                  value_test=0.04,
                                  nb_steps=NUMBER_OF_STEPS)

    # Create an instance of DQNAgent from keras-rl
    dqn = DQNAgent(model=model,
                   nb_actions=env.action_space.n,
                   memory=memory,
                   policy=policy,
                   processor=CustomProcessor(),
                   nb_steps_warmup=512,
                   enable_dueling_network=True,
                   dueling_type='avg',
                   target_model_update=5e2,
                   batch_size=32)

    dqn.compile(Adam(lr=5e-4), metrics=['mae'])

    return dqn, callbacks
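
A hedged usage sketch for create_dqn(); model, env, and NUMBER_OF_STEPS are assumed to come from the surrounding module, and the final weights path is a placeholder.

dqn, callbacks = create_dqn(model, model_name='dqn_agent_checkpoint')
dqn.fit(env, callbacks=callbacks, nb_steps=NUMBER_OF_STEPS, log_interval=50000)
dqn.save_weights('./models/dqn_agent_final.h5', overwrite=True)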