Ejemplo n.º 1
0
    def main(self, args):
        """
        Build a DQN agent, train it, and write it to ``cartpole_model.pkl``.

        :param args: (ArgumentParser) the input arguments; ``args.max_timesteps``
            is used as the total number of training timesteps
        """
        # NOTE(review): ``env`` is not created in this method, so it has to be
        # resolvable from an outer scope (e.g. module level) -- confirm.
        dqn_settings = {
            "learning_rate": 1e-3,
            "buffer_size": 50000,
            "exploration_fraction": 0.01,
            "exploration_final_eps": 0.02,
            "verbose": 1,
        }
        agent = DQN(env=env, policy=CustomPolicy, **dqn_settings)

        # The instance's own ``callback`` attribute is handed to learn().
        agent.learn(total_timesteps=args.max_timesteps, callback=self.callback)

        print("Saving model to cartpole_model.pkl")
        agent.save("cartpole_model.pkl")


#if __name__ == '__main__':
#parser = argparse.ArgumentParser(description="Train DQN on cartpole")
#parser.add_argument('--max-timesteps', default=100000000, type=int, help="Maximum number of timesteps")
#args = parser.parse_args()
#main(args)
Ejemplo n.º 2
0
def main(args):
    """
    Train a DQN agent on the ``vrep-v0`` gym environment and save it.

    The trained model is written to ``slab_installing_model.pkl``.

    :param args: (ArgumentParser) the input arguments; ``args.max_timesteps``
        is used as the total number of training timesteps
    """
    env = gym.make('vrep-v0')

    dqn_settings = {
        # Learning / optimisation.
        "gamma": 0.95,
        "learning_rate": 1e-4,
        "train_freq": 1,
        "learning_starts": 100,
        "batch_size": 64,  # 32 was noted as an alternative value
        "target_network_update_freq": 300,
        # Replay buffer.
        "buffer_size": 50000,  # 5000 was noted as an alternative value
        "prioritized_replay": True,
        # Exploration schedule.
        "exploration_fraction": 0.1,
        "exploration_final_eps": 0.02,
        # Logging and checkpointing.
        "verbose": 1,
        "checkpoint_freq": 3000,
        "checkpoint_path": './model/',
        "tensorboard_log": './log',
    }
    # MlpPolicy is the active policy; CustomPolicy was noted as an alternative.
    agent = DQN(env=env, policy=MlpPolicy, **dqn_settings)

    # Alternative kept from the original: load a saved checkpoint instead of
    # building a fresh model.
    # agent = DQN.load('./model/' + 'bk2_16/cartpole_model6000.pkl', env, tensorboard_log='./log')

    agent.learn(
        total_timesteps=args.max_timesteps,
        callback=callback,
        log_interval=30,
    )

    print("Saving model to slab_installing_model.pkl")
    agent.save("slab_installing_model.pkl")
Ejemplo n.º 3
0
def train(env, fname):
    """
    Train a DQN agent on ``env``, save it to ``fname`` and report the duration.

    The environment has ``setRender(False)`` and ``reset()`` called on it before
    the timer starts. The printed duration covers model construction, learning
    and saving, and is expressed in minutes (elapsed seconds divided by 60).

    :param env: environment object exposing ``setRender`` and ``reset``; it is
        also passed to the DQN constructor
    :param fname: destination passed to ``model.save``
    """
    env.setRender(False)  # presumably turns rendering off during training
    env.reset()

    started_at = time.time()

    hyperparams = {
        "learning_rate": 1e-3,
        "buffer_size": 50000,
        "exploration_fraction": 0.1,
        "exploration_final_eps": 0.02,
    }
    agent = DQN(env=env, policy=CustomPolicy, **hyperparams)
    agent.learn(total_timesteps=STEPS, callback=callback)

    # Persist the trained agent before reporting the elapsed time.
    agent.save(fname)

    elapsed_minutes = (time.time() - started_at) / 60
    print("Duration: %.1f" % elapsed_minutes)
Ejemplo n.º 4
0
def main(args):
    """
    Train a DQN agent on ``CartPole-v0`` and save it to ``cartpole_model.pkl``.

    :param args: (ArgumentParser) the input arguments; ``args.max_timesteps``
        is used as the total number of training timesteps
    """
    cartpole_env = gym.make("CartPole-v0")

    hyperparams = {
        "learning_rate": 1e-3,
        "buffer_size": 50000,
        "exploration_fraction": 0.1,
        "exploration_final_eps": 0.02,
    }
    agent = DQN(env=cartpole_env, policy=MlpPolicy, **hyperparams)

    # ``callback`` is a name defined outside this function.
    agent.learn(total_timesteps=args.max_timesteps, callback=callback)

    print("Saving model to cartpole_model.pkl")
    agent.save("cartpole_model.pkl")
Ejemplo n.º 5
0
def main(args):
    """
    Train a DQN agent on ``MountainCar-v0`` with parameter noise enabled.

    The trained model is saved to ``mountaincar_model.pkl``.

    :param args: (ArgumentParser) the input arguments; ``args.max_timesteps``
        is used as the total number of training timesteps
    """
    mountain_car = gym.make("MountainCar-v0")

    hyperparams = {
        "learning_rate": 1e-3,
        "buffer_size": 50000,
        "exploration_fraction": 0.1,
        "exploration_final_eps": 0.1,
        "param_noise": True,
    }
    # Original author's note: using a layer norm policy is important for
    # parameter space noise. NOTE(review): CustomPolicy is presumably that
    # layer-norm policy -- confirm against its definition.
    agent = DQN(policy=CustomPolicy, env=mountain_car, **hyperparams)

    # No callback is supplied for this run.
    agent.learn(total_timesteps=args.max_timesteps)

    print("Saving model to mountaincar_model.pkl")
    agent.save("mountaincar_model.pkl")