Example #1
import os
import sys

import gym

# Project-local imports; the module paths below are assumptions and may
# need adjusting to match the actual repository layout.
from utils import load_parameters
from dqn_agent import DQNAgent
from trainer import Trainer
from tester import Tester


def main(args):
    """Build the env and agent, then train, retrain and/or test."""
    if args.dir is None:
        print(' ---------- Please specify the experiment directory ----------')
        return

    # Directory of current experiment
    base_dir = os.path.dirname(os.path.realpath(__file__))
    experiment_dir = os.path.join(base_dir, args.agent_type, args.dir)

    # Load training/testing parameters
    params = load_parameters(file=os.path.join(experiment_dir, 'params.dat'))
    # params exposes: environment, action_repeat, agent, training_episodes,
    # training_steps_per_episode, testing_episodes, testing_steps_per_episode,
    # and a hyperparameters dict with epsilon_start/end/decay/steps, use_cuda,
    # learning_rate, batch_size, discount_rate and
    # target_network_update_frequency.

    # Initialize the environment
    env = gym.make(params.environment)
    state_size = env.observation_space.shape
    action_size = env.action_space.n

    # Initialize the agent
    agent = DQNAgent(state_size=state_size,
                     action_size=action_size,
                     hyperparameters=params.hyperparameters)

    if args.train:
        trainer = Trainer(env=env,
                          agent=agent,
                          params=params,
                          exp_dir=experiment_dir)

        try:
            trainer.train()

        except KeyboardInterrupt:
            trainer.close()
            sys.exit(0)

        finally:
            # Runs after normal completion and after a Ctrl-C exit alike
            print('\ndone.')

    if args.retrain:
        trainer = Trainer(env=env,
                          agent=agent,
                          params=params,
                          exp_dir=experiment_dir,
                          retrain=True)

        try:
            trainer.retrain()

        except KeyboardInterrupt:
            trainer.close()
            sys.exit(0)

        finally:
            print('\ndone.')

    if args.test:
        tester = Tester(env=env,
                        agent=agent,
                        params=params,
                        exp_dir=experiment_dir)

        try:
            tester.test()

        except KeyboardInterrupt:
            tester.close()
            # sys.exit raises SystemExit; catching it and falling back to
            # os._exit guarantees the process terminates even if non-daemon
            # threads are still alive.
            try:
                sys.exit(0)
            except SystemExit:
                os._exit(0)
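
Both examples expect an args namespace carrying the CLI flags read inside main(). Below is a minimal argparse wiring for Example #1, with flag names inferred from the attribute accesses above; the original project's actual CLI may differ.

import argparse

# Hypothetical CLI setup; flag names are inferred from main() and the
# defaults are illustrative, not taken from the original project.
parser = argparse.ArgumentParser(description='Run a DQN experiment')
parser.add_argument('--dir', default=None,
                    help='name of the experiment directory')
parser.add_argument('--agent_type', default='dqn',
                    help='sub-directory grouping experiments by agent type')
parser.add_argument('--train', action='store_true', help='train the agent')
parser.add_argument('--retrain', action='store_true',
                    help='resume training from a checkpoint')
parser.add_argument('--test', action='store_true', help='evaluate the agent')

if __name__ == '__main__':
    main(parser.parse_args())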
Example #2
import os
import sys

import gym

# Project-local imports; the module paths below are assumptions and may
# need adjusting to match the actual repository layout.
from config import Config
from ddqn_agent import DDQNAgent
from spawner import Spawner
from trainer import Trainer
from tester import Tester


def main(args):
    """Configure the experiment, then train, retrain or test."""

    # Directory of current experiment
    experiment_dir = 'experiments/dqn_lstm/test1'

    # Load configuration
    config = Config()

    config.env = args.env

    config.hyperparameters = {
        "learning_rate": 0.025,
        "batch_size": 32,
        "sequence_length": 1,
        "buffer_size": int(1e5),
        "update_every_n_steps": 1,
        "min_steps_before_learning": 1000,
        "epsilon_start": 1,
        "epsilon_end": 0.1,
        "epsilon_decay": 0.995,
        "discount_rate": 0.99,
        "tau": 0.01,
    }

    config.use_cuda = True

    config.number_of_episodes = 500
    config.steps_per_episode = 500
    config.previous_episode = 0
    config.total_steps = 160000
    config.pre_train_steps = 100
    config.learning_frequency = 1

    config.checkpoint = True
    config.checkpoint_interval = 1
    config.checkpoint_dir = os.path.join(experiment_dir, 'checkpoints')

    config.log_dir = os.path.join(experiment_dir, 'logs')

    config.model_dir = os.path.join(experiment_dir, 'model')

    # Initialize the environment
    env = gym.make(config.env)
    config.state_dim = env.observation_space.shape
    config.action_dim = env.action_space.n

    # Initialize the agent
    agent = DDQNAgent(config)

    # Initialize spawner
    spawner = Spawner()

    if args.train:
        trainer = Trainer(env, agent, spawner, config)

        try:
            trainer.train()

        except KeyboardInterrupt:
            trainer.close()
            # Catch the SystemExit from sys.exit and fall back to os._exit
            # so the process terminates even with non-daemon threads alive.
            try:
                sys.exit(0)
            except SystemExit:
                os._exit(0)

    elif args.retrain:
        if args.checkpoint_file is None:
            print(' ---------- Please specify a checkpoint file name ----------')
            return

        trainer = Trainer(env, agent, spawner, config)
        trainer.load_checkpoint(args.checkpoint_file)

        try:
            trainer.retrain()

        except KeyboardInterrupt:
            trainer.close()
            try:
                sys.exit(0)
            except SystemExit:
                os._exit(0)

    elif args.test:
        if args.checkpoint_file is None:
            print(' ---------- Please specify a checkpoint file name ----------')
            return

        tester = Tester(config.number_of_episodes, config.steps_per_episode)
        tester.load_checkpoint(args.checkpoint_file)

        try:
            tester.test()

        except KeyboardInterrupt:
            tester.close()
            try:
                sys.exit(0)
            except SystemExit:
                os._exit(0)
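
Config in Example #2 is used as a plain mutable settings container. If the project's own class is unavailable, a minimal stand-in like the sketch below is enough for this script to run; the real Config may add defaults, validation or file loading.

class Config:
    """Minimal assumed stand-in for the project's Config container."""

    def __init__(self):
        # Only the attributes read before main() assigns them; everything
        # else is set dynamically by the calling script.
        self.env = None
        self.hyperparameters = {}
        self.use_cuda = False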