Ejemplo n.º 1
0
    def test_run_multi_fcnet_td3_maddpg_shared(self):
        """Verify that a short shared/MADDPG TD3 run completes and logs."""
        # Command-line flags for a minimal shared MADDPG training run.
        flags = [
            "multiagent-ring_small",
            "--shared",
            "--maddpg",
            "--initial_exploration_steps",
            "1",
            "--total_steps",
            "500",
            "--log_interval",
            "500",
        ]
        args = parse_options('', '', args=flags)

        # Run the script; this should not raise.
        run_multi_fcnet(args, 'data/multi-fcnet')

        # The results directory should now exist.
        result_dir = os.path.join(
            os.getcwd(), "data/multi-fcnet/multiagent-ring_small")
        self.assertTrue(os.path.isdir(result_dir))

        # Clean up everything that was generated.
        shutil.rmtree(os.path.join(os.getcwd(), "data"))
Ejemplo n.º 2
0
    def test_run_hrl_td3(self):
        """Verify that a short goal-conditioned TD3 run completes and logs."""
        # Command-line flags for a minimal HRL training run.
        flags = [
            "MountainCarContinuous-v0",
            "--initial_exploration_steps",
            "1",
            "--batch_size",
            "32",
            "--meta_period",
            "5",
            "--total_steps",
            "500",
            "--log_interval",
            "500",
        ]
        args = parse_options('', '', args=flags)

        # Run the script; this should not raise.
        run_hrl(args, 'data/goal-conditioned')

        # The results directory should now exist.
        result_dir = os.path.join(
            os.getcwd(), "data/goal-conditioned/MountainCarContinuous-v0")
        self.assertTrue(os.path.isdir(result_dir))

        # Clean up everything that was generated.
        shutil.rmtree(os.path.join(os.getcwd(), "data"))
Ejemplo n.º 3
0
    def test_run_hrl_failure(self):
        """Check that run_hrl raises ValueError for an unknown --alg value."""
        # Build options with a bogus algorithm name ("woops"); parsing
        # succeeds — the failure is expected when the run executes below.
        args = parse_options('', '', args=["MountainCarContinuous-v0",
                                           "--n_cpus", "1",
                                           "--total_steps", "2000",
                                           "--alg", "woops"])

        # Run the script; verify it fails with a ValueError.
        self.assertRaises(ValueError, run_hrl,
                          args=args, base_dir='data/goal-conditioned')

        # Clear anything that was generated.
        shutil.rmtree(os.path.join(os.getcwd(), "data"))
Ejemplo n.º 4
0
    def test_run_fcent_td3(self):
        """Verify that a short fully-connected TD3 run completes and logs."""
        # Parse options for a minimal training run.
        args = parse_options('', '', args=["MountainCarContinuous-v0",
                                           "--n_cpus", "1",
                                           "--total_steps", "2000"])

        # Run the script; this should not raise.
        run_fcnet(args, 'data/fcnet')

        # The results directory should now exist.
        result_dir = os.path.join(
            os.getcwd(), "data/fcnet/MountainCarContinuous-v0")
        self.assertTrue(os.path.isdir(result_dir))

        # Clean up everything that was generated.
        shutil.rmtree(os.path.join(os.getcwd(), "data"))
Ejemplo n.º 5
0
    def test_run_multi_fcnet_td3_independent(self):
        """Verify that independent multi-agent TD3 runs and logs to disk."""
        # Parse options for a minimal independent-policies run.
        flags = ["multiagent-ring0", "--total_steps", "2000"]
        args = parse_options('', '', args=flags)

        # Run the script; this should not raise.
        run_multi_fcnet(args, 'data/multi-fcnet')

        # The results directory should now exist.
        result_dir = os.path.join(
            os.getcwd(), "data/multi-fcnet/multiagent-ring0")
        self.assertTrue(os.path.isdir(result_dir))

        # Clean up everything that was generated.
        shutil.rmtree(os.path.join(os.getcwd(), "data"))
Ejemplo n.º 6
0
    def test_run_fcent_failure(self):
        """Check that run_fcnet raises ValueError for an unknown --alg."""
        # A bogus algorithm name parses fine but must fail at run time.
        bad_flags = [
            "MountainCarContinuous-v0", "--total_steps",
            "2000", "--alg", "woops"
        ]
        args = parse_options('', '', args=bad_flags)

        # Run the script; verify it fails.
        self.assertRaises(ValueError,
                          run_fcnet,
                          args=args,
                          base_dir='data/fcnet')

        # Clean up everything that was generated.
        shutil.rmtree(os.path.join(os.getcwd(), "data"))
Ejemplo n.º 7
0
    def test_run_multi_fcnet_failure_maddpg_independent(self):
        """Check that run_multi_fcnet raises ValueError for an unknown --alg
        when running independent MADDPG policies."""
        # Build options with a bogus algorithm name ("woops"); parsing
        # succeeds — the failure is expected when the run executes below.
        args = parse_options('',
                             '',
                             args=[
                                 "multiagent-ring0", "--maddpg",
                                 "--total_steps", "2000", "--alg", "woops"
                             ])

        # Run the script; verify it fails with a ValueError.
        self.assertRaises(ValueError,
                          run_multi_fcnet,
                          args=args,
                          base_dir='data/multi-fcnet')

        # Clear anything that was generated.
        shutil.rmtree(os.path.join(os.getcwd(), "data"))
Ejemplo n.º 8
0
    def test_run_multi_fcnet_failure_maddpg_shared(self):
        """Check that run_multi_fcnet raises ValueError for an unknown --alg
        when running a shared MADDPG policy."""
        # Build options with a bogus algorithm name ("woops"); parsing
        # succeeds — the failure is expected when the run executes below.
        args = parse_options('',
                             '',
                             args=[
                                 "MountainCarContinuous-v0", "--shared",
                                 "--maddpg", "--initial_exploration_steps",
                                 "1", "--total_steps", "500", "--log_interval",
                                 "500", "--alg", "woops"
                             ])

        # Run the script; verify it fails with a ValueError.
        self.assertRaises(ValueError,
                          run_multi_fcnet,
                          args=args,
                          base_dir='data/multi-fcnet')

        # Clear anything that was generated.
        shutil.rmtree(os.path.join(os.getcwd(), "data"))
Ejemplo n.º 9
0
    def test_run_hrl_failure(self):
        """Check that run_hrl raises ValueError for an unknown --alg value."""
        # Build options with a bogus algorithm name ("woops"); parsing
        # succeeds — the failure is expected when the run executes below.
        args = parse_options('',
                             '',
                             args=[
                                 "MountainCarContinuous-v0",
                                 "--initial_exploration_steps", "1",
                                 "--batch_size", "32", "--meta_period", "5",
                                 "--total_steps", "500", "--log_interval",
                                 "500", "--alg", "woops"
                             ])

        # Run the script; verify it fails with a ValueError.
        self.assertRaises(ValueError,
                          run_hrl,
                          args=args,
                          base_dir='data/goal-conditioned')

        # Clear anything that was generated.
        shutil.rmtree(os.path.join(os.getcwd(), "data"))
Ejemplo n.º 10
0
    def test_run_multi_fcnet_failure_independent(self):
        """Check that run_multi_fcnet raises ValueError for an unknown --alg
        when running independent (non-MADDPG) policies."""
        # Build options with a bogus algorithm name ("woops"); parsing
        # succeeds — the failure is expected when the run executes below.
        args = parse_options('',
                             '',
                             args=[
                                 "multiagent-ring_small",
                                 "--initial_exploration_steps", "1",
                                 "--total_steps", "500", "--log_interval",
                                 "500", "--alg", "woops"
                             ])

        # Run the script; verify it fails with a ValueError.
        self.assertRaises(ValueError,
                          run_multi_fcnet,
                          args=args,
                          base_dir='data/multi-fcnet')

        # Clear anything that was generated.
        # NOTE(review): ray.shutdown() appears in only some of these tests —
        # confirm whether it is required here or a leftover.
        ray.shutdown()
        shutil.rmtree(os.path.join(os.getcwd(), "data"))
Ejemplo n.º 11
0
    def test_run_hrl_sac(self):
        """Verify that a short goal-conditioned SAC run completes and logs."""
        # Parse options for a minimal SAC training run.
        flags = [
            "MountainCarContinuous-v0", "--total_steps",
            "2000", "--alg", "SAC"
        ]
        args = parse_options('', '', args=flags)

        # Run the script; this should not raise.
        run_hrl(args, 'data/goal-conditioned')

        # The results directory should now exist.
        result_dir = os.path.join(
            os.getcwd(), "data/goal-conditioned/MountainCarContinuous-v0")
        self.assertTrue(os.path.isdir(result_dir))

        # Clean up everything that was generated.
        shutil.rmtree(os.path.join(os.getcwd(), "data"))
Ejemplo n.º 12
0
    def test_run_fcent_sac(self):
        """Verify that a short fully-connected SAC run completes and logs."""
        # Parse options for a minimal SAC training run.
        flags = [
            "MountainCarContinuous-v0",
            "--initial_exploration_steps", "1",
            "--total_steps", "500", "--log_interval",
            "500", "--alg", "SAC"
        ]
        args = parse_options('', '', args=flags)

        # Run the script; this should not raise.
        run_fcnet(args, 'data/fcnet')

        # The results directory should now exist.
        result_dir = os.path.join(
            os.getcwd(), "data/fcnet/MountainCarContinuous-v0")
        self.assertTrue(os.path.isdir(result_dir))

        # Clean up everything that was generated.
        ray.shutdown()
        shutil.rmtree(os.path.join(os.getcwd(), "data"))
Ejemplo n.º 13
0
        # Add the hyperparameters to the folder.
        with open(os.path.join(dir_name, 'hyperparameters.json'), 'w') as f:
            json.dump(params_with_extra, f, sort_keys=True, indent=4)

        run_exp(
            env=args.env_name,
            policy=GoalConditionedPolicy,
            hp=hp,
            steps=args.total_steps,
            dir_name=dir_name,
            evaluate=args.evaluate,
            seed=seed,
            eval_interval=args.eval_interval,
            log_interval=args.log_interval,
            save_interval=args.save_interval
        )


if __name__ == '__main__':
    # Script entry point: parse the command-line options and launch
    # training, logging results under "data/goal-conditioned".
    # collect arguments
    args = parse_options(
        description='Test the performance of goal-conditioned hierarchical '
                    'models on various environments.',
        example_usage=EXAMPLE_USAGE,
        args=sys.argv[1:]
    )

    # execute the training procedure
    main(args, 'data/goal-conditioned')
Ejemplo n.º 14
0
        params_with_extra['policy_name'] = "FeedForwardPolicy"
        params_with_extra['algorithm'] = args.alg
        params_with_extra['date/time'] = now

        # Add the hyperparameters to the folder.
        with open(os.path.join(dir_name, 'hyperparameters.json'), 'w') as f:
            json.dump(params_with_extra, f, sort_keys=True, indent=4)

        run_exp(
            env=args.env_name,
            policy=FeedForwardPolicy,
            hp=hp,
            steps=args.total_steps,
            dir_name=dir_name,
            evaluate=args.evaluate,
            seed=seed,
            eval_interval=args.eval_interval,
            log_interval=args.log_interval,
            save_interval=args.save_interval,
            initial_exploration_steps=args.initial_exploration_steps,
        )


if __name__ == '__main__':
    # Script entry point: parse the command-line options and launch
    # training, logging results under "data/fcnet".
    main(
        parse_options(
            description='Test the performance of fully connected network '
            'models on various environments.',
            example_usage=EXAMPLE_USAGE,
            args=sys.argv[1:]), 'data/fcnet')
Ejemplo n.º 15
0
    def test_parse_options(self):
        """Validate default and fully-customized command-line parsing."""
        # Default case: only the environment name is supplied, so every
        # other option should take its documented default value.
        args = parse_options("", "", args=["AntMaze"])
        expected_args = {
            'env_name': 'AntMaze',
            'alg': 'TD3',
            'evaluate': False,
            'n_training': 1,
            'total_steps': 1000000,
            'seed': 1,
            'log_interval': 2000,
            'eval_interval': 50000,
            'save_interval': 50000,
            'initial_exploration_steps': 10000,
            'nb_train_steps': 1,
            'nb_rollout_steps': 1,
            'nb_eval_episodes': 50,
            'reward_scale': 1,
            'render': False,
            'render_eval': False,
            'verbose': 2,
            'actor_update_freq': 2,
            'meta_update_freq': 10,
            'noise': TD3_PARAMS['noise'],
            'target_policy_noise': TD3_PARAMS['target_policy_noise'],
            'target_noise_clip': TD3_PARAMS['target_noise_clip'],
            'target_entropy': SAC_PARAMS['target_entropy'],
            'buffer_size': FEEDFORWARD_PARAMS['buffer_size'],
            'batch_size': FEEDFORWARD_PARAMS['batch_size'],
            'actor_lr': FEEDFORWARD_PARAMS['actor_lr'],
            'critic_lr': FEEDFORWARD_PARAMS['critic_lr'],
            'tau': FEEDFORWARD_PARAMS['tau'],
            'gamma': FEEDFORWARD_PARAMS['gamma'],
            'layer_norm': False,
            'use_huber': False,
            'num_levels': GOAL_CONDITIONED_PARAMS['num_levels'],
            'meta_period': GOAL_CONDITIONED_PARAMS['meta_period'],
            'intrinsic_reward_scale':
                GOAL_CONDITIONED_PARAMS['intrinsic_reward_scale'],
            'relative_goals': False,
            'off_policy_corrections': False,
            'hindsight': False,
            'subgoal_testing_rate':
                GOAL_CONDITIONED_PARAMS['subgoal_testing_rate'],
            'use_fingerprints': False,
            'centralized_value_functions': False,
            'connected_gradients': False,
            'cg_weights': GOAL_CONDITIONED_PARAMS['cg_weights'],
            'shared': False,
            'maddpg': False,
        }
        self.assertDictEqual(vars(args), expected_args)

        # Custom case: every option is overridden on the command line.
        args = parse_options("", "", args=[
            "AntMaze",
            '--evaluate',
            '--n_training', '1',
            '--total_steps', '2',
            '--seed', '3',
            '--log_interval', '4',
            '--eval_interval', '5',
            '--save_interval', '6',
            '--nb_train_steps', '7',
            '--nb_rollout_steps', '8',
            '--nb_eval_episodes', '9',
            '--reward_scale', '10',
            '--render',
            '--render_eval',
            '--verbose', '11',
            '--actor_update_freq', '12',
            '--meta_update_freq', '13',
            '--buffer_size', '14',
            '--batch_size', '15',
            '--actor_lr', '16',
            '--critic_lr', '17',
            '--tau', '18',
            '--gamma', '19',
            '--noise', '20',
            '--target_policy_noise', '21',
            '--target_noise_clip', '22',
            '--layer_norm',
            '--use_huber',
            '--num_levels', '23',
            '--meta_period', '24',
            '--intrinsic_reward_scale', '25',
            '--relative_goals',
            '--off_policy_corrections',
            '--hindsight',
            '--subgoal_testing_rate', '26',
            '--use_fingerprints',
            '--centralized_value_functions',
            '--connected_gradients',
            '--cg_weights', '27',
            '--shared',
            '--maddpg',
        ])

        # The goal-conditioned policy should pick up the hierarchical
        # hyperparameters from the parsed arguments.
        hp = get_hyperparameters(args, GoalConditionedPolicy)
        expected_hp = {
            'nb_train_steps': 7,
            'nb_rollout_steps': 8,
            'nb_eval_episodes': 9,
            'reward_scale': 10.0,
            'render': True,
            'render_eval': True,
            'verbose': 11,
            'actor_update_freq': 12,
            'meta_update_freq': 13,
            '_init_setup_model': True,
            'policy_kwargs': {
                'buffer_size': 14,
                'batch_size': 15,
                'actor_lr': 16.0,
                'critic_lr': 17.0,
                'tau': 18.0,
                'gamma': 19.0,
                'noise': 20.0,
                'target_policy_noise': 21.0,
                'target_noise_clip': 22.0,
                'layer_norm': True,
                'use_huber': True,
                'num_levels': 23,
                'meta_period': 24,
                'intrinsic_reward_scale': 25.0,
                'relative_goals': True,
                'off_policy_corrections': True,
                'hindsight': True,
                'subgoal_testing_rate': 26.0,
                'use_fingerprints': True,
                'centralized_value_functions': True,
                'connected_gradients': True,
                'cg_weights': 27.0,
            }
        }
        self.assertDictEqual(hp, expected_hp)
        self.assertEqual(args.log_interval, 4)
        self.assertEqual(args.eval_interval, 5)

        # The multi-agent policy should pick up the multi-agent
        # hyperparameters from the same parsed arguments.
        hp = get_hyperparameters(args, MultiFeedForwardPolicy)
        expected_hp = {
            'nb_train_steps': 7,
            'nb_rollout_steps': 8,
            'nb_eval_episodes': 9,
            'actor_update_freq': 12,
            'meta_update_freq': 13,
            'reward_scale': 10.0,
            'render': True,
            'render_eval': True,
            'verbose': 11,
            '_init_setup_model': True,
            'policy_kwargs': {
                'buffer_size': 14,
                'batch_size': 15,
                'actor_lr': 16.0,
                'critic_lr': 17.0,
                'tau': 18.0,
                'gamma': 19.0,
                'layer_norm': True,
                'use_huber': True,
                'noise': 20.0,
                'target_policy_noise': 21.0,
                'target_noise_clip': 22.0,
                'shared': True,
                'maddpg': True,
            }
        }
        self.assertDictEqual(hp, expected_hp)
        self.assertEqual(args.log_interval, 4)
        self.assertEqual(args.eval_interval, 5)
Ejemplo n.º 16
0
        # Add the hyperparameters to the folder.
        with open(os.path.join(dir_name, 'hyperparameters.json'), 'w') as f:
            json.dump(params_with_extra, f, sort_keys=True, indent=4)

        run_exp(
            env=args.env_name,
            policy=MultiGoalConditionedPolicy,
            hp=hp,
            steps=args.total_steps,
            dir_name=dir_name,
            evaluate=args.evaluate,
            seed=seed,
            eval_interval=args.eval_interval,
            log_interval=args.log_interval,
            save_interval=args.save_interval,
            initial_exploration_steps=args.initial_exploration_steps,
        )


if __name__ == '__main__':
    # Script entry point: parse the command-line options (with both the
    # hierarchical and multi-agent flag sets enabled) and launch training,
    # logging results under "data/multi-goal-conditioned".
    main(
        parse_options(
            description='Test the performance of multi-agent goal-conditioned '
            'hierarchical models on various environments.',
            example_usage=EXAMPLE_USAGE,
            args=sys.argv[1:],
            hierarchical=True,
            multiagent=True,
        ), 'data/multi-goal-conditioned')
Ejemplo n.º 17
0
        # Add the hyperparameters to the folder.
        with open(os.path.join(dir_name, 'hyperparameters.json'), 'w') as f:
            json.dump(params_with_extra, f, sort_keys=True, indent=4)

        run_exp(
            env=args.env_name,
            policy=MultiFeedForwardPolicy,
            hp=hp,
            steps=args.total_steps,
            dir_name=dir_name,
            evaluate=args.evaluate,
            seed=seed,
            eval_interval=args.eval_interval,
            log_interval=args.log_interval,
            save_interval=args.save_interval
        )


if __name__ == '__main__':
    # Script entry point: parse the command-line options and launch
    # training, logging results under "data/multi-fcnet".
    # collect arguments
    args = parse_options(
        description='Test the performance of multi-agent fully connected '
                    'network models on various environments.',
        example_usage=EXAMPLE_USAGE,
        args=sys.argv[1:]
    )

    # execute the training procedure
    main(args, 'data/multi-fcnet')
Ejemplo n.º 18
0
def train_h_baselines(env_name, args, multiagent):
    """Train policies using SAC and TD3 with h-baselines."""
    from hbaselines.algorithms import OffPolicyRLAlgorithm
    from hbaselines.utils.train import parse_options, get_hyperparameters

    # Keep only the command-line arguments that are relevant here.
    args = parse_options(description="", example_usage="", args=args)

    # All logged data is written beneath this directory.
    base_dir = "training_data"

    for run_idx in range(args.n_training):
        # Seed for this particular training run.
        seed = args.seed + run_idx

        # Timestamp marking the start of the current experiment.
        now = strftime("%Y-%m-%d-%H:%M:%S")

        # Make sure the save directory exists.
        dir_name = os.path.join(base_dir, '{}/{}'.format(args.env_name, now))
        ensure_dir(dir_name)

        # Resolve the policy class from the agent-count / algorithm pair.
        # Imports are kept local so only the selected module is loaded.
        if multiagent:
            if args.alg == "TD3":
                from hbaselines.multi_fcnet.td3 import MultiFeedForwardPolicy
                policy = MultiFeedForwardPolicy
            elif args.alg == "SAC":
                from hbaselines.multi_fcnet.sac import MultiFeedForwardPolicy
                policy = MultiFeedForwardPolicy
            else:
                raise ValueError("Unknown algorithm: {}".format(args.alg))
        else:
            if args.alg == "TD3":
                from hbaselines.fcnet.td3 import FeedForwardPolicy
                policy = FeedForwardPolicy
            elif args.alg == "SAC":
                from hbaselines.fcnet.sac import FeedForwardPolicy
                policy = FeedForwardPolicy
            else:
                raise ValueError("Unknown algorithm: {}".format(args.alg))

        # Collect the algorithm hyperparameters.
        hp = get_hyperparameters(args, policy)

        # Record the hyperparameters plus bookkeeping metadata for logging.
        log_params = dict(hp)
        log_params.update({
            'seed': seed,
            'env_name': args.env_name,
            'policy_name': policy.__name__,
            'algorithm': args.alg,
            'date/time': now,
        })

        # Persist the hyperparameters alongside the training output.
        with open(os.path.join(dir_name, 'hyperparameters.json'), 'w') as f:
            json.dump(log_params, f, sort_keys=True, indent=4)

        # Build the algorithm object (with an evaluation env if requested).
        eval_env = "flow:{}".format(env_name) if args.evaluate else None
        alg = OffPolicyRLAlgorithm(
            policy=policy,
            env="flow:{}".format(env_name),
            eval_env=eval_env,
            **hp)

        # Perform training.
        alg.learn(
            total_steps=args.total_steps,
            log_dir=dir_name,
            log_interval=args.log_interval,
            eval_interval=args.eval_interval,
            save_interval=args.save_interval,
            initial_exploration_steps=args.initial_exploration_steps,
            seed=seed,
        )
Ejemplo n.º 19
0
        # Add the hyperparameters to the folder.
        with open(os.path.join(dir_name, 'hyperparameters.json'), 'w') as f:
            json.dump(params_with_extra, f, sort_keys=True, indent=4)

        run_exp(
            env=args.env_name,
            policy=MultiFeedForwardPolicy,
            hp=hp,
            steps=args.total_steps,
            dir_name=dir_name,
            evaluate=args.evaluate,
            seed=seed,
            eval_interval=args.eval_interval,
            log_interval=args.log_interval,
            save_interval=args.save_interval,
            initial_exploration_steps=args.initial_exploration_steps,
        )


if __name__ == '__main__':
    # Script entry point: parse the command-line options (multi-agent flag
    # set enabled, hierarchical disabled) and launch training, logging
    # results under "data/multi-fcnet".
    main(
        parse_options(
            description='Test the performance of multi-agent fully connected '
            'network models on various environments.',
            example_usage=EXAMPLE_USAGE,
            args=sys.argv[1:],
            hierarchical=False,
            multiagent=True,
        ), 'data/multi-fcnet')
Ejemplo n.º 20
0
        with open(os.path.join(dir_name, 'hyperparameters.json'), 'w') as f:
            json.dump(params_with_extra, f, sort_keys=True, indent=4)

        run_exp(
            env=args.env_name,
            policy=FeedForwardPolicy,
            hp=hp,
            dir_name=dir_name,
            evaluate=args.evaluate,
            seed=seed,
            eval_interval=args.eval_interval,
            log_interval=args.log_interval,
            save_interval=args.save_interval,
            initial_exploration_steps=args.initial_exploration_steps,
            ckpt_path=args.ckpt_path,
        )


if __name__ == '__main__':
    # Script entry point: parse the command-line options (single-agent,
    # non-hierarchical) and launch training, logging results under
    # "data/fcnet".
    main(
        parse_options(
            description='Test the performance of fully connected network '
                        'models on various environments.',
            example_usage=EXAMPLE_USAGE,
            args=sys.argv[1:],
            hierarchical=False,
            multiagent=False,
        ),
        'data/fcnet'
    )