Ejemplo n.º 1
0
        # NOTE(review): fragment starts mid-dict — the opening of this config
        # dict and the enclosing function (defining `large`, `args`,
        # `env_name`, `stop`, `exp_name`, `config`) are outside this chunk.
        'sgd_minibatch_size': 100 if large else 64,
        'train_batch_size': 10000 if large else 2048,
        # Resource settings: fractional GPU allows several trials to share
        # one device; per-worker/driver CPUs come from the CLI args.
        "num_gpus": 0.25,
        "num_cpus_per_worker": args.num_cpus_per_worker,
        "num_cpus_for_driver": args.num_cpus_for_driver,
        "num_envs_per_worker": 8 if large else 10,
        'num_workers': 8 if large else 1,
        # Explicitly disable the on_train_result callback hook.
        "callbacks": {"on_train_result": None}
    }

    # Task-specific overrides for the FetchPush robotics env: longer budget
    # (3M steps), more workers/envs, and tuned gamma/lr. `delay_update` is a
    # non-standard key — presumably consumed by the trainer; TODO confirm.
    if env_name == "FetchPush-v1":
        stop = int(3e6)
        config.update(
            num_workers=8,
            num_envs_per_worker=10,
            gamma=0.95,
            lr=5e-4,
            delay_update=not args.no_delay_update
        )

    # Launch the PPO experiment through the project's `train` helper
    # (presumably a Ray Tune wrapper — not visible in this chunk).
    train(
        "PPO",
        config=config,
        stop=stop,
        exp_name=exp_name,
        num_seeds=args.num_seeds,
        num_gpus=args.num_gpus,
        test_mode=args.test,
        keep_checkpoints_num=10
    )
Ejemplo n.º 2
0
    # Training budget: 50M timesteps.
    stop = int(5e7)

    # PPO-style hyper-parameters for the DiCE trainer. Resource keys request
    # one full GPU, 16 rollout workers (1 CPU each) and 2 driver CPUs.
    config = {
        "num_sgd_iter": 10,
        "num_envs_per_worker": 1,
        "entropy_coeff": 0.001,
        "lambda": 0.95,
        "lr": 2.5e-4,

        # Batch-size keys intentionally left at trainer defaults:
        # 'sample_batch_size': 200 if large else 50,
        # 'sgd_minibatch_size': 100 if large else 64,
        # 'train_batch_size': 10000 if large else 2048,
        "num_gpus": 1,
        "num_cpus_per_worker": 1,
        "num_cpus_for_driver": 2,
        'num_workers': 16
    }

    # Merge in the multi-agent env settings; num_agents is wrapped in a
    # single-value tune.grid_search so it shows up as a trial parameter.
    config.update(
        get_marl_env_config(env_name, tune.grid_search([args.num_agents])))

    # Launch the DiCE experiment via the project's `train` helper
    # (presumably a Ray Tune wrapper — not visible in this chunk).
    train(
        DiCETrainer,
        config=config,
        stop=stop,
        exp_name=exp_name,
        num_seeds=args.num_seeds,
        num_gpus=args.num_gpus,
        test_mode=args.test,
    )
Ejemplo n.º 3
0
        # NOTE(review): fragment starts mid-dict — `1000` below is the value
        # of a key defined above this visible chunk.
        1000,
        "sample_batch_size":
        1,
        "train_batch_size":
        256,
        "target_network_update_freq":
        1,
        "timesteps_per_iteration":
        1000,
        # Warm-up: collect 10k steps before learning starts.
        "learning_starts":
        10000,
        # Actions are normalized by the algorithm rather than clipped.
        "clip_actions":
        False,
        "normalize_actions":
        True,
        # Evaluate every iteration; smooth metrics over the last 5 episodes.
        "evaluation_interval":
        1,
        "metrics_smoothing_episodes":
        5,
        "num_cpus_for_driver":
        2,
    }

    # Launch the SAC experiment via the project's `train` helper
    # (presumably a Ray Tune wrapper — not visible in this chunk).
    train("SAC",
          exp_name=exp_name,
          keep_checkpoints_num=5,
          stop=stop,
          config=sac_config,
          num_gpus=args.num_gpus,
          num_seeds=args.num_seeds)
Ejemplo n.º 4
0
        "num_sgd_iter": 10,
        "lr": tune.grid_search([0.0001, 5e-5]),
        'rollout_fragment_length': 50,
        'sgd_minibatch_size': 64,
        'train_batch_size': 2048,
        "num_gpus": 0.2,
        "num_envs_per_worker": 5,
        'num_workers': 7,
        "num_cpus_per_worker": 0.5,
        "num_cpus_for_driver": 0.5,

        constants.USE_BISECTOR: tune.grid_search([False]),
        constants.ONLY_TNB: tune.grid_search([True, False]),
        # "vf_ratio_clip_param": tune.grid_search([0.05]),
    }

    config.update(
        get_marl_env_config(env_name, tune.grid_search([args.num_agents]))
    )

    train(
        DiCETrainer,
        config=config,
        stop=stop,
        exp_name=exp_name,
        num_seeds=args.num_seeds,
        num_gpus=args.num_gpus,
        test_mode=args.test,
        keep_checkpoints_num=5
    )
Ejemplo n.º 5
0
        config["model"]["custom_options"].pop("std_mode")

    if algo in ["ES", "ARS"]:
        config["num_gpus"] = 0
        config["num_cpus_per_worker"] = 0.5
        config["num_workers"] = 15
        config["num_cpus_for_driver"] = 0.5

    # # test
    # config["model"]["custom_options"]["num_components"] = 2
    # initialize_ray(test_mode=True, local_mode=True)
    # trainer = GaussianESTrainer(config=config, env="BipedalWalker-v2")

    if args.redis_password:
        from toolbox import initialize_ray
        import os

        initialize_ray(address=os.environ["ip_head"],
                       test_mode=args.test,
                       redis_password=args.redis_password)

    train(get_dynamic_trainer(algo),
          config=config,
          stop=stop,
          exp_name=exp_name,
          num_seeds=args.num_seeds,
          num_gpus=args.num_gpus,
          test_mode=test,
          keep_checkpoints_num=5,
          start_seed=args.start_seed)
Ejemplo n.º 6
0
        "kl_coeff": 1.0,
        "num_sgd_iter": 10,
        "lr": 0.0001,
        'rollout_fragment_length': 200 if large else 50,
        'sgd_minibatch_size': 100 if large else 64,
        'train_batch_size': 10000 if large else 2048,
        "num_gpus": 0.25,
        "num_cpus_per_worker": args.num_cpus_per_worker,
        "num_cpus_for_driver": args.num_cpus_for_driver,
        "num_envs_per_worker": 8 if large else 10,
        'num_workers': 8 if large else 1,
        "callbacks": {
            "on_train_result": None
        }
    }

    if args.ppo:
        config["env"] = env_name
    else:
        config.update(
            get_marl_env_config(env_name, tune.grid_search([args.num_agents])))

    train("PPO" if args.ppo else DiCETrainer,
          config=config,
          stop=stop,
          exp_name=exp_name,
          num_seeds=args.num_seeds,
          num_gpus=args.num_gpus,
          test_mode=args.test,
          keep_checkpoints_num=3)
Ejemplo n.º 7
0
        # NOTE(review): fragment starts mid-dict — `8` below is the value of
        # a key defined above this visible chunk.
        8,
        "entropy_coeff":
        0.001,
        "lambda":
        0.95,
        "lr":
        2.5e-4,
        "num_gpus":
        1,  # Force to run 2 concurrently
        # Nested config for the evolution component of the EP trainer.
        "evolution": {
            "train_batch_size": 4000,  # The same as PPO
            "num_workers": 10,  # default is 10,
            "num_cpus_per_worker": 0.5,
            # CLI flag selects the ES optimizer.
            "optimizer_type": "adam" if args.adam_optimizer else "sgd"
        },

        # locomotion config
        "kl_coeff":
        1.0,
    }

    # Launch the EP experiment: 10M timesteps, checkpoint frequency and
    # starting seed from the CLI (via the project's `train` helper —
    # presumably a Ray Tune wrapper; not visible in this chunk).
    train(EPTrainer,
          config=config,
          stop=1e7,
          exp_name=exp_name,
          num_seeds=args.num_seeds,
          test_mode=args.test,
          checkpoint_freq=args.checkpoint_freq,
          start_seed=args.start_seed,
          verbose=1)