Code example #1
File: mnist_ray.py  Project: numenta/nupic.research
def run_experiment(config, trainable):
  """
  Run a single tune experiment in parallel as a "remote" function.

  :param config: The experiment configuration
  :type config: dict
  :param trainable: tune.Trainable class with your experiment
  :type trainable: :class:`ray.tune.Trainable`
  """
  # Stop criteria. Default to total number of iterations/epochs
  stop_criteria = {
    "training_iteration": config.get("iterations")
  }
  stop_criteria.update(config.get("stop", {}))

  tune.run(
    trainable,
    name=config["name"],
    local_dir=config["path"],
    stop=stop_criteria,
    config=config,
    num_samples=config.get("repetitions", 1),
    search_alg=config.get("search_alg", None),
    scheduler=config.get("scheduler", None),
    trial_executor=config.get("trial_executor", None),
    checkpoint_at_end=config.get("checkpoint_at_end", False),
    checkpoint_freq=config.get("checkpoint_freq", 0),
    resume=config.get("resume", False),
    reuse_actors=config.get("reuse_actors", False),
    verbose=config.get("verbose", 0)
  )
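As a usage sketch (not part of the original file), this helper could be driven as follows; MyTrainable and the config values are hypothetical placeholders for a real experiment:

import ray
from ray import tune


class MyTrainable(tune.Trainable):
  """Hypothetical minimal Trainable standing in for a real experiment."""

  def _setup(self, config):
    self.lr = config.get("learning_rate", 0.01)

  def _train(self):
    # Report the metrics tune should log for this iteration.
    return {"mean_accuracy": 0.5}


if __name__ == "__main__":
  ray.init()
  run_experiment(
    config={
      "name": "mnist_example",    # experiment name passed to tune.run
      "path": "~/nta/results",    # becomes local_dir
      "iterations": 30,           # default stop criterion
      "repetitions": 1,           # becomes num_samples
      "learning_rate": tune.grid_search([0.01, 0.001]),
    },
    trainable=MyTrainable)
  ray.shutdown()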
Code example #2
File: cifar_tune.py  Project: numenta/nupic.research
def run_experiment(config, trainable):
  """
  Run a single tune experiment in parallel as a "remote" function.

  :param config: The experiment configuration
  :type config: dict
  :param trainable: tune.Trainable class with your experiment
  :type trainable: :class:`ray.tune.Trainable`
  """
  # Stop criteria. Default to total number of iterations/epochs
  stop_criteria = {
    "training_iteration": config.get("iterations")
  }
  stop_criteria.update(config.get("stop", {}))

  tune.run(
    trainable,
    name=config["name"],
    local_dir=config["path"],
    stop=stop_criteria,
    config=config,
    num_samples=config.get("repetitions", 1),
    search_alg=config.get("search_alg", None),
    scheduler=config.get("scheduler",
                         AsyncHyperBandScheduler(
                           reward_attr='mean_accuracy',
                           time_attr="training_iteration",
                           brackets=2,
                           grace_period=max(1, int(config.get("iterations", 10)/10)),
                           reduction_factor=3,
                           max_t=config.get("iterations", 10)
                         )),
    trial_name_creator=tune.function(trial_name_string),
    trial_executor=config.get("trial_executor", None),
    checkpoint_at_end=config.get("checkpoint_at_end", False),
    checkpoint_freq=config.get("checkpoint_freq", 0),
    upload_dir=config.get("upload_dir", None),
    sync_function=config.get("sync_function", None),
    resume=config.get("resume", False),
    reuse_actors=config.get("reuse_actors", False),
    verbose=config.get("verbose", 0),
    resources_per_trial={
      "cpu": config.get("cpu_percentage", 1.0), 
      "gpu": config.get("gpu_percentage", 1.0),
    },
    # # added parameters to allow monitoring through REST API
    # with_server=True, 
    # server_port=4321,  
  )
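`trial_name_string`, referenced via `trial_name_creator` above (and in example #3), is defined elsewhere in the file; a hypothetical, minimal version just builds a short unique name from the Trial object:

def trial_name_string(trial):
  """
  Hypothetical trial name creator.

  :param trial: ray.tune.trial.Trial generated by Tune
  :return: a short, unique string used as the trial's directory name
  """
  return "{}_{}".format(trial.trainable_name, trial.trial_id)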
Code example #3
File: cifar_tune.py  Project: numenta/nupic.research
def run_experiment(config, trainable):
  """
  Run a single tune experiment in parallel as a "remote" function.

  :param config: The experiment configuration
  :type config: dict
  :param trainable: tune.Trainable class with your experiment
  :type trainable: :class:`ray.tune.Trainable`
  """
  # Stop criteria. Default to total number of iterations/epochs
  stop_criteria = {
    "training_iteration": config.get("iterations")
  }
  stop_criteria.update(config.get("stop", {}))

  tune.run(
    trainable,
    name=config["name"],
    local_dir=config["path"],
    stop=stop_criteria,
    config=config,
    num_samples=config.get("repetitions", 1),
    search_alg=config.get("search_alg", None),
    scheduler=config.get("scheduler",
                         MedianStoppingRule(
                           time_attr="training_iteration",
                           reward_attr='noise_accuracy',
                           min_samples_required=3,
                           grace_period=20,
                           verbose=False,
                         )),
    trial_name_creator=tune.function(trial_name_string),
    trial_executor=config.get("trial_executor", None),
    checkpoint_at_end=config.get("checkpoint_at_end", False),
    checkpoint_freq=config.get("checkpoint_freq", 0),
    upload_dir=config.get("upload_dir", None),
    sync_function=config.get("sync_function", None),
    resume=config.get("resume", False),
    reuse_actors=config.get("reuse_actors", False),
    verbose=config.get("verbose", 0),
    resources_per_trial={
      # With lots of trials, optimal seems to be 0.5, or 2 trials per GPU
      # If num trials <= num GPUs, 1.0 is better
      "cpu": 1, "gpu": config.get("gpu_percentage", 0.5),
    }
  )
Code example #4
def run_experiment(config, trainable, num_cpus=1, num_gpus=0):
  """
  Run a single tune experiment in parallel as a "remote" function.

  :param config: The experiment configuration
  :type config: dict
  :param trainable: tune.Trainable class with your experiment
  :type trainable: :class:`ray.tune.Trainable`
  :param num_cpus: CPUs to allocate to each trial
  :type num_cpus: int
  :param num_gpus: GPUs to allocate to each trial (fractions are allowed)
  :type num_gpus: float
  """
  resources_per_trial = {"cpu": num_cpus, "gpu": num_gpus}
  print("experiment =", config["name"])
  print("resources_per_trial =", resources_per_trial)

  # Stop criteria. Default to total number of iterations/epochs
  stop_criteria = {"training_iteration": config.get("iterations")}
  stop_criteria.update(config.get("stop", {}))
  print("stop_criteria =", stop_criteria)

  tune.run(
    trainable,
    name=config["name"],
    stop=stop_criteria,
    config=config,
    resources_per_trial=resources_per_trial,
    num_samples=config.get("repetitions", 1),
    local_dir=config.get("path", None),
    upload_dir=config.get("upload_dir", None),
    sync_function=config.get("sync_function", None),
    checkpoint_freq=config.get("checkpoint_freq", 0),
    checkpoint_at_end=config.get("checkpoint_at_end", False),
    export_formats=config.get("export_formats", None),
    search_alg=config.get("search_alg", None),
    scheduler=config.get("scheduler", None),
    verbose=config.get("verbose", 2),
    resume=config.get("resume", False),
    queue_trials=config.get("queue_trials", False),
    reuse_actors=config.get("reuse_actors", False),
    trial_executor=config.get("trial_executor", None),
    raise_on_failed_trial=config.get("raise_on_failed_trial", True)
  )
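A hedged sketch of how this variant might be invoked from a driver script (the pattern mirrors code example #6 below); MyTrainable reuses the hypothetical class sketched after example #1:

if __name__ == "__main__":
  import ray
  ray.init(num_cpus=4, num_gpus=1)
  config = {
    "name": "my_experiment",
    "path": "~/ray_results",
    "iterations": 50,
    "repetitions": 4,
  }
  # One CPU and a quarter GPU per trial, so four trials can share one GPU.
  run_experiment(config, MyTrainable, num_cpus=1, num_gpus=0.25)
  ray.shutdown()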
Code example #5
File: rllib.py  Project: Duckkkky/smarts
def main(
    scenario: object,
    headless: object,
    time_total_s: object,
    seed: object,
    num_samples: object,
    num_agents: object,
    num_workers: object,
    resume_training: object,
    result_dir: object,
    checkpoint_num: object,
    save_model_path: object,
) -> object:
    pbt = PopulationBasedTraining(
        time_attr="time_total_s",
        metric="episode_reward_mean",
        mode="max",
        perturbation_interval=300,
        resample_probability=0.25,
        # Specifies the mutations of these hyperparams
        hyperparam_mutations={
            "lr": [1e-3, 5e-4, 1e-4, 5e-5, 1e-5],
            "rollout_fragment_length": lambda: random.randint(128, 16384),
            "train_batch_size": lambda: random.randint(2000, 160000),
        },
        # Specifies additional mutations after hyperparam_mutations is applied
        custom_explore_fn=explore,
    )

    # XXX: There is a bug in Ray where we can only export a trained model if
    #      the policy it's attached to is named 'default_policy'.
    #      See: https://github.com/ray-project/ray/issues/5339
    rllib_policies = {
        "default_policy": (
            None,
            rllib_agent["observation_space"],
            rllib_agent["action_space"],
            {
                "model": {
                    "custom_model": TrainingModel.NAME,
                    # "max_seq_len": 50,
                    "custom_model_config": {
                        "num_transformer_units": 1,
                        "attn_dim": 64,
                        "num_heads": 2,
                        "memory_tau": 50,
                        "head_dim": 32,
                        "ff_hidden_dim": 32,
                    },
                }
            },
        )
    }

    smarts.core.seed(seed)
    tune_config = {
        "env": RLlibHiWayEnv,
        "log_level": "WARN",
        "num_workers": num_workers,
        "env_config": {
            "seed": tune.sample_from(lambda spec: random.randint(0, 300)),
            "scenarios":
            [str(Path(scenario).expanduser().resolve().absolute())],
            "headless": headless,
            "agent_specs": {
                f"AGENT-{i}": rllib_agent["agent_spec"]
                for i in range(num_agents)
            },
        },
        "multiagent": {
            "policies": rllib_policies
        },
        "callbacks": {
            "on_episode_start": on_episode_start,
            "on_episode_step": on_episode_step,
            "on_episode_end": on_episode_end,
        },
    }

    experiment_name = "rllib_example_multi"
    result_dir = Path(result_dir).expanduser().resolve().absolute()
    if checkpoint_num:
        checkpoint = str(result_dir / f"checkpoint_{checkpoint_num}" /
                         f"checkpoint-{checkpoint_num}")
    else:
        checkpoint = None

    print(f"Checkpointing at {str(result_dir)}")
    analysis = tune.run(
        "PPO",
        name=experiment_name,
        stop={"time_total_s": time_total_s},
        checkpoint_freq=1,
        checkpoint_at_end=True,
        local_dir=str(result_dir),
        resume=resume_training,
        restore=checkpoint,
        max_failures=30000,
        num_samples=num_samples,
        export_formats=["model", "checkpoint"],
        config=tune_config,
        scheduler=pbt,
    )

    print(analysis.dataframe().head())

    best_logdir = Path(analysis.get_best_logdir("episode_reward_max"))
    model_path = best_logdir / "model"

    copy_tree(str(model_path), save_model_path, overwrite=True)
    print(f"Wrote model to: {save_model_path}")
Code example #6
File: run.py  Project: numenta/nupic.research
def train(config, experiments, num_cpus, num_gpus, redis_address, show_list):
  print("config =", config.name)
  print("num_gpus =", num_gpus)
  print("num_cpus =", num_cpus)
  print("redis_address =", redis_address)

  # Use configuration file location as the project location.
  project_dir = os.path.dirname(config.name)
  project_dir = os.path.abspath(project_dir)
  print("project_dir =", project_dir)

  # Load and parse experiment configurations
  configs = parse_config(config, experiments, globals=globals())

  if show_list:
    print("Experiments:", list(configs.keys()))
    return

  # Initialize ray cluster
  if redis_address is not None:
    ray.init(redis_address=redis_address, include_webui=True)
    num_cpus = 1
  else:
    ray.init(num_cpus=num_cpus, num_gpus=num_gpus, local_mode=num_cpus == 1)

  # Run experiments
  resources_per_trial = {"cpu": 1, "gpu": num_gpus / num_cpus}
  print("resources_per_trial =", resources_per_trial)
  for exp in configs:
    print("experiment =", exp)
    config = configs[exp]
    config["name"] = exp

    # Stop criteria. Default to total number of iterations/epochs
    stop_criteria = {"training_iteration": config.get("iterations")}
    stop_criteria.update(config.get("stop", {}))
    print("stop_criteria =", stop_criteria)

    # Make sure local directories are relative to the project location
    path = config.get("path", None)
    if path and not os.path.isabs(path):
      config["path"] = os.path.join(project_dir, path)

    data_dir = config.get("data_dir", "data")
    if not os.path.isabs(data_dir):
      config["data_dir"] = os.path.join(project_dir, data_dir)

    tune.run(
      SpeechExperimentTune,
      name=config["name"],
      stop=stop_criteria,
      config=config,
      resources_per_trial=resources_per_trial,
      num_samples=config.get("repetitions", 1),
      local_dir=config.get("path", None),
      upload_dir=config.get("upload_dir", None),
      sync_function=config.get("sync_function", None),
      checkpoint_freq=config.get("checkpoint_freq", 0),
      checkpoint_at_end=config.get("checkpoint_at_end", False),
      export_formats=config.get("export_formats", None),
      search_alg=config.get("search_alg", None),
      scheduler=config.get("scheduler", None),
      verbose=config.get("verbose", 2),
      resume=config.get("resume", False),
      queue_trials=config.get("queue_trials", False),
      reuse_actors=config.get("reuse_actors", False),
      trial_executor=config.get("trial_executor", None),
      raise_on_failed_trial=config.get("raise_on_failed_trial", True)
    )

  ray.shutdown()
Code example #7
File: rllib.py  Project: zhutiansama/CS-Notes-1
from ray import tune
from ray.rllib.agents.ppo import PPOTrainer
tune.run(PPOTrainer, config={"env": "CartPole-v0"})
Code example #8
def main():
    ray.init()
    logging.getLogger().setLevel(logging.INFO)
    date = datetime.now().strftime('%Y%m%d_%H%M%S')
    parser = argparse.ArgumentParser()
    # parser.add_argument('--scenario', type=str, default='PongNoFrameskip-v4')
    parser.add_argument('--config',
                        type=str,
                        default='config/global_config.json',
                        help='config file')
    parser.add_argument('--algo',
                        type=str,
                        default='DQN',
                        choices=['DQN', 'DDQN', 'DuelDQN'],
                        help='choose an algorithm')
    parser.add_argument('--inference',
                        action="store_true",
                        help='inference or training')
    parser.add_argument('--ckpt', type=str, help='checkpoint path to restore for inference')
    parser.add_argument('--epoch',
                        type=int,
                        default=10,
                        help='number of training epochs')
    parser.add_argument(
        '--num_step',
        type=int,
        default=10**3,
        help='number of timesteps for one episode, and for inference')
    parser.add_argument('--save_freq',
                        type=int,
                        default=100,
                        help='model saving frequency')
    parser.add_argument('--batch_size',
                        type=int,
                        default=1,
                        help='batch size')
    parser.add_argument('--state_time_span',
                        type=int,
                        default=5,
                        help='state interval to receive long term state')
    parser.add_argument('--time_span',
                        type=int,
                        default=30,
                        help='time interval to collect data')

    args = parser.parse_args()

    model_dir = "model/{}_{}".format(args.algo, date)
    result_dir = "result/{}_{}".format(args.algo, date)

    config_env = env_config(args)
    # ray.tune.register_env('gym_cityflow', lambda env_config:CityflowGymEnv(config_env))
    new_config_env = env_config(args, True)
    config_agent = agent_config(config_env)
    new_config_agent = agent_config(new_config_env)
    # # build cityflow environment

    tune.run('DQN',
             stop={"training_iteration": 100},
             config=config_agent,
             local_dir='~/ray_results/training/',
             checkpoint_freq=1)

    print(
        '-------------------------------training over----------------------------'
    )
    tune.run('DQN',
             stop={"training_iteration": 1000},
             config=new_config_agent,
             restore='~/ray_results/training/DQN',
             checkpoint_freq=1)
Code example #9
    # This dict could mix continuous dimensions and discrete dimensions,
    # for example:
    dim_dict = {
        # for continuous dimensions: (continuous, search_range, precision)
        "height": (ValueType.CONTINUOUS, [-10, 10], 1e-2),
        # for discrete dimensions: (discrete, search_range, has_order)
        "width": (ValueType.DISCRETE, [-10, 10], False)
    }

    config = {
        "num_samples": 10 if args.smoke_test else 1000,
        "config": {
            "steps": 10,  # evaluation times
        }
    }

    zoopt_search = ZOOptSearch(
        algo="Asracos",  # only support ASRacos currently
        budget=config["num_samples"],
        dim_dict=dim_dict,
        metric="mean_loss",
        mode="min")

    scheduler = AsyncHyperBandScheduler(metric="mean_loss", mode="min")

    tune.run(easy_objective,
             search_alg=zoopt_search,
             name="zoopt_search",
             scheduler=scheduler,
             **config)
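`easy_objective` comes from Ray Tune's example suite and is not reproduced in this excerpt; a minimal sketch of a compatible trainable function, assuming the function-based API with `tune.report`, could look like this (the exact formula is illustrative only):

import time

from ray import tune


def easy_objective(config):
    # Hypothetical objective over the "width"/"height" dimensions defined in
    # dim_dict; reports "mean_loss", which the searcher minimizes.
    width, height = config["width"], config["height"]
    for step in range(config["steps"]):
        intermediate_score = 1 / (0.1 + abs(width) * step / 100) + height * 0.1
        tune.report(iterations=step, mean_loss=intermediate_score)
        time.sleep(0.1)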
Code example #10
    # >> lstm_cell_size = config["model"]["lstm_cell_size"]
    # >> env = StatelessCartPole()
    # >> obs = env.reset()
    # >>
    # >> # range(2) b/c h- and c-states of the LSTM.
    # >> init_state = state = [
    # ..     np.zeros([lstm_cell_size], np.float32) for _ in range(2)
    # .. ]
    # >> prev_a = 0
    # >> prev_r = 0.0
    # >>
    # >> while True:
    # >>     a, state_out, _ = trainer.compute_action(
    # ..         obs, state, prev_a, prev_r)
    # >>     obs, reward, done, _ = env.step(a)
    # >>     if done:
    # >>         obs = env.reset()
    # >>         state = init_state
    # >>         prev_a = 0
    # >>         prev_r = 0.0
    # >>     else:
    # >>         state = state_out
    # >>         prev_a = a
    # >>         prev_r = reward

    results = tune.run(args.run, config=config, stop=stop, verbose=2)

    if args.as_test:
        check_learning_achieved(results, args.stop_reward)
    ray.shutdown()
Code example #11
File: tune_32x4.py  Project: stjordanis/ray
        "max_depth": tune.randint(1, 9)
    }

    ray.init(address="auto")

    ray_params = RayParams(
        elastic_training=False,
        max_actor_restarts=2,
        num_actors=4,
        cpus_per_actor=1,
        gpus_per_actor=0)

    start = time.time()
    analysis = tune.run(
        tune.with_parameters(train_wrapper, ray_params=ray_params),
        config=search_space,
        num_samples=32,
        resources_per_trial=ray_params.get_tune_resources())
    taken = time.time() - start

    result = {
        "time_taken": taken,
        "trial_states": dict(
            Counter([trial.status for trial in analysis.trials]))
    }
    test_output_json = os.environ.get("TEST_OUTPUT_JSON",
                                      "/tmp/tune_32x4.json")
    with open(test_output_json, "wt") as f:
        json.dump(result, f)

    print("PASSED.")
Code example #12
TNBESTrainer = PPOESTrainer.with_updates(
    name="TNBES",
    validate_config=validate_config,
    make_policy_optimizer=make_policy_optimizer_tnbes,
    default_config=tnbes_config,
    default_policy=TNBESPolicy)

if __name__ == '__main__':
    # Test codes
    initialize_ray(test_mode=True, local_mode=False)
    env_name = "CartPole-v0"
    num_agents = 3
    config = {
        "num_sgd_iter": 2,
        "train_batch_size": 400,
        "env": MultiAgentEnvWrapper,
        "env_config": {
            "env_name": env_name,
            "num_agents": num_agents
        },
        "update_steps": 1000,
        "use_tnb_plus": tune.grid_search([True, False]),
        "novelty_type": tune.grid_search(["mse", 'kl']),
        "use_novelty_value_network": True
    }
    tune.run(TNBESTrainer,
             name="DELETEME_TEST",
             verbose=2,
             stop={"timesteps_total": 10000},
             config=config)
Code example #13
            "convert_to_discrete_action_space": True,
            "wrap_for_bandits": True,
        },
    }

    # Actual training_iterations will be 10 * timesteps_per_iteration
    # (100 by default) = 2,000
    training_iterations = 10

    print("Running training for %s time steps" % training_iterations)

    start_time = time.time()
    analysis = tune.run(
        "BanditLinUCB",
        config=config,
        stop={"training_iteration": training_iterations},
        num_samples=1,
        checkpoint_at_end=False,
    )

    print("The trials took", time.time() - start_time, "seconds\n")

    # Analyze cumulative regrets of the trials
    frame = pd.DataFrame()
    for key, df in analysis.trial_dataframes.items():
        frame = frame.append(df, ignore_index=True)
    x = frame.groupby(
        "agent_timesteps_total")["episode_reward_mean"].aggregate(
            ["mean", "max", "min", "std"])

    plt.plot(x["mean"])
Code example #14

if __name__ == '__main__':
    ray.init(local_mode=False, log_to_driver=False)
    cpu = 7
    trials = list(_iter())
    n_trials = len(trials) - 1
    print(f"Total n of trials: {n_trials}")
    start_from = 0  # 7 stopping_car
    name_group = NameGroup()
    for i, (problem, method, other_config) in enumerate(trials):
        if i < start_from:
            continue
        print(f"Starting trial: {i}/{n_trials}")
        analysis = tune.run(
            run_parameterised_experiment,
            name="experiment_collection_cartpole_iterations",
            config={
                "main_params": (problem, method, other_config),
                "n_workers": cpu
            },
            resources_per_trial={"cpu": 1},
            stop={"time_since_restore": 500},
            trial_name_creator=name_group.trial_str_creator,
            # resume="PROMPT",
            verbose=0,
            log_to_file=True)
        # df = analysis.results_df
        # df.to_json(os.path.join(analysis.best_logdir, "experiment_results.json"))
        print(f"Finished trial: {i}/{n_trials}")
Code example #15
def main(args):
    # ====================================
    # init env config
    # ====================================
    if args.no_debug:
        ray.init(webui_host="127.0.0.1")
    else:
        ray.init(local_mode=True, webui_host="127.0.0.1")
    # use ray cluster for training
    # ray.init(
    #     address="auto" if args.address is None else args.address,
    #     redis_password="******",
    # )
    #
    # print(
    #     "--------------- Ray startup ------------\n{}".format(
    #         ray.state.cluster_resources()
    #     )
    # )

    agent_specs = {"AGENT-007": agent_spec}

    env_config = {
        "seed": 42,
        "scenarios": [scenario_paths],
        "headless": args.headless,
        "agent_specs": agent_specs,
    }

    # ====================================
    # init tune config
    # ====================================
    class MultiEnv(RLlibHiWayEnv):
        def __init__(self, env_config):
            env_config["scenarios"] = [
                scenario_paths[(env_config.worker_index - 1) %
                               len(scenario_paths)]
            ]
            super(MultiEnv, self).__init__(config=env_config)

    ModelCatalog.register_custom_model("my_fc", FullyConnectedNetwork)
    tune_config = {
        "env": MultiEnv,
        "env_config": env_config,
        "multiagent": {
            "policies": {
                "default_policy": (
                    None,
                    OBSERVATION_SPACE,
                    ACTION_SPACE,
                    {},
                )
            },
            "policy_mapping_fn": lambda agent_id: "default_policy",
        },
        "model": {
            "custom_model": "my_fc",
        },
        "framework": "torch",
        "callbacks": {
            "on_episode_start": on_episode_start,
            "on_episode_step": on_episode_step,
            "on_episode_end": on_episode_end,
        },
        "lr": 1e-4,
        "log_level": "WARN",
        "num_workers": args.num_workers,
        "horizon": args.horizon,
        "train_batch_size": 10240 * 3,

        # "observation_filter": "MeanStdFilter",
        # "batch_mode": "complete_episodes",
        # "grad_clip": 0.5,

        # "model":{
        #     "use_lstm": True,
        # },
    }

    tune_config.update({
        "lambda": 0.95,
        "clip_param": 0.2,
        "num_sgd_iter": 10,
        "sgd_minibatch_size": 1024,
        "gamma": 0.995,
        # "l2_coeff": 5e-4,
    })

    # ====================================
    # init log and checkpoint dir_info
    # ====================================
    experiment_name = EXPERIMENT_NAME.format(
        scenario="multi_scenarios",
        algorithm="PPO",
        n_agent=1,
    )

    log_dir = Path(args.log_dir).expanduser().absolute() / RUN_NAME
    log_dir.mkdir(parents=True, exist_ok=True)
    print(f"Checkpointing at {log_dir}")

    if args.restore:
        restore_path = Path(args.restore).expanduser()
        print(f"Loading model from {restore_path}")
    else:
        restore_path = None

    # run experiments
    analysis = tune.run(
        PPOTrainer,
        # "PPO",
        name=experiment_name,
        stop={"time_total_s": 24 * 60 * 60},
        checkpoint_freq=2,
        checkpoint_at_end=True,
        local_dir=str(log_dir),
        resume=args.resume,
        restore=restore_path,
        max_failures=1000,
        export_formats=["model", "checkpoint"],
        config=tune_config,
    )

    print(analysis.dataframe().head())
Code example #16
        })

    # __pbt_end__

    # __tune_begin__
    class Stopper:
        def __init__(self):
            self.should_stop = False

        def stop(self, trial_id, result):
            max_iter = 5 if args.smoke_test else 100
            if not self.should_stop and result["mean_accuracy"] > 0.96:
                self.should_stop = True
            return self.should_stop or result["training_iteration"] >= max_iter

    stopper = Stopper()

    analysis = tune.run(PytorchTrainble,
                        name="pbt_test",
                        scheduler=scheduler,
                        reuse_actors=True,
                        verbose=1,
                        stop=stopper.stop,
                        export_formats=[ExportFormat.MODEL],
                        num_samples=4,
                        config={
                            "lr": tune.uniform(0.001, 1),
                            "momentum": tune.uniform(0.001, 1),
                        })
    # __tune_end__
Code example #17
        # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
        "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
        "framework": args.framework,
    }

    stop = {
        "timesteps_total": args.stop_timesteps,
        "training_iteration": args.stop_iters,
    }

    # Train the "main" policy to play really well using self-play.
    results = None
    if not args.from_checkpoint:
        results = tune.run("PPO",
                           config=config,
                           stop=stop,
                           checkpoint_at_end=True,
                           checkpoint_freq=10,
                           verbose=1)

    # Restore trained trainer (set to non-explore behavior) and play against
    # human on command line.
    if args.num_episodes_human_play > 0:
        num_episodes = 0
        trainer = PPOTrainer(config=dict(config, **{"explore": False}))
        if args.from_checkpoint:
            trainer.restore(args.from_checkpoint)
        else:
            checkpoint = results.get_last_checkpoint()
            if not checkpoint:
                raise ValueError("No last checkpoint found in results!")
            trainer.restore(checkpoint)
Code example #18
File: cartpole_server.py  Project: krfricke/ray
            print("Restoring from checkpoint path", checkpoint_path)
            trainer.restore(checkpoint_path)

        # Serving and training loop.
        ts = 0
        for _ in range(args.stop_iters):
            results = trainer.train()
            print(pretty_print(results))
            checkpoint = trainer.save()
            print("Last checkpoint", checkpoint)
            with open(checkpoint_path, "w") as f:
                f.write(checkpoint)
            if (results["episode_reward_mean"] >= args.stop_reward
                    or ts >= args.stop_timesteps):
                break
            ts += results["timesteps_total"]

    # Run with Tune for auto env and trainer creation and TensorBoard.
    else:
        stop = {
            "training_iteration": args.stop_iters,
            "timesteps_total": args.stop_timesteps,
            "episode_reward_mean": args.stop_reward,
        }

        tune.run(args.run,
                 config=config,
                 stop=stop,
                 verbose=2,
                 restore=checkpoint_path)
Code example #19
File: trainer.py  Project: slaifan/singularitySDS
def main(args):
    sumo_scenario = SumoScenario(
        scenario_root=os.path.abspath(args.scenario),
        random_social_vehicle_count=args.num_social_vehicles)

    tune_config = {
        'env': CompetitionEnv,
        'log_level': 'WARN',
        'num_workers': 2,
        'horizon': 5000,
        'env_config': {
            'seed': tune.randint(1000),
            'sumo_scenario': sumo_scenario,
            'headless': args.headless,
            'observation_space': OBSERVATION_SPACE,
            'action_space': ACTION_SPACE,
            'reward_function': tune.function(reward),
            'observation_function': tune.function(observation),
            'action_function': tune.function(action),
        },
        'model': {
            'custom_model': MODEL_NAME,
        },
        "callbacks": {
            "on_episode_start": on_episode_start,
            "on_episode_step": on_episode_step,
            "on_episode_end": on_episode_end
        }
    }

    experiment_name = 'rllib_example'

    log_dir = os.path.expanduser("~/ray_results")
    print(f"Checkpointing at {log_dir}")
    analysis = tune.run(
        'PPO',
        name=experiment_name,
        stop={'time_total_s': 60 * 60},  # 1 hour
        checkpoint_freq=1,
        checkpoint_at_end=True,
        local_dir=log_dir,
        resume=args.resume_training,
        max_failures=10,
        num_samples=args.num_samples,
        export_formats=['model', 'checkpoint'],
        config=tune_config,
    )

    print(analysis.dataframe().head())

    logdir = analysis.get_best_logdir('episode_reward_max')
    model_path = os.path.join(logdir, 'model')
    dest_model_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                   "model")

    if not os.path.exists(dest_model_path):
        shutil.copytree(model_path, dest_model_path)
        print(f"wrote model to: {dest_model_path}")
    else:
        print(f"Model already exists at {dest_model_path} not overwriting")
        print(f"New model is stored at {model_path}")
Code example #20
File: custom_torch_policy.py  Project: tchordia/ray
    log_probs = action_dist.logp(train_batch[SampleBatch.ACTIONS])
    return -train_batch[SampleBatch.REWARDS].dot(log_probs)


# <class 'ray.rllib.policy.torch_policy_template.MyTorchPolicy'>
MyTorchPolicy = build_policy_class(name="MyTorchPolicy",
                                   framework="torch",
                                   loss_fn=policy_gradient_loss)


# Create a new Trainer using the Policy defined above.
class MyTrainer(Trainer):
    def get_default_policy_class(self, config):
        return MyTorchPolicy


if __name__ == "__main__":
    args = parser.parse_args()
    ray.init(num_cpus=args.num_cpus or None)
    tune.run(
        MyTrainer,
        stop={"training_iteration": args.stop_iters},
        config={
            "env": "CartPole-v0",
            # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
            "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
            "num_workers": 2,
            "framework": "torch",
        },
    )
Code example #21
    }

    current_best_params = [
        {
            "width": 1,
            "height": 2,
            "activation": 0  # Activation will be relu
        },
        {
            "width": 4,
            "height": 2,
            "activation": 1  # Activation will be tanh
        }
    ]

    config = {
        "num_samples": 10 if args.smoke_test else 1000,
        "config": {
            "iterations": 100,
        },
        "stop": {
            "timesteps_total": 100
        },
    }
    algo = HyperOptSearch(space,
                          metric="mean_loss",
                          mode="min",
                          points_to_evaluate=current_best_params)
    scheduler = AsyncHyperBandScheduler(metric="mean_loss", mode="min")
    run(easy_objective, search_alg=algo, scheduler=scheduler, **config)
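The `space` handed to HyperOptSearch is defined just above this excerpt; consistent with the `current_best_params` shown, it would be a hyperopt-style dictionary roughly like the following sketch:

from hyperopt import hp

space = {
    "width": hp.uniform("width", 0, 20),
    "height": hp.uniform("height", -100, 100),
    # index 0 -> relu, 1 -> tanh, matching current_best_params above
    "activation": hp.choice("activation", ["relu", "tanh"]),
}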
Code example #22
from ray import tune
from ray.rllib.agents.ppo import PPOTrainer
tune.run(PPOTrainer, config={"env": "CartPole-v0"})  # "log_level": "INFO" for verbose,
                                                     # "framework": "tfe"/"tf2" for eager,
                                                     # "framework": "torch" for PyTorch
Code example #23
if __name__ == "__main__":
    args = parser.parse_args()
    ModelCatalog.register_custom_model("cc_model", CentralizedCriticModel)
    tune.run(
        "PPO",
        stop={
            "timesteps_total": args.stop,
            "episode_reward_mean": 7.99,
        },
        config={
            "env": GlobalObsTwoStepGame,
            "batch_mode": "complete_episodes",
            "callbacks": {
                "on_postprocess_traj": fill_in_actions,
            },
            "num_workers": 0,
            "multiagent": {
                "policies": {
                    "pol1": (None, GlobalObsTwoStepGame.observation_space,
                             GlobalObsTwoStepGame.action_space, {}),
                    "pol2": (None, GlobalObsTwoStepGame.observation_space,
                             GlobalObsTwoStepGame.action_space, {}),
                },
                "policy_mapping_fn": lambda x: "pol1" if x == 0 else "pol2",
            },
            "model": {
                "custom_model": "cc_model",
            },
        })
Code example #24
def train(config: dict, checkpoint_path: str = None):
    ray.init(local_mode=config["ray"]["local_mode"])

    env_config = {
        "effect": config["env"]["effect"],
        "metric": config["env"]["metric"],
        "feature_extractors": config["env"]["feature_extractors"],
        "source": config["env"]["source"],
        "targets": config["env"]["targets"],
        "eval_interval": config["env"]["eval_interval"],
        "render_to_dac": False,
        "standardize_rewards": False,  # NOTE: experimental feature
        "debug": config["env"]["debug"],
    }

    learning_rate = (config["agent"]["learning_rate"]
                     if "learning_rate" in config["agent"].keys() else 3e-3)
    hidden_layers = config["agent"]["hidden_layers"]
    tanh = "tanh"
    common_config = {
        "env": CrossAdaptiveEnv,
        "env_config": env_config,
        "framework": "torch",
        "num_cpus_per_worker": config["ray"]["num_cpus_per_worker"],
        "log_level": config["ray"]["log_level"],
        "observation_filter": "MeanStdFilter",
        "num_workers": 0,
        "train_batch_size": 256,
    }

    def sac_trainer():
        agent_name = "SAC"
        sac_config = {
            **sac.DEFAULT_CONFIG.copy(),
            **common_config.copy(),
            "learning_starts":
            10000 if not checkpoint_path else 0,
            "target_entropy":
            -24,  # set empirically after trials with dist_lpf
            "optimization": {
                "actor_learning_rate": learning_rate,
                "critic_learning_rate": learning_rate,
                "entropy_learning_rate": learning_rate,
            },
            # Model options for the Q network(s).
            "Q_model": {
                "fcnet_activation": tanh,
                "fcnet_hiddens": hidden_layers,
            },
            # Model options for the policy function.
            "policy_model": {
                "fcnet_activation": tanh,
                "fcnet_hiddens": hidden_layers,
            },
        }
        return sac.SACTrainer, sac_config, agent_name

    def ppo_trainer():
        agent_name = "PPO"
        ppo_config = {
            **ppo.DEFAULT_CONFIG.copy(),
            **common_config.copy(),
            "lr": learning_rate,
            "model": {
                "fcnet_hiddens": hidden_layers,
                "fcnet_activation": tanh,
            },
            "sgd_minibatch_size": 64,
            # Coefficient of the entropy regularizer. Unused if a schedule is set.
            "entropy_coeff": 0.0,
            # Decay schedule for the entropy regularizer.
            "entropy_coeff_schedule": None,
        }
        return ppo.PPOTrainer, ppo_config, agent_name

    agent = config["agent"]["agent"]
    available_trainers = ["sac", "ppo"]
    no_agent_error = ValueError(f"{agent} not available")
    if agent not in available_trainers:
        raise no_agent_error
    elif agent == "sac":
        trainer, agent_config, agent_name = sac_trainer()
    elif agent == "ppo":
        trainer, agent_config, agent_name = ppo_trainer()

    # ###############
    # # Hyperparameter search

    # entropy_coeffs = [0.01 * i for i in range(4)]

    # agent_config = tune.grid_search(
    #     [
    #         {
    #             **agent_config.copy(),
    #             "entropy_coeff": entropy_coeff,
    #         }
    #         for entropy_coeff in entropy_coeffs
    #     ]
    # )
    # ###############

    if checkpoint_path:
        # NOTE: hacky way to find the corresponding Tune 'name' of the
        # restored experiment since the checkpoint is always three levels deeper
        path = Path(checkpoint_path)
        name = path.parent.parent.parent.name
    else:
        name = f'{config["label"]}_{agent_name}_{timestamp(millis=False)}'

    progress_reporter = CLIReporter(max_report_frequency=30)

    analysis = tune.run(
        trainer,
        config=agent_config,
        local_dir=RAY_RESULTS_DIR,
        checkpoint_at_end=config["agent"]["checkpoint_at_end"],
        checkpoint_freq=config["agent"]["checkpoint_freq"],
        name=name,
        restore=checkpoint_path,  # None is default
        progress_reporter=progress_reporter,
        stop={"training_iteration": 1000},
    )
    print(analysis)
Code example #25
def main(experiment_name,
         experiment_description,
         train_file_name,
         eval_file_name,
         eval_label_file_name,
         learning_rates,
         weight_decays,
         experiment_number=0,
         device='cuda',
         max_gpus=1,
         process_per_gpu=1):
    file_directory = os.path.dirname(os.path.abspath(__file__))
    parent_folder = os.path.dirname(file_directory)
    base_save_path = os.path.join(file_directory, 'temp/')

    # Create temp folder if not exists
    create_temp_folder(file_directory)

    experiment_description = experiment_description
    train_file_name = train_file_name
    eval_file_name = eval_file_name
    eval_label_file_name = eval_label_file_name

    base_file_path = os.path.join(
        parent_folder,
        'original_data/Datasets/BCICompetitionIV/Data/BCICIV_2a_gdf/')
    base_label_path = os.path.join(parent_folder, 'savecopywithlabels/')

    low_pass = 7
    high_pass = 30
    raw_train_file_name = os.path.join(base_file_path, train_file_name)
    raw_eval_file_name = os.path.join(base_file_path, eval_file_name)
    extract_features = False
    scale_1 = [7, 15, 0.5]
    scale_2 = [16, 30, 0.5]
    split_ratio = 0.7
    splitting_strategy = 'balanced-copy'
    batch_size = 32
    shuffle = True
    workers = 1
    max_epochs = 300
    model_channels = 8
    model_classes = 2

    ## DATA PREP START
    # load raw file into temp folder
    load_eeg_from_gdf(low_pass,
                      high_pass,
                      raw_train_file_name,
                      f'{base_save_path}raw_train',
                      frequency=250,
                      trial_duration=6)
    load_eeg_from_gdf(low_pass,
                      high_pass,
                      raw_eval_file_name,
                      f'{base_save_path}raw_eval',
                      frequency=250,
                      trial_duration=6)

    # move and prepare labels into temp folder
    shutil.copyfile(f'{base_label_path}{eval_label_file_name}',
                    f'{base_save_path}raw_eval_labels.npy')

    # apply CPS in temp folder
    apply_csp(f'{base_save_path}raw_train_{low_pass}_{high_pass}.npy',
              f'{base_save_path}raw_train_labels.npy',
              f'{base_save_path}raw_eval_{low_pass}_{high_pass}.npy',
              f'{base_save_path}raw_eval_labels.npy',
              f'{base_save_path}csp_train', f'{base_save_path}csp_eval',
              low_pass, high_pass)

    # for each of the datasets
    for i in range(1, 5):
        # apply normalize without extraction in temp folder
        apply_normlized_feature_extraction(
            f'{base_save_path}csp_train_class{i}.npy',
            f'{base_save_path}raw_train_labels.npy',
            f'{base_save_path}normalized_train',
            f'{base_save_path}csp_eval_class{i}.npy',
            f'{base_save_path}raw_eval_labels.npy',
            f'{base_save_path}normalized_eval',
            i,
            extract=extract_features)
        # Prepare Train and Val sets
        data_splitter = DataSplitter(
            f'{base_save_path}normalized_train_class{i}.npy',
            f'{base_save_path}normalized_train_class{i}_labels.npy',
            f'{base_save_path}_class{i}', split_ratio)
        data_splitter.split(splitting_strategy)
    ## DATA PREP END

    ## Threaded Model Start

    max_gpus = int(max_gpus)
    process_per_gpu = int(process_per_gpu)

    device = 'cuda'
    ray_config = {
        'base_save_path': base_save_path,
        'batch_size': batch_size,
        'device': device,
        'eval_file_name': eval_file_name,
        'experiment_description': experiment_description,
        'experiment_name': experiment_name,
        'extract_features': extract_features,
        'file_directory': file_directory,
        'high_pass': high_pass,
        'learning_rate': tune.grid_search(learning_rates),
        'low_pass': low_pass,
        'max_epochs': max_epochs,
        'model_channels': model_channels,
        'model_classes': model_classes,
        'scale_1': scale_1,
        'scale_2': scale_2,
        'shuffle': shuffle,
        'split_ratio': split_ratio,
        'splitting_strategy': splitting_strategy,
        'train_file_name': train_file_name,
        'weight_decay': tune.grid_search(weight_decays),
        'workers': workers
    }

    analysis = tune.run(run_threaded_model,
                        config=ray_config,
                        resources_per_trial={"gpu": 1 / process_per_gpu},
                        num_samples=1,
                        mode='max',
                        max_concurrent_trials=max_gpus * process_per_gpu)

    best_conf = analysis.get_best_config(metric="mean_accuracy", mode="max")

    print("Best config: ", best_conf)
    # Delete the temp Folder
    delete_temp_folder(file_directory)
    return best_conf
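`run_threaded_model` is the project's own training entry point and is not part of this excerpt; for `analysis.get_best_config(metric="mean_accuracy", mode="max")` to work, it has to report that metric, roughly along the lines of this hypothetical sketch (random numbers stand in for real training):

import random

from ray import tune


def run_threaded_model(config):
    # Hypothetical sketch: train with config["learning_rate"] and
    # config["weight_decay"], reporting validation accuracy each epoch so
    # Tune can rank the grid-searched combinations.
    accuracy = 0.0
    for epoch in range(config["max_epochs"]):
        accuracy = max(accuracy, random.random())
        tune.report(mean_accuracy=accuracy)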
Code example #26
File: run_test.py  Project: stjordanis/nupic.research
    debug_sparse=True,
    dataset_name="CIFAR10",
    # input_size=(3,32,32), # 784,
    stats_mean=(0.4914, 0.4822, 0.4465),
    stats_std=(0.2023, 0.1994, 0.2010),
    data_dir="~/nta/datasets",
    device="cpu",
    optim_alg="SGD",
)

# run
ray.init()
tune.run(
    Trainable,
    name="SET_local_test",
    num_samples=1,
    local_dir=os.path.expanduser("~/nta/results"),
    config=config,
    checkpoint_freq=0,
    checkpoint_at_end=False,
    stop={"training_iteration": 10},
    resources_per_trial={
        "cpu": 1,
        "gpu": 0
    },
)
""""
ongoing notes

"""
Code example #27
        if iteration == 0 or 5 <= iteration <= 19:
            # Either the optimizer doesn't exist yet, or we're changing the loss
            # function and the adaptive state is invalidated. I don't think the
            # original paper contained this logic, never resetting the Adam
            # optimizer while performing "warmup" on the regularization, but it
            # seems like the right thing to do.
            self.optimizer = torch.optim.Adam(self._get_parameters(), lr=lr)
        else:
            for param_group in self.optimizer.param_groups:
                param_group["lr"] = lr

        return super().run_epoch(iteration)


if __name__ == "__main__":
    ray.init()

    tune.run(
        experiments.as_ray_trainable(VDropExperiment),
        name=os.path.basename(__file__).replace(".py", ""),
        num_samples=1,
        checkpoint_freq=0,
        checkpoint_at_end=False,
        resources_per_trial={
            "cpu": 1,
            "gpu": (1 if torch.cuda.is_available() else 0)
        },
        loggers=DEFAULT_LOGGERS,
        verbose=1,
    )
Code example #28
                (cmt_anomaly - tmp_anomaly), axis=0)
            precision_anomaly[np.isnan(precision_anomaly)] = 0
            recall_anomaly[np.isnan(recall_anomaly)] = 0
            epoch_acc_anomaly = 2 * np.mean(recall_anomaly) * np.mean(
                precision_anomaly) / (np.mean(precision_anomaly) +
                                      np.mean(recall_anomaly))
            epoch_acc_anatomy = 2 * np.mean(recall_anatomy) * np.mean(
                precision_anatomy
            ) / (
                np.mean(precision_anatomy) + np.mean(recall_anatomy)
            )  #running_corrects_anomaly*batch_size / len(dataloaders[phase].dataset)

            # We save the latent variable and reconstruction for later use
            # we will need them on the CPU to plot
            #x = x.to("cpu")
            #x_hat = x_hat.to("cpu")
            #z = z.detach().to("cpu").numpy()
            val_acc_anatomy_history.append(epoch_acc_anatomy)
            val_acc_anomaly_history.append(epoch_acc_anomaly)
            valid_loss.append(np.mean(batch_loss_val))
            valid_kl.append(np.mean(batch_kl_val))

            tune.track.log(mean_acc=(epoch_acc_anatomy + epoch_acc_anomaly) /
                           2)
    #--------------------------------------------------------------------------------------------------


analysis = tune.run(
    train_model,
    config={"alpha": tune.grid_search([1, 5, 10, 20, 50, 100])},
    resources_per_trial={'gpu': 2})
Code example #29
def main(num_samples: int, num_workers: int):
    pbt = PopulationBasedTraining(time_attr="time_total_s",
                                  perturbation_interval=50,
                                  resample_probability=0.25,
                                  hyperparam_mutations={
                                      "lambda":
                                      tune.uniform(0.9, 1.0),
                                      "clip_param":
                                      tune.uniform(0.01, 0.5),
                                      "lr":
                                      [1e-2, 1e-3, 5e-4, 1e-4, 5e-5, 1e-5],
                                      "num_sgd_iter":
                                      tune.randint(1, 30),
                                      "sgd_minibatch_size":
                                      tune.randint(128, 16384),
                                      "train_batch_size":
                                      tune.randint(2000, 160000),
                                  })

    analysis = tune.run("PPO",
                        name="pbt_portfolio_reallocation",
                        scheduler=pbt,
                        num_samples=num_samples,
                        metric="episode_reward_min",
                        mode="max",
                        config={
                            "env":
                            "TradingEnv",
                            "env_config": {
                                "total_steps": 1000,
                                "num_assets": 4,
                                "commission": 1e-3,
                                "time_cost": 0,
                                "window_size": tune.randint(5, 50),
                                "min_periods": 150
                            },
                            "kl_coeff":
                            1.0,
                            "num_workers":
                            num_workers,
                            "num_gpus":
                            0,
                            "observation_filter":
                            tune.choice(["NoFilter", "MeanStdFilter"]),
                            "framework":
                            "torch",
                            "model": {
                                "custom_model": "reallocate",
                                "custom_model_config": {
                                    "num_assets": 4
                                },
                                "custom_action_dist": "dirichlet",
                            },
                            "num_sgd_iter":
                            10,
                            "sgd_minibatch_size":
                            128,
                            "lambda":
                            tune.uniform(0.9, 1.0),
                            "clip_param":
                            tune.uniform(0.1, 0.5),
                            "lr":
                            tune.loguniform(1e-2, 1e-5),
                            "train_batch_size":
                            tune.randint(1000, 20000)
                        },
                        stop={
                            "episode_reward_min": 20,
                            "training_iteration": 100
                        },
                        checkpoint_at_end=True,
                        local_dir="./results")

    checkpoints = analysis.get_trial_checkpoints_paths(
        trial=analysis.get_best_trial(metric="episode_reward_min", mode="max"),
        metric="episode_reward_mean")

    params = {"config": analysis.best_config, "checkpoints": checkpoints}

    json.dump(params, open("data/tuned_params.json", "w"), indent=4)
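As a follow-up sketch (not part of the original script), the tuned config and checkpoints saved to data/tuned_params.json can later be restored into a trainer; note that the custom env and model would have to be re-registered in the new process first:

import json

import ray
from ray.rllib.agents.ppo import PPOTrainer

if __name__ == "__main__":
    ray.init()
    # Hypothetical restore step; assumes "TradingEnv", the "reallocate" model
    # and the "dirichlet" action distribution have been registered again.
    with open("data/tuned_params.json") as f:
        params = json.load(f)

    trainer = PPOTrainer(config=params["config"])
    checkpoint_path, _metric = params["checkpoints"][0]
    trainer.restore(checkpoint_path)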
Code example #30
File: bohb_example.py  Project: hngenc/ray
    # Optional: Pass the parameter space yourself
    # config_space = CS.ConfigurationSpace()
    # config_space.add_hyperparameter(
    #     CS.UniformFloatHyperparameter("width", lower=0, upper=20))
    # config_space.add_hyperparameter(
    #     CS.UniformFloatHyperparameter("height", lower=-100, upper=100))
    # config_space.add_hyperparameter(
    #     CS.CategoricalHyperparameter(
    #         "activation", choices=["relu", "tanh"]))

    bohb_hyperband = HyperBandForBOHB(time_attr="training_iteration",
                                      max_t=100,
                                      reduction_factor=4,
                                      stop_last_trials=False)

    bohb_search = TuneBOHB(
        # space=config_space,  # If you want to set the space manually
        max_concurrent=4)

    analysis = tune.run(MyTrainableClass,
                        name="bohb_test",
                        config=config,
                        scheduler=bohb_hyperband,
                        search_alg=bohb_search,
                        num_samples=10,
                        stop={"training_iteration": 100},
                        metric="episode_reward_mean",
                        mode="max")

    print("Best hyperparameters found were: ", analysis.best_config)
Code example #31
import argparse
import pprint
from ray import tune
import ray
from ray.rllib.agents.a3c.a3c import (DEFAULT_CONFIG, A3CTrainer as trainer)

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--env', help='Gym env name.')
    args = parser.parse_args()
    config = DEFAULT_CONFIG.copy()
    config_update = {
        "env": args.env,
        "num_gpus": 1,
        "num_workers": 50,
        "evaluation_num_workers": 10,
        "evaluation_interval": 1,
        "use_gae": False
    }
    config.update(config_update)
    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint(config)
    ray.init()
    tune.run(trainer, stop={"timesteps_total": 2000000}, config=config)
Code example #32
File: run_gsc.py  Project: renderist/nupic.research
    # ----- Additional Validation -----
    test_noise=False,
    noise_level=0.1,
    # ----- Debugging -----
    debug_weights=True,
    debug_sparse=True,
)

# ray configurations
experiment_name = "gsc-test"
tune_config = dict(
    name=experiment_name,
    num_samples=1,
    local_dir=os.path.expanduser(os.path.join("~/nta/results", experiment_name)),
    checkpoint_freq=0,
    checkpoint_at_end=False,
    stop={"training_iteration": 100},
    resources_per_trial={"cpu": 1, "gpu": 1},
    loggers=DEFAULT_LOGGERS,
    verbose=1,
    config=base_exp_config,
)

# override when running local for test
if not torch.cuda.is_available():
    base_exp_config["device"] = "cpu"
    tune_config["resources_per_trial"] = {"cpu": 1}

init_ray()
tune.run(Trainable, **tune_config)
Code example #33
                self.should_stop = True
            return self.should_stop or result["training_iteration"] >= max_iter

        def stop_all(self):
            return self.should_stop

    stopper = CustomStopper()

    analysis = tune.run(
        train_convnet,
        name="pbt_test",
        scheduler=scheduler,
        metric="mean_accuracy",
        mode="max",
        verbose=1,
        stop=stopper,
        export_formats=[ExportFormat.MODEL],
        checkpoint_score_attr="mean_accuracy",
        keep_checkpoints_num=4,
        num_samples=4,
        config={
            "lr": tune.uniform(0.001, 1),
            "momentum": tune.uniform(0.001, 1),
        })
    # __tune_end__

    if args.server_address:
        # If using Ray Client, we want to make sure checkpoint access
        # happens on the server. So we wrap `test_best_model` in a Ray task.
        # We have to make sure it gets executed on the same node that
        # ``tune.run`` is called on.
        from ray.tune.utils.util import force_on_current_node
Code example #34
            local_dir=os.getenv("HOME") + "/dcase/result/ray_results",
            stop=TrainStopper(max_ep=200, stop_thres=200),
            checkpoint_freq=1,
            keep_checkpoints_num=1,
            checkpoint_at_end=True,
            checkpoint_score_attr="acc",
            resources_per_trial={"gpu": 0, "cpu": 64},
        )

if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('-test', action='store_true')  # default = false
    args = parser.parse_args()

    if args.test:
        print("====== Test Run =======")
        from asc import exp_utils
        c = exp_utils.exp_to_config(exp)
        t = Trainable(c)
        t._train()
        exit()

    ray.shutdown()
    ray.init(local_mode=True, webui_host="0.0.0.0")

    analysis = tune.run(
        exp,
        verbose=2,
    )
Code example #35
# <class 'ray.rllib.agents.trainer_template.MyCustomTrainer'>
MyTrainer = build_trainer(
    name="MyCustomTrainer",
    default_policy=MyTFPolicy,
)

if __name__ == "__main__":
    ray.init()
    args = parser.parse_args()
    ModelCatalog.register_custom_model("eager_model", EagerModel)

    config = {
        "env": "CartPole-v0",
        "num_workers": 0,
        "model": {
            "custom_model": "eager_model"
        },
        "framework": "tfe",
    }
    stop = {
        "timesteps_total": args.stop_timesteps,
        "training_iteration": args.stop_iters,
        "episode_reward_mean": args.stop_reward,
    }

    results = tune.run(MyTrainer, stop=stop, config=config)

    if args.as_test:
        check_learning_achieved(results, args.stop_reward)
    ray.shutdown()