Example 1
    }
    assert os.path.isfile(os.path.join(
        PATHS['model'],
        'best_model.zip')), "No model file found in %s" % PATHS['model']
    assert os.path.isfile(
        PATHS['scenerios_json_path']
    ), "No scenario file named %s" % PATHS['scenerios_json_path']

    # initialize hyperparams
    params = load_hyperparameters_json(agent_hyperparams, PATHS)

    print("START RUNNING AGENT:    %s" % params['agent_name'])
    print_hyperparameters(params)

    # initialize task manager
    task_manager = get_predefined_task(mode='ScenerioTask', PATHS=PATHS)
    # initialize gym env
    env = DummyVecEnv([
        lambda: FlatlandEnv(task_manager,
                            PATHS.get('robot_setting'),
                            PATHS.get('robot_as'),
                            params['reward_fnc'],
                            params['discrete_action_space'],
                            goal_radius=0.50,
                            max_steps_per_episode=350)
    ])
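    # optionally normalize observations for evaluation; training=False keeps
    # the normalization statistics frozen and rewards are left unscaled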
    if params['normalize']:
        env = VecNormalize(env,
                           training=False,
                           norm_obs=True,
                           norm_reward=False,
                           clip_reward=15)
Example 2
    # initialize hyperparameters (save to/ load from json)
    hyperparams_obj = agent_hyperparams(AGENT_NAME, robot, gamma, n_steps,
                                        ent_coef, learning_rate, vf_coef,
                                        max_grad_norm, gae_lambda, batch_size,
                                        n_epochs, clip_range, reward_fnc,
                                        discrete_action_space, normalize,
                                        task_mode, start_stage)
    params = initialize_hyperparameters(agent_name=AGENT_NAME,
                                        PATHS=PATHS,
                                        hyperparams_obj=hyperparams_obj,
                                        load_target=args.load)

    # instantiate gym environment
    n_envs = 1
    task_manager = get_predefined_task(params['task_mode'],
                                       params['curr_stage'], PATHS)
    env = DummyVecEnv([
        lambda: FlatlandEnv(task_manager,
                            PATHS.get('robot_setting'),
                            PATHS.get('robot_as'),
                            params['reward_fnc'],
                            params['discrete_action_space'],
                            goal_radius=1.00,
                            max_steps_per_episode=200)
    ] * n_envs)
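    # optionally normalize observations while training; rewards are left
    # unscaled (norm_reward=False)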
    if params['normalize']:
        env = VecNormalize(env,
                           training=True,
                           norm_obs=True,
                           norm_reward=False,
                           clip_reward=15)
Example 3
    # generate agent name and model specific paths
    AGENT_NAME = get_agent_name(args)

    print("________ STARTING TRAINING WITH:  %s ________\n" % AGENT_NAME)
    PATHS = get_paths(AGENT_NAME, args)

    if args.n is None:
        n_timesteps = 6000
    else:
        n_timesteps = args.n

    # instantiate gym environment
    n_envs = 1

    task = get_predefined_task("random")
    env = DummyVecEnv([
        lambda: FlatlandEnv(task, PATHS.get('robot_setting'),
                            PATHS.get('robot_as'), discrete_action_space)
    ] * n_envs)

    # instantiate eval environment
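    # (Monitor logs per-episode statistics to PATHS['eval']; EvalCallback runs
    # 10 evaluation episodes every 250 steps and keeps the best model)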
    eval_env = Monitor(
        FlatlandEnv(task, PATHS.get('robot_setting'), PATHS.get('robot_as'),
                    discrete_action_space), PATHS.get('eval'))
    eval_env = EvalCallback(eval_env, n_eval_episodes=10, eval_freq=250,
                            log_path=PATHS.get('eval'),
                            best_model_save_path=PATHS.get('model'),
                            deterministic=True)


    # determine mode
    if args.custom_mlp:
        # custom mlp flag
        model = PPO("MlpPolicy", env,
                    policy_kwargs=dict(net_arch=args.net_arch,
                                       activation_fn=get_act_fn(args.act_fn)),
                    gamma=gamma, n_steps=n_steps, ent_coef=ent_coef,
                    learning_rate=learning_rate, vf_coef=vf_coef,
                    max_grad_norm=max_grad_norm, gae_lambda=gae_lambda,
                    batch_size=batch_size, n_epochs=n_epochs,
                    clip_range=clip_range,
                    tensorboard_log=PATHS.get('tb'), verbose=1)