Example #1
import numpy as np

# loadConfigAndSetup, EnvsKwargs, printGreen, createEnv and policyEval are
# helper functions provided by the surrounding project.
def policyCrossEval(log_dir,
                    task,
                    episode,
                    model_path,
                    num_timesteps=2000,
                    num_cpu=1,
                    seed=0):
    train_args, algo_name, algo_class, srl_model_path, env_kwargs = loadConfigAndSetup(
        log_dir)
    env_kwargs = EnvsKwargs(task, env_kwargs)

    # Placeholder kept from the original logic: bail out if no model
    # checkpoint has been saved yet.
    OK = True
    if not OK:
        # no latest model saved yet
        return None, False
    printGreen(
        "Evaluating the model saved at: {} for {} timesteps".format(
            model_path, num_timesteps))

    log_dir, environment, algo_args = createEnv(log_dir,
                                                train_args,
                                                algo_name,
                                                algo_class,
                                                env_kwargs,
                                                num_cpu=num_cpu,
                                                seed=seed)

    reward = policyEval(environment, model_path, log_dir, algo_class,
                        algo_args, num_timesteps, num_cpu)

    # Prepend the episode number so it is stored alongside the reward,
    # at the cost of one extra array slot.
    reward = np.append(episode, reward)
    return reward, True
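
A minimal usage sketch for the function above; the log directory, task name, episode number, and checkpoint path are hypothetical placeholders, not values taken from the source:

reward, ok = policyCrossEval(log_dir="logs/ppo2/",                     # hypothetical log directory
                             task="sc",                                # hypothetical task name
                             episode=100,
                             model_path="logs/ppo2/model_100.pkl",     # hypothetical checkpoint path
                             num_timesteps=2000,
                             num_cpu=4,
                             seed=0)
if ok:
    # reward[0] is the episode number; the remaining entries are the rewards
    episode_id, rewards = reward[0], reward[1:]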
Example #2
import numpy as np

def policyCrossEval(log_dir,
                    task,
                    episode,
                    model_path,
                    num_timesteps=2000,
                    num_cpu=1):
    """
    To do a cross evaluation for a certain policy for different tasks
    A version of real time evaluation but with some bugs to fix
    :param log_dir:
    :param task:
    :param episode:
    :param model_path:
    :param num_timesteps: How many timesteps to evaluate the policy
    :param num_cpu:
    :return:
    """
    train_args, algo_name, algo_class, srl_model_path, env_kwargs = loadConfigAndSetup(
        log_dir)
    env_kwargs = EnvsKwargs(task, env_kwargs)

    # Placeholder kept from the original logic: bail out if no model
    # checkpoint has been saved yet.
    OK = True
    if not OK:
        # no latest model saved yet
        return None, False
    printGreen(
        "Evaluating the model saved at: {} for {} timesteps".format(
            model_path, num_timesteps))

    log_dir, environment, algo_args = createEnv(log_dir,
                                                train_args,
                                                algo_name,
                                                algo_class,
                                                env_kwargs,
                                                num_cpu=num_cpu)

    reward = policyEval(environment, model_path, log_dir, algo_class,
                        algo_args, num_timesteps, num_cpu)

    # Prepend the episode number so it is stored alongside the reward,
    # at the cost of one extra array slot.
    reward = np.append(episode, reward)
    return reward, True
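
Because each returned array is prefixed with its episode number, results from several checkpoints can be stacked into a single table. A sketch assuming a hypothetical "model_<episode>.pkl" checkpoint naming scheme and the same placeholder paths as above:

rows = []
for episode in [50, 100, 150]:
    model_path = "logs/ppo2/model_{}.pkl".format(episode)  # hypothetical naming
    reward, ok = policyCrossEval("logs/ppo2/", "sc", episode, model_path)
    if ok:
        rows.append(reward)
# Column 0 holds the episode number; the remaining columns hold the rewards.
results = np.vstack(rows)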