def breakout_a2c_evaluate(checkpoint_file_path, takes=10):
    """Load a Breakout policy-gradient checkpoint and print evaluation statistics.

    The model is rebuilt with a NatureCnn backbone (84x84 input, 4 channels),
    restored from the checkpoint, and passed to ``record_take`` ``takes`` times.
    The ``'r'`` and ``'l'`` entries of every result are gathered as rewards and
    lengths, and a ``describe()`` summary of them is printed.

    :param checkpoint_file_path: path of the state dict to read with ``torch.load``
    :param takes: number of times ``record_take`` is invoked
    :return: None
    """
    # Prefer the first GPU but fall back to CPU, so the evaluation also runs
    # on hosts without CUDA instead of failing when the model is moved.
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # map_location lets a checkpoint that was saved from GPU tensors be
    # deserialized on whichever device was selected above.
    model_checkpoint = torch.load(checkpoint_file_path, map_location=device)

    env = FrameStack(
        ClassicAtariEnv('BreakoutNoFrameskip-v4').instantiate(preset='raw'), k=4
    )

    model = PolicyGradientModelFactory(
        backbone=NatureCnnFactory(input_width=84, input_height=84, input_channels=4)
    ).instantiate(action_space=env.action_space)

    model.load_state_dict(model_checkpoint)
    model = model.to(device)

    # Switch layers such as dropout / batch-norm to inference behaviour.
    model.eval()

    results = [record_take(model, env, device) for _ in range(takes)]
    rewards = [result['r'] for result in results]
    lengths = [result['l'] for result in results]

    print(pd.DataFrame({'lengths': lengths, 'rewards': rewards}).describe())
# Example #2
# 0
def eval_model(checkpoint_path='tmp_checkout.data', takes=10, seed=1001):
    """Load checkpoint data and evaluate its performance.

    :param checkpoint_path: path of the state dict to read with ``torch.load``
    :param takes: value forwarded to ``evaluate_model`` as ``takes``
    :param seed: value passed to ``set_seed`` before the environment is built
    :return: None
    """
    device = torch.device('cpu')

    # Set random seed in python std lib, numpy and pytorch
    set_seed(seed)

    # DummyVecEnv takes a list of zero-argument factories; the environment
    # class itself is such a factory, so no wrapper lambda is needed.
    vec_env = DummyVecEnv([ColoredEgoCostmapRandomAisleTurnEnv])
    vec_env.reset()

    backbone = NatureCnnTwoTowerFactory(
        input_width=133, input_height=133, input_channels=1
    )
    model = PolicyGradientModelFactory(backbone=backbone).instantiate(
        action_space=vec_env.action_space
    )

    # Deserialize onto the evaluation device regardless of where it was saved.
    model_checkpoint = torch.load(checkpoint_path, map_location=device)
    model.load_state_dict(model_checkpoint)

    evaluate_model(model, vec_env, device, takes=takes)