Example #1
def generate_exp1_n_2():
    # execute training
    from datetime import datetime
    print('===== Start =====')
    print('start time: ', str(datetime.now()))
    lander = LunarLander(annealing_size=150,
                         alpha=0.0015,
                         batch_size=25,
                         update_step=2,
                         load_weights=False)
    lander.start_record(render=False)
    lander.train()
    lander.test()
    lander.end_record(upload_key='sk_XwbuJNCrQnqa1MJDOk3dyQ')
    print('completed time: ', str(datetime.now()))
def eval(model_type=model_type, model_path=model_path):
    # evaluate a trained model over a fixed number of episodes and print
    # win/frame/fuel statistics (note: the name shadows Python's built-in eval)
    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'

    env = LunarLander()

    if model_type == 'policy':
        model = Policy(env.observation_dim, env.action_dim)
    elif model_type == 'dqn':
        model = Network(env.observation_dim, env.action_dim)
    else:
        raise ValueError(f"unknown model_type: {model_type}")
    model.to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()

    episodes = 50
    wins = 0
    frames = []
    fuel_left = []
    for i in range(episodes):
        if i % 10 == 0:
            print(f"On episode {i}")
        frame_count = 0

        env.reset()
        state = env.get_state()
        while True:
            frame_count += 1

            action = model(
                torch.tensor(state, dtype=torch.float32,
                             device=device).unsqueeze(0)).argmax()

            state, reward, done = env.step(action)

            if done:
                if env.won:
                    wins += 1
                    frames.append(frame_count)
                    fuel_left.append(env.rocket.fuel)
                break
    # close the environment once, after all episodes
    env.close()

    if wins > 0:
        print(f"wins: {wins}")
        print(f"mean frames on wins {np.mean(frames)}")
        print(f"std frames on wins {np.std(frames, ddof=1)}")
        print(f"min frames on wins {np.min(frames)}")
        print(f"max frames on wins {np.max(frames)}")

        print(f"mean fuel on wins {np.mean(fuel_left)}")
        print(f"std fuel on wins {np.std(fuel_left, ddof=1)}")
        print(f"min fuel on wins {np.min(fuel_left)}")
        print(f"max fuel on wins {np.max(fuel_left)}")
    else:
        print("The model had 0 wins. Statistics can't be calculated")
Example #3
def test_model(episodes):
    # play the given number of episodes with the module-level model and
    # return (mean fuel left on winning episodes, number of wins)
    wins = 0
    frames = []
    fuel_left = []

    env = LunarLander()
    for i in range(episodes):
        frame_count = 0
        env.reset()
        state = env.get_state()
        while True:
            frame_count += 1
            action = model(
                torch.tensor(state, dtype=torch.float32,
                             device=device).unsqueeze(0)).argmax()
            state, reward, done = env.step(action)

            if done:
                if env.won:
                    wins += 1
                    frames.append(frame_count)
                    fuel_left.append(env.rocket.fuel)
                break

    if len(fuel_left) > 0:
        return np.mean(fuel_left), wins
    else:
        return 0, 0
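
A hypothetical call, assuming model, device, np, and torch are already set up at module level as in the surrounding examples:

mean_fuel, wins = test_model(episodes=50)
print(f"wins: {wins}, mean fuel left on wins: {mean_fuel}")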
Example #4
import torch
from Policy import Policy
from DQN import Network
import pygame

# import the environment you want to use
# from SimplifiedLunarLander import LunarLander
from LunarLander import LunarLander

# 'policy' or 'dqn' to choose which type of model to evaluate
model_type = 'policy'
# model_type = 'dqn'
model_path = "policies/22-1-2021_13-44/policy0.tar"

env = LunarLander()
env.reset()
exit_program = False

if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

if model_type == 'policy':
    model = Policy(env.observation_dim, env.action_dim)
elif model_type == 'dqn':
    model = Network(env.observation_dim, env.action_dim)
model.to(device)
model.load_state_dict(torch.load(model_path, map_location=device))
model.eval()
state = env.get_state()
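
The snippet stops after loading the model and reading the initial state. A minimal sketch of how the loop might continue, assuming the env.step()/env.get_state() API shown in the other examples; the pygame event handling is the standard library pattern, and torch.no_grad() is added here as an inference-time optimization:

# hedged continuation sketch, not part of the original snippet
while not exit_program:
    for event in pygame.event.get():
        if event.type == pygame.QUIT:  # window closed by the user
            exit_program = True

    with torch.no_grad():  # no gradients needed during evaluation
        action = model(
            torch.tensor(state, dtype=torch.float32,
                         device=device).unsqueeze(0)).argmax()
    state, reward, done = env.step(action)

    if done:  # start a fresh episode and keep playing
        env.reset()
        state = env.get_state()
env.close()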
Example #5
import random
import numpy as np
import os
import torch
from DQN_agent import DQNAgent

# import the environment you want to use
# from SimplifiedLunarLander import LunarLander
from LunarLander import LunarLander

# number of frames to train on
num_frames = 20_000
memory_size = 1000
batch_size = 32
target_update = 100
seed = 0
env = LunarLander()

# whether to use wandb; all wandb code is commented out so the script can run without it
log = False


def seed_everything(seed_value):
    # seed every RNG source (python, numpy, torch, hashing) for reproducibility
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    os.environ['PYTHONHASHSEED'] = str(seed_value)

    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed_value)
        torch.cuda.manual_seed_all(seed_value)
        torch.backends.cudnn.deterministic = True
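
The snippet defines the hyperparameters and the seeding helper but ends before training starts. A hedged sketch of how they would plausibly be wired together; the DQNAgent constructor and train() signature are assumptions, not a confirmed API:

# hypothetical wiring of the pieces above
seed_everything(seed)
agent = DQNAgent(env, memory_size, batch_size, target_update, seed=seed)
agent.train(num_frames)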
Example #6
                    fuel_left.append(env.rocket.fuel)
                break

    if len(fuel_left) > 0:
        return np.mean(fuel_left), wins
    else:
        return 0, 0


if __name__ == '__main__':
    files = os.listdir(model_directory)

    best_mean_fuel = 0
    best_file = ""
    best_wins = 0
    env = LunarLander()
    for file in files:
        model_path = f"{model_directory}/{file}"

        if model_type == 'policy':
            model = Policy(env.observation_dim, env.action_dim)
        elif model_type == 'dqn':
            model = Network(env.observation_dim, env.action_dim)
        model.to(device)
        model.load_state_dict(torch.load(model_path, map_location=device))
        model.eval()

        print(f"Testing model {file}")
        mean_fuel, wins = test_model(episodes)

        if wins == best_wins: