コード例 #1
0
ファイル: train.py プロジェクト: shahbuland/Learning-RL
def train_on_env(model, env_name, episodes):
    """Train ``model`` on a Gym environment and log progress to TensorBoard.

    Each of the ``UPDATES`` iterations gathers a rollout with ``sample``,
    runs ``PPOtrain`` on it, and records the resulting loss and the
    rollout's total reward as scalars.

    Args:
        model: Network to optimise; its parameters are handed to Adam and it
            is switched to training mode.
        env_name: Environment id passed to ``gym.make``.
        episodes: Not read by this function -- the iteration count comes from
            the module-level ``UPDATES``. Kept so existing callers still work.
    """
    # Local import so this function stays self-contained.
    from contextlib import closing

    opt = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, betas=BETAS)
    model.train()

    # closing() guarantees the writer is flushed/closed and the environment
    # is released even if sampling or training raises part-way through.
    with closing(SummaryWriter()) as writer, closing(gym.make(env_name)) as env:
        # Initial state handed to the first call to sample().
        next_s = ops.prep_state(env.reset())

        for update in range(UPDATES):
            # NOTE(review): the `sample` defined in the accompanying snippet
            # takes (model, env, steps, start_s); this call passes only three
            # positional arguments -- confirm which signature is current.
            samples, total_reward, next_s = sample(model, env, next_s)
            total_loss = PPOtrain(model, opt, samples)
            writer.add_scalar("Loss", total_loss, update)
            writer.add_scalar("Total Reward", total_reward, update)
コード例 #2
0
ファイル: train.py プロジェクト: shahbuland/Learning-RL
def sample(model, env, steps, start_s):
    """Roll ``model`` forward in ``env`` for ``steps`` transitions.

    Args:
        model: Callable; ``model(state)`` returns a triple whose first item
            exposes ``sample()`` and ``log_prob()``.
        env: Environment, advanced through ``ops.mod_step``.
        steps: Number of transitions to collect.
        start_s: State to begin from, or ``None`` to reset ``env`` first.

    Returns:
        A ``(storage, total_reward, last_state)`` tuple: the filled
        ``Rollout_Storage``, the sum of ``r.item()`` over all steps, and the
        state reached after the final step.
    """
    rollout = Rollout_Storage()
    reward_sum = 0

    # Only reset the environment when the caller did not supply a state.
    state = start_s if start_s is not None else ops.prep_state(env.reset())

    for _ in range(steps):
        # Only the first element of the model output is used here.
        policy, _logits, _value = model(state)
        action = policy.sample()
        action_log_prob = policy.log_prob(action)

        following, reward, done = ops.mod_step(env, action)
        reward_sum += reward.item()
        rollout.add(
            [state, following, action, action_log_prob, reward, done]
        )
        state = following

    return rollout, reward_sum, state
コード例 #3
0
# Seconds to wait per rendered frame, derived from the target frame rate.
SPF = 1 / FPS

env = gym.make(ENV_NAME)
agent = Agent()

# Best-effort restore of saved weights; playback continues without them.
try:
    agent.model.load_state_dict(torch.load("params.pt"))
except Exception:
    # Catch Exception rather than using a bare `except:` so that Ctrl-C
    # (KeyboardInterrupt) and SystemExit are not silently swallowed.
    print("Could not load checkpoint")
else:
    print("Loaded checkpoint")

# Play episodes forever, printing the total reward of each one.
while True:
    total_r = 0
    s = env.reset()
    s = prep_state(s)

    for i in range(TIME_LIMIT):

        # Throttle rendering to roughly FPS frames per second.
        time.sleep(SPF)
        env.render()

        # explore=False is passed to the agent for playback.
        a = agent.act(s, explore=False)

        s, r, done, _ = env.step(a)
        s = prep_state(s)
        total_r += r

        if done:
            break

    print(total_r)
コード例 #4
0
ファイル: train.py プロジェクト: shahbuland/Learning-RL
# Try loading previous agent (best effort: training proceeds without it).
if LOAD_CHECKPOINTS:
    try:
        agent.model.load_state_dict(torch.load("params.pt"))
    except Exception:
        # Catch Exception rather than using a bare `except:` so that Ctrl-C
        # (KeyboardInterrupt) and SystemExit are not silently swallowed.
        print("Could not load checkpoint")
    else:
        print("Loaded checkpoint")

# Bookkeeping initialised before training; `scores` and `step` are not
# touched in the lines visible here (presumably used further down -- verify).
scores = []

step = 0

# Main training loop: one environment episode per iteration.
for e in range(EPISODES):
    total_r = 0
    s = env.reset()
    s = prep_state(s)
    # Reset per episode; not updated in the visible part of the loop.
    q_loss = 0
    for t in range(TIME_LIMIT):

        env.render()

        a = agent.act(s)

        s_new, r, done, _ = env.step(a)
        # The environment's reward is discarded and replaced with element 1
        # of the raw (un-prepped) observation.
        # NOTE(review): looks like custom reward shaping -- confirm intent.
        r = s_new[1]
        total_r += r

        # Preprocess the new observation, then hand the transition
        # [state, action, reward, next state, done flag] to the agent.
        s_new = prep_state(s_new)
        agent.add_exp([s, a, r, s_new, done])

        s = s_new
コード例 #5
0
ファイル: testops.py プロジェクト: shahbuland/Learning-RL
from ops import prep_state
import numpy as np
import torch
import gym

# Manual check of prep_state: reset a Breakout environment, run the initial
# observation through prep_state, and print the shape of the result.
env = gym.make('Breakout-v0')
s = env.reset()
s_prep = prep_state(s)
print(s_prep.shape)