def rollout(env, savedir, policy):
    images = Path('images')
    max_frames = 1000
    num_seen_frames = 0
    done = False
    metadata = []

    if policy == 'random':
        actions = [env.action_space.sample() for _ in range(max_frames + 1)]
    elif policy == 'brown':
        actions = brownian_sample(env.action_space, max_frames + 1, dt=1 / 50)

    hidden = [
        torch.zeros(1, settings.mdrnn_hidden_dim).to(settings.device)
        for _ in range(2)
    ]
    rg = RolloutGenerator(mdir=Path(os.environ['top']),
                          device=settings.device,
                          time_limit=1000)
    obs = env.reset()

    while not done:
        if policy in ['random', 'brown']:
            action = actions[num_seen_frames]
        elif policy == 'controller':
            transform = transforms.Compose([
                transforms.ToPILImage(),
                transforms.Resize((64, 64)),
                transforms.ToTensor()
            ])
            obs = transform(obs).unsqueeze(0).to(settings.device)
            action, hidden = rg.get_action_and_transition(obs, hidden)
        env.render('rgb_array')  # Look into why this call is necessary.
        obs, reward, done, _ = env.step(action)
        if num_seen_frames == max_frames:
            done = True
            reward = -100.0
        if num_seen_frames > 0:
            cv2.imwrite(f'{savedir}/frame_{num_seen_frames:04}.png', obs)
        metadata.append(
            dict(idx=num_seen_frames, action=action.tolist(), reward=reward,
                 done=done))
        num_seen_frames += 1

    with open(f'{savedir}/metadata.json', 'w') as f:
        content = json.dumps(metadata, indent=4)
        f.write(content)
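# brownian_sample is referenced above but not defined in this snippet. Below
# is a minimal sketch of what it might look like, assuming a Box action space
# and a Brownian (random-walk) action trajectory: Gaussian increments scaled
# by sqrt(dt), accumulated from a random start and clipped to the bounds.
import numpy as np


def brownian_sample(action_space, n, dt):
    """Sample n actions along a Brownian path through the action space."""
    start = action_space.sample().astype(np.float64)
    # Each step is a Gaussian increment with standard deviation sqrt(dt).
    steps = np.random.randn(n, *action_space.shape) * np.sqrt(dt)
    path = start + np.cumsum(steps, axis=0)
    # Keep the walk inside the legal action range.
    return np.clip(path, action_space.low, action_space.high)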
def slave_routine(p_queue, r_queue, e_queue, p_index):
    """ Thread routine.

    Threads interact with p_queue, the parameters queue, r_queue, the result
    queue, and e_queue, the end queue. They pull parameters from p_queue,
    execute the corresponding rollout, then place the result in r_queue.

    Each parameter has its own unique id. Parameters are pulled as tuples
    (s_id, params) and results are pushed as (s_id, result). The same
    parameter can appear multiple times in p_queue, carrying the same id each
    time. As soon as e_queue is non-empty, the thread terminates.

    When multiple GPUs are involved, the assigned GPU is determined by the
    process index p_index (gpu = p_index % n_gpus).

    :args p_queue: queue containing couples (s_id, parameters) to evaluate
    :args r_queue: where to place results (s_id, results)
    :args e_queue: as soon as not empty, terminate
    :args p_index: the process index
    """
    # init routine: pick a GPU round-robin, falling back to CPU when none is
    # available (guards against ZeroDivisionError when device_count() == 0)
    if torch.cuda.is_available():
        device = torch.device(
            'cuda:{}'.format(p_index % torch.cuda.device_count()))
    else:
        device = torch.device('cpu')

    # redirect streams to per-process log files
    sys.stdout = open(join(tmp_dir, str(getpid()) + '.out'), 'a')
    sys.stderr = open(join(tmp_dir, str(getpid()) + '.err'), 'a')

    with torch.no_grad():
        r_gen = RolloutGenerator(logdir, device, time_limit)

        while e_queue.empty():
            if p_queue.empty():
                sleep(.1)
            else:
                s_id, params = p_queue.get()
                r_queue.put((s_id, r_gen.rollout(params)))
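# slave_routine assumes a master process that owns the three queues. A rough
# sketch of that side, using torch.multiprocessing so the workers can share
# CUDA context cleanly; n_workers and the termination token are assumptions,
# not taken from the snippet above.
from torch.multiprocessing import Process, Queue

p_queue, r_queue, e_queue = Queue(), Queue(), Queue()

n_workers = 4  # hypothetical worker count
for p_index in range(n_workers):
    Process(target=slave_routine,
            args=(p_queue, r_queue, e_queue, p_index)).start()

# ... push (s_id, params) tuples into p_queue, read results from r_queue ...

e_queue.put('EOP')  # any item in e_queue makes the workers exit their loop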
def controller_test_proc(controller, vae, mdrnn):
    step_log('4-3. controller_test_proc START!!')

    # define current best and load parameters
    if not os.path.exists(ctrl_dir):
        os.mkdir(ctrl_dir)
    ctrl_file = os.path.join(ctrl_dir, 'best.tar')

    print("Attempting to load previous best...")
    if os.path.exists(ctrl_file):
        # state = torch.load(ctrl_file, map_location={'cuda:0': 'cpu'})
        state = torch.load(ctrl_file)
        controller.load_state_dict(state['state_dict'])

    print("Controller Test Rollout START!!")
    with torch.no_grad():
        r_gen = RolloutGenerator(vae, mdrnn, controller, device,
                                 rollout_time_limit)
        r_gen.rollout(flatten_parameters(controller.parameters()),
                      render=True)
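# flatten_parameters is imported from elsewhere in this project. A plausible
# minimal version, assuming it concatenates every controller parameter into
# one flat numpy vector (the shape CMA-ES style optimizers work with):
import numpy as np


def flatten_parameters(params):
    """Concatenate an iterable of parameter tensors into a flat numpy array."""
    return np.concatenate([p.detach().cpu().numpy().ravel() for p in params])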
def rollout_routine():
    """ Rollout routine.

    Pulls parameters from p_queue, executes the corresponding rollout, then
    places the result in r_queue. Parameters are pulled as tuples
    (s_id, params) and results are pushed as (s_id, result); the same
    parameter can appear multiple times in p_queue, carrying the same id each
    time.

    Unlike the multi-process slave_routine, this variant runs in the calling
    process and simply drains p_queue until it is empty: p_queue, r_queue,
    vae, mdrnn, controller, device and rollout_time_limit come from the
    enclosing scope, and the per-process GPU selection and stream redirection
    are disabled.
    """
    with torch.no_grad():
        r_gen = RolloutGenerator(vae, mdrnn, controller, device,
                                 rollout_time_limit)

        while not p_queue.empty():
            print('in rollout_routine, p_queue.get()')
            s_id, params = p_queue.get()
            print('r_queue.put() sid=%d' % s_id)
            r_queue.put((s_id, r_gen.rollout(params)))
            print('r_gen.rollout OK, r_queue.put()')
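# A sketch of how rollout_routine can be driven. Since everything runs in one
# process, a plain queue.Queue suffices; n_params and candidate_solutions are
# hypothetical names, not taken from the snippet above.
from queue import Queue

import numpy as np

n_params = 867  # hypothetical controller parameter count
candidate_solutions = [np.random.randn(n_params) for _ in range(8)]

p_queue, r_queue = Queue(), Queue()

for s_id, params in enumerate(candidate_solutions):
    p_queue.put((s_id, params))

rollout_routine()  # drains p_queue, filling r_queue

while not r_queue.empty():
    s_id, result = r_queue.get()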
def slave_routine(p_queue, r_queue, e_queue, p_index):
    """ Thread routine.

    Threads interact with p_queue, the parameters queue, r_queue, the result
    queue, and e_queue, the end queue. They pull parameters from p_queue,
    execute the corresponding rollout, then place the result in r_queue.

    Each parameter has its own unique id. Parameters are pulled as tuples
    (s_id, params) and results are pushed as (s_id, result). The same
    parameter can appear multiple times in p_queue, carrying the same id each
    time. As soon as e_queue is non-empty, the thread terminates.

    When multiple GPUs are involved, the assigned GPU is determined by the
    process index p_index (gpu = p_index % n_gpus).

    :args p_queue: queue containing couples (s_id, parameters) to evaluate
    :args r_queue: where to place results (s_id, results)
    :args e_queue: as soon as not empty, terminate
    :args p_index: the process index
    """
    # init routine: pick a GPU round-robin, falling back to CPU when none is
    # available (guards against ZeroDivisionError when device_count() == 0)
    if torch.cuda.is_available():
        device = torch.device(
            'cuda:{}'.format(p_index % torch.cuda.device_count()))
    else:
        device = torch.device('cpu')

    # redirect streams to per-process log files
    sys.stdout = open(join(tmp_dir, str(getpid()) + '.out'), 'a')
    sys.stderr = open(join(tmp_dir, str(getpid()) + '.err'), 'a')

    with torch.no_grad():
        r_gen = RolloutGenerator(args.logdir, device, time_limit)

        while e_queue.empty():
            if p_queue.empty():
                sleep(.1)
            else:
                s_id, params = p_queue.get()
                r_queue.put((s_id, r_gen.rollout(params)))
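# These slave routines are usually driven by a CMA-ES master loop. A rough
# sketch with the cma package; n_params, the population handling and the
# fitness sign are assumptions rather than details from the snippet above.
import cma
import numpy as np

n_params = 867  # hypothetical controller parameter count
es = cma.CMAEvolutionStrategy(np.zeros(n_params), 0.1)

while not es.stop():
    solutions = es.ask()                      # sample a population
    for s_id, params in enumerate(solutions):
        p_queue.put((s_id, params))           # farm the rollouts out
    fitness = [None] * len(solutions)
    while None in fitness:                    # collect every result
        s_id, result = r_queue.get()
        fitness[s_id] = result
    es.tell(solutions, fitness)               # CMA-ES minimizes, so rollouts
                                              # should return negated rewards
e_queue.put('EOP')  # tell the slaves to exit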
""" Test controller """ import argparse from os.path import join, exists from utils.misc import RolloutGenerator import torch parser = argparse.ArgumentParser() parser.add_argument('--logdir', type=str, help='Where models are stored.') args = parser.parse_args() ctrl_file = join(args.logdir, 'ctrl', 'best.tar') assert exists(ctrl_file),\ "Controller was not trained..." device = torch.device('cpu') generator = RolloutGenerator(args.logdir, device, 1000) with torch.no_grad(): r = generator.rollout(None) print(r)
import argparse
from os.path import join, exists

import torch

from utils.misc import RolloutGenerator

parser = argparse.ArgumentParser()
parser.add_argument('--logdir', type=str, help='Where models are stored.')
parser.add_argument('--iteration_num', type=int,
                    help='Iteration number of which controller to use')
parser.add_argument('--rollouts', type=int, default=1,
                    help='Number of rollouts to generate')
parser.add_argument('--rollouts_dir', type=str,
                    help='Directory to store the rollouts')
args = parser.parse_args()

ctrl_file = join(args.logdir, 'ctrl', 'best.tar')
if args.iteration_num is not None:
    ctrl_file = join(args.logdir, 'ctrl',
                     'iter_{}'.format(args.iteration_num), 'best.tar')
assert exists(ctrl_file), "Controller was not trained..."

device = torch.device('cpu')

generator = RolloutGenerator(args.logdir, device, 1000, args.iteration_num)

for i in range(args.rollouts):
    with torch.no_grad():
        generator.rollout(None, rollout_dir=args.rollouts_dir, rollout_num=i)
import argparse

import scipy.stats
import gym
import gym_minigrid
from gym_minigrid.wrappers import *
from gym_minigrid.window import Window
import torch

from utils.misc import RolloutGenerator
# import VAEtorch
# import trainmdrnn

Device_Used = "cuda"
device = torch.device("cuda" if Device_Used == "cuda" else "cpu")
# device = torch.device('cpu')

parser = argparse.ArgumentParser()
parser.add_argument('--logdir', type=str, help='Where models are stored.')
args = parser.parse_args()
args.logdir = r'D:\steps1000'  # hardcoded override of --logdir (raw string so the backslash survives)

# RolloutGenerator params:
#   mdir: model directory
#   device
#   time_limit: number of samples in goal space before exploration
#   number_goals: number of goals to set over the lifetime of the agent
#   Forward_model: 'M' = world model, 'D' = linear layers (do not use)
#   hiddengoals: True = goals set in the world model, False = goals as observations (basically IMGEPs)
#   curiosityreward: True/False - not relevant in this implementation
#   static: True = static VAE and HiddenVAE, False = constantly evolving VAE and HiddenVAE
generator = RolloutGenerator(args.logdir, device, 100, 200, True, False, False)
generator.rollout(None, render=False)  # run program
import argparse
from os import makedirs
from os.path import join, exists

import torch

from utils.misc import RolloutGenerator

parser = argparse.ArgumentParser()
# The snippet begins mid-script; the imports and the options above
# --do_not_store_videos are reconstructed from how args.* is used below.
parser.add_argument('--logdir', type=str, help='Where models are stored.')
parser.add_argument('--iteration_num', type=int,
                    help='Iteration number of which controller to use')
parser.add_argument('--rollouts', type=int, default=1,
                    help='Number of rollouts to generate')
parser.add_argument('--video_dir', type=str,
                    help='Directory to store the videos')
# The original declared this flag with type=bool, which argparse treats as
# truthy for any non-empty string; store_true is the working equivalent.
parser.add_argument('--do_not_store_videos', action='store_true',
                    help='Do not store videos')
args = parser.parse_args()

ctrl_file = join(args.logdir, 'ctrl', 'best.tar')
if args.iteration_num is not None:
    ctrl_file = join(args.logdir, 'ctrl',
                     'iter_{}'.format(args.iteration_num), 'best.tar')
assert exists(ctrl_file), "Controller was not trained..."

device = torch.device('cpu')

generator = RolloutGenerator(args.logdir, device, 2000, args.iteration_num)

total_reward = 0
top5rollouts = []

video_dir = args.video_dir
if args.iteration_num is not None:
    video_dir = join(args.video_dir, "iter_{}".format(args.iteration_num))
makedirs(video_dir, exist_ok=True)

f = open(join(video_dir, "rewards"), "w+")
f.write("Reward Rollout#\n")

for i in range(args.rollouts):
    with torch.no_grad():
        store_video_dir = None if args.do_not_store_videos else video_dir
        # The call was truncated in the source; the trailing arguments are an
        # assumption modelled on the sibling test script above.
        reward = -generator.rollout(None, rollout_dir=store_video_dir,
                                    rollout_num=i)
""" Test controller """ import argparse from os.path import join, exists from utils.misc import RolloutGenerator import torch parser = argparse.ArgumentParser() parser.add_argument('--logdir', type=str, help='Where models are stored.') args = parser.parse_args() ctrl_file = join(args.logdir, 'ctrl', 'best.tar') assert exists(ctrl_file),\ "Controller was not trained..." device = torch.device('cpu') generator = RolloutGenerator(args.logdir, device, 1000) with torch.no_grad(): generator.rollout(None)
import argparse
from os.path import join, exists

import torch
import numpy as np

from utils.misc import RolloutGenerator

### ADD IN SOMETHING TO DO AVG AND STD OF A FEW TRIALS
parser = argparse.ArgumentParser()
parser.add_argument('--logdir', type=str, help='Where models are stored.')
args = parser.parse_args()

ctrl_file = join(args.logdir, 'ctrl', 'best.tar')
assert exists(ctrl_file), "Controller was not trained..."

device = torch.device('cpu')

rewards = []
for i in range(10):
    generator = RolloutGenerator(args.logdir, device, 1000)
    with torch.no_grad():
        final_reward = generator.rollout(None)
    rewards.append(final_reward)
    print('last_reward', final_reward)

print('mean reward', np.mean(np.array(rewards)))
print('std reward', np.std(np.array(rewards)))
np.save(args.logdir + '/final_reward.npy', final_reward)  # note: saves only the last trial's reward
""" Test controller """ import argparse from os.path import join, exists from utils.misc import RolloutGenerator import torch parser = argparse.ArgumentParser() parser.add_argument('--logdir', type=str, help='Where models are stored.') parser.add_argument('--is-gate', action='store_true', help='Whether to use a highway for last actions to smoothen it out.') args = parser.parse_args() ctrl_file = join(args.logdir, 'ctrl', 'best.tar') assert exists(ctrl_file),\ "Controller was not trained..." device = torch.device('cpu') generator = RolloutGenerator(args.logdir, device, 1000, is_gate=args.is_gate) with torch.no_grad(): print(generator.rollout(None, render=True))