Example #1
def rollout(env, savedir, policy):
    images = Path('images')
    max_frames = 1000
    num_seen_frames = 0
    done = False
    metadata = []

    if policy == 'random':
        actions = [env.action_space.sample() for _ in range(max_frames + 1)]
    elif policy == 'brown':
        actions = brownian_sample(env.action_space, max_frames + 1, dt=1 / 50)

    hidden = [
        torch.zeros(1, settings.mdrnn_hidden_dim).to(settings.device)
        for _ in range(2)
    ]
    rg = RolloutGenerator(mdir=Path(os.environ['top']),
                          device=settings.device,
                          time_limit=1000)

    obs = env.reset()

    while not done:
        if policy in ['random', 'brown']:
            action = actions[num_seen_frames]
        elif policy == 'controller':
            transform = transforms.Compose([
                transforms.ToPILImage(),
                transforms.Resize((64, 64)),
                transforms.ToTensor()
            ])
            obs = transform(obs).unsqueeze(0).to(settings.device)
            action, hidden = rg.get_action_and_transition(obs, hidden)
        env.render('rgb_array')  # Look into why this call is necessary.

        obs, reward, done, _ = env.step(action)

        if num_seen_frames == max_frames:
            done = True
            reward = -100.0

        if num_seen_frames > 0:
            cv2.imwrite(f'{savedir}/frame_{num_seen_frames:04}.png', obs)
            metadata.append(
                dict(idx=num_seen_frames,
                     action=action.tolist(),
                     reward=reward,
                     done=done))
        num_seen_frames += 1

    with open(f'{savedir}/metadata.json', 'w') as f:
        content = json.dumps(metadata, indent=4)
        f.write(content)
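
Example #1 relies on a brownian_sample helper that is not shown in the snippet. Below is a minimal sketch of such a sampler; it assumes a Box action space and a random-walk step scaled by sqrt(dt), so the real helper used by the example may differ.

import numpy as np

def brownian_sample(action_space, n_steps, dt=1 / 50):
    """Return a list of n_steps actions following a clipped Brownian motion.

    Sketch of the helper assumed by Example #1, not the original implementation.
    """
    low, high = action_space.low, action_space.high
    actions = [action_space.sample().astype(np.float64)]
    for _ in range(n_steps - 1):
        # Random-walk step: previous action plus Gaussian noise scaled by sqrt(dt).
        noise = np.random.randn(*actions[-1].shape) * np.sqrt(dt)
        actions.append(np.clip(actions[-1] + noise, low, high))
    return actions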
Example #2
    def slave_routine(p_queue, r_queue, e_queue, p_index):
        """ Thread routine.

        Threads interact with p_queue, the parameters queue, r_queue, the result
        queue, and e_queue, the end queue. They pull parameters from p_queue, execute
        the corresponding rollout, then place the result in r_queue.

        Each parameter has its own unique id. Parameters are pulled as tuples
        (s_id, params) and results are pushed as (s_id, result). The same
        parameter can appear multiple times in p_queue, carrying the same id
        each time.

        As soon as e_queue is non-empty, the thread terminates.

        When multiple gpus are involved, the assigned gpu is determined by the
        process index p_index (gpu = p_index % n_gpus).

        :args p_queue: queue containing couples (s_id, parameters) to evaluate
        :args r_queue: where to place results (s_id, results)
        :args e_queue: as soon as not empty, terminate
        :args p_index: the process index
        """
        # init routine
        gpu = p_index % torch.cuda.device_count()
        device = torch.device(
            'cuda:{}'.format(gpu) if torch.cuda.is_available() else 'cpu')

        # redirect streams
        sys.stdout = open(join(tmp_dir, str(getpid()) + '.out'), 'a')
        sys.stderr = open(join(tmp_dir, str(getpid()) + '.err'), 'a')

        with torch.no_grad():
            r_gen = RolloutGenerator(logdir, device, time_limit)

            while e_queue.empty():
                if p_queue.empty():
                    sleep(.1)
                else:
                    s_id, params = p_queue.get()
                    r_queue.put((s_id, r_gen.rollout(params)))
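
The docstring above describes a simple master/worker protocol: the master pushes (s_id, params) couples onto p_queue, each worker runs the corresponding rollout and pushes (s_id, result) onto r_queue, and anything placed on e_queue tells the workers to stop. Below is a minimal sketch of the master side; the multiprocessing setup and the master_routine name are assumptions, not part of the original snippet.

import multiprocessing as mp

def master_routine(candidates, n_workers=4):
    """Dispatch candidate parameter vectors to workers and collect their results."""
    p_queue, r_queue, e_queue = mp.Queue(), mp.Queue(), mp.Queue()

    workers = [mp.Process(target=slave_routine,
                          args=(p_queue, r_queue, e_queue, p_index))
               for p_index in range(n_workers)]
    for w in workers:
        w.start()

    # Each candidate gets a unique id so results can be matched to parameters.
    for s_id, params in enumerate(candidates):
        p_queue.put((s_id, params))

    results = {}
    while len(results) < len(candidates):
        s_id, result = r_queue.get()
        results[s_id] = result

    # A non-empty e_queue makes the workers leave their loop and exit.
    e_queue.put('EOP')
    for w in workers:
        w.join()
    return results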
def controller_test_proc(controller, vae, mdrnn):
    step_log('4-3. controller_test_proc START!!')
    # define current best and load parameters
    if not os.path.exists(ctrl_dir):
        os.mkdir(ctrl_dir)
    ctrl_file = os.path.join(ctrl_dir, 'best.tar')

    print("Attempting to load previous best...")
    if os.path.exists(ctrl_file):
        # state = torch.load(ctrl_file, map_location={'cuda:0': 'cpu'})
        state = torch.load(ctrl_file)
        controller.load_state_dict(state['state_dict'])

    print("Controller Test Rollout START!!")
    with torch.no_grad():
        r_gen = RolloutGenerator(vae, mdrnn, controller, device,
                                 rollout_time_limit)
        r_gen.rollout(flatten_parameters(controller.parameters()), render=True)
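
controller_test_proc flattens the controller weights with a flatten_parameters helper before handing them to the rollout. Below is a sketch of such a helper, in the spirit of the utils.misc module imported by the other examples; the exact implementation may differ.

import torch

def flatten_parameters(params):
    """Concatenate an iterable of parameter tensors into a single flat numpy vector."""
    return torch.cat([p.detach().view(-1) for p in params], dim=0).cpu().numpy()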
    def rollout_routine():
        """ Thread routine.

        Threads interact with p_queue, the parameters queue, r_queue, the result
        queue, and e_queue, the end queue. They pull parameters from p_queue, execute
        the corresponding rollout, then place the result in r_queue.

        Each parameter has its own unique id. Parameters are pulled as tuples
        (s_id, params) and results are pushed as (s_id, result). The same
        parameter can appear multiple times in p_queue, carrying the same id
        each time.

        As soon as e_queue is non-empty, the thread terminates.

        When multiple gpus are involved, the assigned gpu is determined by the
        process index p_index (gpu = p_index % n_gpus).

        :args p_queue: queue containing couples (s_id, parameters) to evaluate
        :args r_queue: where to place results (s_id, results)
        :args e_queue: as soon as not empty, terminate
        :args p_index: the process index
        """
        # init routine
        #gpu = p_index % torch.cuda.device_count()
        #device = torch.device('cuda:{}'.format(gpu) if torch.cuda.is_available() else 'cpu')

        # redirect streams
        #if not os.path.exists(tmp_dir):
        #    os.mkdir(tmp_dir)

        #sys.stdout = open(os.path.join(tmp_dir, 'rollout.out'), 'a')
        #sys.stderr = open(os.path.join(tmp_dir, 'rollout.err'), 'a')

        with torch.no_grad():
            r_gen = RolloutGenerator(vae, mdrnn, controller, device,
                                     rollout_time_limit)

            while not p_queue.empty():
                print('in rollout_routine, p_queue.get()')
                s_id, params = p_queue.get()
                print('r_queue.put() sid=%d' % s_id)
                r_queue.put((s_id, r_gen.rollout(params)))
                print('r_gen.rollout OK, r_queue.put()')
def slave_routine(p_queue, r_queue, e_queue, p_index):
    """ Thread routine.

    Threads interact with p_queue, the parameters queue, r_queue, the result
    queue, and e_queue, the end queue. They pull parameters from p_queue, execute
    the corresponding rollout, then place the result in r_queue.

    Each parameter has its own unique id. Parameters are pulled as tuples
    (s_id, params) and results are pushed as (s_id, result). The same
    parameter can appear multiple times in p_queue, carrying the same id
    each time.

    As soon as e_queue is non-empty, the thread terminates.

    When multiple gpus are involved, the assigned gpu is determined by the
    process index p_index (gpu = p_index % n_gpus).

    :args p_queue: queue containing couples (s_id, parameters) to evaluate
    :args r_queue: where to place results (s_id, results)
    :args e_queue: as soon as not empty, terminate
    :args p_index: the process index
    """
    # init routine
    gpu = p_index % torch.cuda.device_count()
    device = torch.device('cuda:{}'.format(gpu) if torch.cuda.is_available() else 'cpu')

    # redirect streams
    sys.stdout = open(join(tmp_dir, str(getpid()) + '.out'), 'a')
    sys.stderr = open(join(tmp_dir, str(getpid()) + '.err'), 'a')

    with torch.no_grad():
        r_gen = RolloutGenerator(args.logdir, device, time_limit)

        while e_queue.empty():
            if p_queue.empty():
                sleep(.1)
            else:
                s_id, params = p_queue.get()
                r_queue.put((s_id, r_gen.rollout(params)))
""" Test controller """
import argparse
from os.path import join, exists
from utils.misc import RolloutGenerator
import torch

parser = argparse.ArgumentParser()
parser.add_argument('--logdir', type=str, help='Where models are stored.')
args = parser.parse_args()

ctrl_file = join(args.logdir, 'ctrl', 'best.tar')

assert exists(ctrl_file),\
    "Controller was not trained..."

device = torch.device('cpu')

generator = RolloutGenerator(args.logdir, device, 1000)

with torch.no_grad():
    r = generator.rollout(None)
    print(r)
Example #7
import argparse
from os.path import join, exists
from utils.misc import RolloutGenerator
import torch

parser = argparse.ArgumentParser()
parser.add_argument('--logdir', type=str, help='Where models are stored.')
parser.add_argument('--iteration_num',
                    type=int,
                    help="Iteration number of which controller to use")
parser.add_argument('--rollouts',
                    type=int,
                    help='Number of rollouts to generate',
                    default=1)
parser.add_argument('--rollouts_dir',
                    type=str,
                    help='Directory to store the rollouts')
args = parser.parse_args()

ctrl_file = join(args.logdir, 'ctrl', 'best.tar')
if args.iteration_num is not None:
    ctrl_file = join(args.logdir, 'ctrl', 'iter_{}'.format(args.iteration_num),
                     'best.tar')

assert exists(ctrl_file),\
    "Controller was not trained..."

device = torch.device('cpu')

generator = RolloutGenerator(args.logdir, device, 1000, args.iteration_num)

for i in range(0, args.rollouts):
    with torch.no_grad():
        generator.rollout(None, rollout_dir=args.rollouts_dir, rollout_num=i)
Example #8
import argparse

import scipy.stats
import torch
import gym
import gym_minigrid
from gym_minigrid.wrappers import *
from gym_minigrid.window import Window

from utils.misc import RolloutGenerator
#import VAEtorch
#import trainmdrnn

Device_Used = "cuda"
device = torch.device("cuda" if Device_Used == "cuda" else "cpu")
#device = torch.device('cpu')

parser = argparse.ArgumentParser()
parser.add_argument('--logdir', type=str, help='Where models are stored.')
args = parser.parse_args()

args.logdir = r'D:\steps1000'

# RolloutGenerator params:
#   mdir: model directory
#   device
#   time_limit: number of samples in goal space before exploration
#   number_goals: number of goals to set over the lifetime of the agent
#   Forward_model: 'M' = World Model, 'D' = linear layers (do not use)
#   hiddengoals: True = goals set in the World Model, False = goals as observations (basically IMGEPs)
#   curiosityreward: True/False - not relevant in this implementation
#   static: True = static VAE and HiddenVAE, False = constantly evolving VAE and HiddenVAE
generator = RolloutGenerator(args.logdir, device, 100, 200, True, False, False)

generator.rollout(None, render=False)  #run program
parser.add_argument('--do_not_store_videos',
                    type=bool,
                    help='Boolean to not store videos or store them',
                    default=False)
args = parser.parse_args()

ctrl_file = join(args.logdir, 'ctrl', 'best.tar')
if args.iteration_num is not None:
    ctrl_file = join(args.logdir, 'ctrl', 'iter_{}'.format(args.iteration_num),
                     'best.tar')

assert exists(ctrl_file),\
    "Controller was not trained..."

device = torch.device('cpu')

generator = RolloutGenerator(args.logdir, device, 2000, args.iteration_num)

total_reward = 0
top5rollouts = []
video_dir = args.video_dir
if args.iteration_num is not None:
    video_dir = join(args.video_dir, "iter_{}".format(args.iteration_num))
makedirs(video_dir, exist_ok=True)

f = open(join(video_dir, "rewards"), "w+")
f.write("Reward Rollout#\n")

for i in range(0, args.rollouts):
    with torch.no_grad():
        store_video_dir = None if args.do_not_store_videos else video_dir
        reward = -generator.rollout(None,
""" Test controller """
import argparse
from os.path import join, exists
from utils.misc import RolloutGenerator
import torch

parser = argparse.ArgumentParser()
parser.add_argument('--logdir', type=str, help='Where models are stored.')
args = parser.parse_args()

ctrl_file = join(args.logdir, 'ctrl', 'best.tar')

assert exists(ctrl_file),\
    "Controller was not trained..."

device = torch.device('cpu')

generator = RolloutGenerator(args.logdir, device, 1000)

with torch.no_grad():
    generator.rollout(None)
Example #11
import argparse
from os.path import join, exists
from utils.misc import RolloutGenerator
import torch
import numpy as np

# Run a few trials and report the mean and std of the final reward.
parser = argparse.ArgumentParser()
parser.add_argument('--logdir', type=str, help='Where models are stored.')
args = parser.parse_args()

ctrl_file = join(args.logdir, 'ctrl', 'best.tar')

assert exists(ctrl_file),\
    "Controller was not trained..."

device = torch.device('cpu')

rewards = []

for i in range(10):
    generator = RolloutGenerator(args.logdir, device, 1000)
    with torch.no_grad():
        final_reward = generator.rollout(None)
    rewards.append(final_reward)

print('last_reward', final_reward)
print('mean reward', np.mean(np.array(rewards)))
print('std reward', np.std(np.array(rewards)))

np.save(args.logdir + '/final_reward.npy', final_reward)
""" Test controller """
import argparse
from os.path import join, exists
from utils.misc import RolloutGenerator
import torch

parser = argparse.ArgumentParser()
parser.add_argument('--logdir', type=str, help='Where models are stored.')
parser.add_argument('--is-gate', action='store_true', help='Whether to use a highway on the last actions to smooth them out.')
args = parser.parse_args()

ctrl_file = join(args.logdir, 'ctrl', 'best.tar')

assert exists(ctrl_file),\
    "Controller was not trained..."

device = torch.device('cpu')

generator = RolloutGenerator(args.logdir, device, 1000, is_gate=args.is_gate)

with torch.no_grad():
    print(generator.rollout(None, render=True))