Beispiel #1
0
    def train(self, model, optimizer, lossfunc, dataloader, selector, run, epoch):
        """Run one training pass over every payload in ``dataloader``.

        For each payload: extract inputs and targets via ``selector``, fire the
        before-hooks, perform one optimisation step (zero_grad, forward, loss,
        backward, step), fire the after-hooks and advance ``run.step``.

        Args:
            model: Callable model. It is moved to ``config.device()``, switched
                to train mode, and its ``epoch`` attribute is set to ``epoch``.
            optimizer: Optimizer whose ``zero_grad()`` / ``step()`` are called
                once per payload.
            lossfunc: Callable invoked as ``lossfunc(*outputs, *targets)``.
            dataloader: Iterable yielding payloads.
            selector: Object providing ``get_input(payload, device)`` and
                ``get_target(payload, device)``; both results are star-unpacked,
                so they must be iterables.
            run: Object carrying the ``step`` counter, incremented per payload.
            epoch: Current epoch value, stored on the model and passed to hooks.
        """
        device = config.device()
        model.to(device)
        model.train()
        model.epoch = epoch

        for payload in dataloader:

            input_data = selector.get_input(payload, device)
            target_data = selector.get_target(payload, device)

            before_args = BeforeArgs(self, payload, input_data, target_data, model, optimizer, lossfunc, dataloader,
                                     selector, run, epoch)
            self.execute_before(before_args)

            optimizer.zero_grad()
            output_data = model(*input_data)
            # isinstance (rather than an exact type comparison) so that tuple
            # subclasses such as namedtuples are unpacked into the loss too.
            loss_inputs = output_data if isinstance(output_data, tuple) else (output_data,)
            loss = lossfunc(*loss_inputs, *target_data)
            loss.backward()
            optimizer.step()

            # Hooks receive the model output exactly as the model returned it.
            after_args = AfterArgs(self, payload, input_data, target_data, model, optimizer, lossfunc, dataloader,
                                   selector, run, epoch, output_data, loss)
            self.execute_after(after_args)

            run.step += 1
Beispiel #2
0
    def test(self, model, lossfunc, dataloader, selector, run, epoch):
        """Evaluate ``model`` on every payload in ``dataloader``.

        For each payload: extract inputs and targets via ``selector``, fire the
        before-hooks, run the forward pass and compute the loss (no backward
        pass, no optimizer), fire the after-hooks and advance ``run.step``.

        Args:
            model: Callable model. It is moved to ``config.device()``, switched
                to eval mode, and its ``epoch`` attribute is set to ``epoch``.
            lossfunc: Callable invoked as ``lossfunc(*outputs, *targets)``.
            dataloader: Iterable yielding payloads.
            selector: Object providing ``get_input(payload, device)`` and
                ``get_target(payload, device)``; both results are star-unpacked,
                so they must be iterables.
            run: Object carrying the ``step`` counter, incremented per payload.
            epoch: Current epoch value, stored on the model and passed to hooks.
        """
        device = config.device()
        model.to(device)
        model.eval()
        model.epoch = epoch

        for payload in dataloader:

            input_data = selector.get_input(payload, device)
            target_data = selector.get_target(payload, device)

            # The optimizer argument is None: nothing is optimised here.
            before_args = BeforeArgs(self, payload, input_data, target_data, model, None, lossfunc, dataloader,
                                     selector, run, epoch)
            self.execute_before(before_args)

            # NOTE(review): the forward pass runs with autograd enabled; if no
            # hook needs gradients, a no-grad context would save memory - confirm.
            output_data = model(*input_data)
            # isinstance (rather than an exact type comparison) so that tuple
            # subclasses such as namedtuples are unpacked into the loss too.
            loss_inputs = output_data if isinstance(output_data, tuple) else (output_data,)
            loss = lossfunc(*loss_inputs, *target_data)

            # Hooks receive the model output exactly as the model returned it.
            after_args = AfterArgs(self, payload, input_data, target_data, model, None, lossfunc, dataloader,
                                   selector, run, epoch, output_data, loss)
            self.execute_after(after_args)

            run.step += 1
Beispiel #3
0
    def test_rollout_gen(self):
        """Smoke test: drive ``RolloutGen`` with a stored policy and show each screen.

        Loads the visual model and the controller from the ``models`` directory
        below ``config.basepath() / 'SpaceInvaders-v4'``, wraps them in a
        ``VCPolicy`` and pushes every screen yielded by the rollout to an
        ``ImageViewer``. Nothing is asserted.
        """
        environment = gym.make('SpaceInvaders-v4')
        model_dir = config.basepath() / 'SpaceInvaders-v4' / 'models'

        vision_model = Storeable.load(str(model_dir / 'GM53H301W5YS38XH')).to(config.device())
        control_model = torch.load(str(model_dir / 'best_model68'))
        agent = VCPolicy(vision_model, control_model)

        display = ImageViewer('screen', (420, 360), 'numpyRGB')

        # Each item is a 6-tuple; only the screen is used here.
        rollout_steps = RolloutGen(environment, agent)
        for frame, _observation, _reward, _done, _info, _action in rollout_steps:
            display.update(frame)
Beispiel #4
0
    def test_gymsim(self):
        """Smoke test: batch a ``GymSimulatorDataset`` and print each reward batch.

        Loads the visual model and the controller from the ``models`` directory
        below ``config.basepath() / 'SpaceInvaders-v4'``, builds a ``VCPolicy``
        and a ``GymSimulatorDataset`` from it, and iterates the dataset through
        a ``DataLoader``. Nothing is asserted.
        """
        environment = gym.make('SpaceInvaders-v4')
        model_dir = config.basepath() / 'SpaceInvaders-v4' / 'models'

        vision_model = Storeable.load(str(model_dir / 'GM53H301W5YS38XH')).to(config.device())
        control_model = torch.load(str(model_dir / 'best_model68'))
        agent = VCPolicy(vision_model, control_model)

        simulated = GymSimulatorDataset(environment, agent, 3000)

        # Fixed order, batches of 10, incomplete final batch dropped.
        batches = torch.utils.data.DataLoader(
            dataset=simulated,
            batch_size=10,
            shuffle=False,
            drop_last=True,
        )

        # Each batch unpacks into six fields; only the reward is printed.
        for _screen, _observation, _action, reward, _done, _extra in batches:
            print(reward)
Beispiel #5
0
    def infer(self, model, lossfunc, dataloader, selector, run, epoch):
        """Run ``model`` forward over every payload in ``dataloader``.

        No targets are fetched and no loss is computed: the target, optimizer
        and loss positions handed to the hooks are all ``None``. ``lossfunc``
        is only forwarded to the hook arguments.

        Args:
            model: Callable model. It is moved to ``config.device()``, switched
                to eval mode, and its ``epoch`` attribute is set to ``epoch``.
            lossfunc: Forwarded to the hook arguments; never called here.
            dataloader: Iterable yielding payloads.
            selector: Object providing ``get_input(payload, device)``; the
                result is star-unpacked into the model call.
            run: Object carrying the ``step`` counter, incremented per payload.
            epoch: Current epoch value, stored on the model and passed to hooks.
        """
        target_device = config.device()
        model.to(target_device)
        model.eval()
        model.epoch = epoch

        for batch in dataloader:
            inputs = selector.get_input(batch, target_device)

            self.execute_before(
                BeforeArgs(self, batch, inputs, None, model, None, lossfunc, dataloader,
                           selector, run, epoch)
            )

            outputs = model(*inputs)

            self.execute_after(
                AfterArgs(self, batch, inputs, None, model, None, lossfunc, dataloader,
                          selector, run, epoch, outputs, None)
            )

            run.step += 1
Beispiel #6
0
def main(gym_environment, policy, output_dir, first_episode=531, stop_episode=1000, max_timesteps=3000):
    """Roll out ``policy`` in a gym environment and feed every step to an ActionEncoder.

    Side effects: rebinds the module-level globals ``action_encoder`` and
    ``input_viewer``.

    Args:
        gym_environment: Environment id passed to ``gym.make``; also used as a
            path component for the per-episode file name.
        policy: Policy handed to ``rollout.rollout``; its ``v`` attribute is
            passed to the ``ActionEncoder`` constructor.
        output_dir: Directory name placed below
            ``config.basepath() / gym_environment``.
        first_episode: First episode number to roll out. Defaults to the
            previously hard-coded 531.
        stop_episode: Episode number at which to stop (exclusive). Defaults to
            the previously hard-coded 1000.
        max_timesteps: Value forwarded to ``rollout.rollout`` as
            ``max_timesteps``. Defaults to the previously hard-coded 3000.
    """
    global action_encoder, input_viewer
    env = gym.make(gym_environment)
    action_encoder = ActionEncoder(env, gym_environment, ActionEmbedding(env), policy.v).to(config.device())
    rollout = Rollout(env)
    input_viewer = ImageViewer('input', (320, 480), 'numpyRGB')

    def frame(step):
        # Show the step's screen in the viewer.
        input_viewer.update(step.screen)

    rollout.register_before_hook(frame)

    def save(args):
        # The callback argument is unused; the encoder just saves its session.
        action_encoder.save_session()

    def action_encoder_frame(step):
        # Tag the step with its per-episode file path before encoding it.
        episode = step.meta['episode']
        file_path = config.basepath() / gym_environment / output_dir / str(episode)
        step.meta['filename'] = str(file_path)
        action_encoder.update(step)

    rollout.register_step(action_encoder_frame)
    rollout.register_end_session(save)

    for i_episode in range(first_episode, stop_episode):
        rollout.rollout(policy, max_timesteps=max_timesteps, episode=i_episode)
Beispiel #7
0
    rollout.register_before_hook(frame)

    def save(args):
        action_encoder.save_session()

    def action_encoder_frame(step):
        episode = step.meta['episode']
        file_path = config.basepath() / gym_environment / output_dir / str(episode)
        step.meta['filename'] = str(file_path)
        action_encoder.update(step)

    rollout.register_step(action_encoder_frame)
    rollout.register_end_session(save)

    for i_episode in range(531, 1000):
        rollout.rollout(policy, max_timesteps=3000, episode=i_episode)


if __name__ == '__main__':
    # Script entry point: load the stored visual model and controller, wrap
    # them in a VCPolicy and start the rollout session.

    gym_environment = 'SpaceInvaders-v4'

    # Raw strings: these Windows-style paths contain backslashes, and '\m' /
    # '\G' are invalid escape sequences in a normal string literal (a warning
    # today, slated to become an error). The resulting path text is unchanged.
    visuals = Storeable.load(r'.\modelzoo\GM53H301W5YS38XH').to(config.device())
    controller = torch.load(r'.\modelzoo\best_model68')
    policy = VCPolicy(visuals, controller)

    output_dir = 'rl_raw_v2'

    main(gym_environment, policy, output_dir)

                                        target_transform=segmentor)

    co_ord_conv_data_package = DataPackage(co_ord_conv_shots, StandardSelect())

    channel_coder = Params(MultiChannelAE)
    opt = Params(Adam, lr=1e-3)

    run_fac = SimpleRunFac(increment_run=False)
    run_fac.run_list.append(Run(channel_coder, None, None, co_ord_conv_data_package,
                                run_name='shots_v1', trainer=SimpleInference()))

    #run_fac = SimpleRunFac.resume(r'C:\data\runs\549', co_ord_conv_data_package)
    batch_size = 1
    epochs = 30

    shot_encoder = Run.load_model(r'c:\data\runs\549\shots_v1\epoch0060.run').eval().to(device=config.device())
    player_encoder = Run.load_model(r'c:\data\runs\580\shots_v1\epoch0081.run').eval().to(device=config.device())
    invader_encoder = Run.load_model(r'c:\data\runs\587\shots_v1\epoch0030.run').eval().to(device=config.device())
    barrier_encoder = Run.load_model(r'c:\data\runs\588\barrier\epoch0019.run').eval().to(device=config.device())

    for model, opt, loss_fn, data_package, trainer, tester, run in run_fac:
        dev, train, test, selector = data_package.loaders(batch_size=batch_size)

        model.add_ae(shot_encoder, [0, 4, 5], [0])
        model.add_ae(player_encoder, [1, 4, 5], [1])
        model.add_ae(invader_encoder, [2, 4, 5], [2])
        model.add_ae(barrier_encoder, [3, 4, 5], [3])

        model.register_forward_hook(input_viewer.view_input)
        model.register_forward_hook(input2_viewer.view_input)
        model.register_forward_hook(shot_viewer.view_output)
Beispiel #9
0
from viewer import *

# Colour-range masks and range cuts that make up the segmentation transform.
# NOTE(review): the exact effect of append=True and of SetRange's arguments is
# defined in mentalitystorm.transforms - confirm there before changing values.
shots = tf.ColorMask(
    lower=[128, 128, 128],
    upper=[255, 255, 255],
    append=True,
)
player = tf.ColorMask(
    lower=[30, 100, 40],
    upper=[70, 180, 70],
    append=True,
)
cut_player = tf.SetRange(0, 60, 0, 210, [4])
invader = tf.ColorMask(
    lower=[120, 125, 25],
    upper=[140, 140, 130],
    append=True,
)
cut_invader = tf.SetRange(0, 30, 0, 210, [5])
barrier = tf.ColorMask(
    lower=[120, 74, 30],
    upper=[190, 100, 70],
    append=True,
)
select = tf.SelectChannels([3, 4, 5, 6])

observe = tf.ViewChannels('transform', (320, 480), channels=[0, 1, 2])

# Full transform chain, applied in list order.
segmentor = TVT.Compose([
    shots,
    player,
    cut_player,
    invader,
    cut_invader,
    barrier,
    select,
    TVT.ToTensor(),
    tf.CoordConv(),
])

device = config.device()

# Load the four pretrained encoders in order (shots, player, invaders,
# barrier); each is put in eval mode and moved to the configured device.
shot_encoder, player_encoder, invaders_encoder, barrier_encoder = (
    Run.load_model(run_file).eval().to(device=config.device())
    for run_file in (
        r'.\modelzoo\vision\shots.run',
        r'.\modelzoo\vision\player.run',
        r'.\modelzoo\vision\invaders.run',
        r'.\modelzoo\vision\barrier.run',
    )
)

# Register encoder i with index lists [i, 4, 5] and [i].
visuals = MultiChannelAE()
visuals.add_ae(shot_encoder, [0, 4, 5], [0])
visuals.add_ae(player_encoder, [1, 4, 5], [1])
visuals.add_ae(invaders_encoder, [2, 4, 5], [2])
visuals.add_ae(barrier_encoder, [3, 4, 5], [3])

# Viewer callbacks are attached as forward hooks: decode first, then image.
for _hook in (view_decode, view_image):
    visuals.register_forward_hook(_hook)
del _hook

controller_file = (
    config.basepath() / 'SpaceInvaders-v4' / 'policy_runs' / '12' / 'best_model0'
)
controller = torch.load(controller_file)
Beispiel #10
0
import gym
import torch
import pickle
from mentalitystorm.config import config
from mentalitystorm.runners import Run
from mentalitystorm.basemodels import MultiChannelAE
import torchvision.transforms as TVT
import mentalitystorm.transforms as tf
from models import ThreeKeyPolicyNet
from pathlib import Path
from tqdm import tqdm
import gym_wrappers
from viewer import view_image, view_decode
from cma import CMA

device = config.device()

# Load the four pretrained encoders in order (shots, player, invaders,
# barrier); each is put in eval mode and moved to the configured device.
shot_encoder, player_encoder, invaders_encoder, barrier_encoder = (
    Run.load_model(run_file).eval().to(device=config.device())
    for run_file in (
        r'.\modelzoo\vision\shots.run',
        r'.\modelzoo\vision\player.run',
        r'.\modelzoo\vision\invaders.run',
        r'.\modelzoo\vision\barrier.run',
    )
)

# Register encoder i with index lists [i, 4, 5] and [i].
visuals = MultiChannelAE()
visuals.add_ae(shot_encoder, [0, 4, 5], [0])
visuals.add_ae(player_encoder, [1, 4, 5], [1])
visuals.add_ae(invaders_encoder, [2, 4, 5], [2])
visuals.add_ae(barrier_encoder, [3, 4, 5], [3])

# Colour-range mask; bounds are given per channel as [lower, upper].
shots = tf.ColorMask(
    lower=[128, 128, 128],
    upper=[255, 255, 255],
    append=True,
)