def train(self, model, optimizer, lossfunc, dataloader, selector, run, epoch):
    """Run one training pass of ``model`` over ``dataloader``.

    For every payload the input and target tuples are extracted by
    ``selector``, the before-hooks are fired, a single optimisation step is
    taken, and the after-hooks are fired with the model output and the loss.
    ``run.step`` is incremented once per payload.

    Args:
        model: module to train; moved to ``config.device()``, switched to
            training mode, and tagged with ``model.epoch = epoch``.
        optimizer: optimizer whose ``zero_grad()`` / ``step()`` drive the update.
        lossfunc: callable invoked as ``lossfunc(*outputs, *targets)``.
        dataloader: iterable yielding payloads.
        selector: object providing ``get_input(payload, device)`` and
            ``get_target(payload, device)``; both results are unpacked with ``*``.
        run: run-state object whose ``step`` counter is advanced.
        epoch: current epoch, forwarded to the hook argument objects.
    """
    device = config.device()
    model.to(device)
    model.train()
    model.epoch = epoch

    for payload in dataloader:
        input_data = selector.get_input(payload, device)
        target_data = selector.get_target(payload, device)

        before_args = BeforeArgs(self, payload, input_data, target_data, model, optimizer,
                                 lossfunc, dataloader, selector, run, epoch)
        self.execute_before(before_args)

        optimizer.zero_grad()
        output_data = model(*input_data)

        # isinstance (not an exact type comparison) so that tuple subclasses,
        # e.g. namedtuple outputs, are unpacked into the loss as well.
        if isinstance(output_data, tuple):
            loss = lossfunc(*output_data, *target_data)
        else:
            loss = lossfunc(output_data, *target_data)

        loss.backward()
        optimizer.step()

        after_args = AfterArgs(self, payload, input_data, target_data, model, optimizer,
                               lossfunc, dataloader, selector, run, epoch, output_data, loss)
        self.execute_after(after_args)

        run.step += 1
def test(self, model, lossfunc, dataloader, selector, run, epoch):
    """Evaluate ``model`` over ``dataloader`` without updating its weights.

    Mirrors the training loop but puts the model in eval mode, performs no
    backward pass or optimizer step, and passes ``None`` in the optimizer
    slot of the hook argument objects. ``run.step`` is incremented once per
    payload.

    Args:
        model: module to evaluate; moved to ``config.device()``, switched to
            eval mode, and tagged with ``model.epoch = epoch``.
        lossfunc: callable invoked as ``lossfunc(*outputs, *targets)``.
        dataloader: iterable yielding payloads.
        selector: object providing ``get_input(payload, device)`` and
            ``get_target(payload, device)``; both results are unpacked with ``*``.
        run: run-state object whose ``step`` counter is advanced.
        epoch: current epoch, forwarded to the hook argument objects.
    """
    device = config.device()
    model.to(device)
    model.eval()
    model.epoch = epoch

    # NOTE(review): gradients are still tracked during this loop; wrapping it
    # in torch.no_grad() would save memory if no hook needs them - confirm.
    for payload in dataloader:
        input_data = selector.get_input(payload, device)
        target_data = selector.get_target(payload, device)

        before_args = BeforeArgs(self, payload, input_data, target_data, model, None,
                                 lossfunc, dataloader, selector, run, epoch)
        self.execute_before(before_args)

        output_data = model(*input_data)

        # isinstance (not an exact type comparison) so that tuple subclasses,
        # e.g. namedtuple outputs, are unpacked into the loss as well.
        if isinstance(output_data, tuple):
            loss = lossfunc(*output_data, *target_data)
        else:
            loss = lossfunc(output_data, *target_data)

        after_args = AfterArgs(self, payload, input_data, target_data, model, None,
                               lossfunc, dataloader, selector, run, epoch, output_data, loss)
        self.execute_after(after_args)

        run.step += 1
def test_rollout_gen(self):
    """Drive ``RolloutGen`` with a stored policy and display each screen.

    Builds the 'SpaceInvaders-v4' environment, loads the visuals model and
    the controller from the models directory under ``config.basepath()``,
    wraps them in a ``VCPolicy`` and feeds every yielded screen to an
    ``ImageViewer``.
    """
    environment = gym.make('SpaceInvaders-v4')

    model_dir = config.basepath() / 'SpaceInvaders-v4' / 'models'
    visuals_path = str(model_dir / 'GM53H301W5YS38XH')
    controller_path = str(model_dir / 'best_model68')

    visuals_model = Storeable.load(visuals_path)
    visuals_model = visuals_model.to(config.device())
    controller_model = torch.load(controller_path)
    vc_policy = VCPolicy(visuals_model, controller_model)

    screen_viewer = ImageViewer('screen', (420, 360), 'numpyRGB')

    # Each item is unpacked into six fields; only the screen is displayed.
    rollout_steps = RolloutGen(environment, vc_policy)
    for screen, _observation, _reward, _done, _info, _action in rollout_steps:
        screen_viewer.update(screen)
def test_gymsim(self):
    """Iterate a ``GymSimulatorDataset`` through a DataLoader, printing rewards.

    Builds the 'SpaceInvaders-v4' environment, loads the visuals model and
    the controller from the models directory under ``config.basepath()``,
    wraps them in a ``VCPolicy``, and batches the resulting dataset
    (constructed with the value 3000) ten items at a time.
    """
    environment = gym.make('SpaceInvaders-v4')

    model_dir = config.basepath() / 'SpaceInvaders-v4' / 'models'
    visuals_path = str(model_dir / 'GM53H301W5YS38XH')
    controller_path = str(model_dir / 'best_model68')

    visuals_model = Storeable.load(visuals_path)
    visuals_model = visuals_model.to(config.device())
    controller_model = torch.load(controller_path)
    vc_policy = VCPolicy(visuals_model, controller_model)

    sim_dataset = GymSimulatorDataset(environment, vc_policy, 3000)
    batches = torch.utils.data.DataLoader(
        dataset=sim_dataset,
        batch_size=10,
        shuffle=False,
        drop_last=True,
    )

    # Each batch is unpacked into six fields; only the reward is printed.
    for _screen, _observation, _action, reward, _done, _extra in batches:
        print(reward)
def infer(self, model, lossfunc, dataloader, selector, run, epoch):
    """Run ``model`` forward over ``dataloader`` without targets or a loss.

    The model is moved to ``config.device()``, put in eval mode and tagged
    with ``model.epoch = epoch``. For each payload only the inputs are
    selected; the hook argument objects carry ``None`` in the target,
    optimizer and loss slots. ``run.step`` is incremented once per payload.

    Args:
        model: module to run.
        lossfunc: not called here; only forwarded to the hook arguments.
        dataloader: iterable yielding payloads.
        selector: object providing ``get_input(payload, device)``; the
            result is unpacked with ``*`` into the model call.
        run: run-state object whose ``step`` counter is advanced.
        epoch: current epoch, forwarded to the hook argument objects.
    """
    target_device = config.device()
    model.to(target_device)
    model.eval()
    model.epoch = epoch

    for batch in dataloader:
        inputs = selector.get_input(batch, target_device)

        self.execute_before(
            BeforeArgs(self, batch, inputs, None, model, None,
                       lossfunc, dataloader, selector, run, epoch)
        )

        outputs = model(*inputs)

        self.execute_after(
            AfterArgs(self, batch, inputs, None, model, None,
                      lossfunc, dataloader, selector, run, epoch, outputs, None)
        )

        run.step += 1
def main(gym_environment, policy, output_dir, *, start_episode=531, stop_episode=1000,
         max_timesteps=3000):
    """Roll out ``policy`` in a gym environment, feeding each step to an ActionEncoder.

    Side effects: rebinds the module-level globals ``action_encoder`` and
    ``input_viewer``.

    Args:
        gym_environment: environment id passed to ``gym.make``; also used as
            a path component for the per-episode file name.
        policy: policy object; its ``v`` attribute is handed to the
            ``ActionEncoder`` and the policy itself drives every rollout.
        output_dir: path component placed under
            ``config.basepath() / gym_environment``.
        start_episode: first episode number to roll out (default 531, the
            value previously hard-coded).
        stop_episode: episode number at which to stop, exclusive (default 1000).
        max_timesteps: per-episode step limit forwarded to
            ``rollout.rollout`` (default 3000).
    """
    global action_encoder, input_viewer

    env = gym.make(gym_environment)
    action_encoder = ActionEncoder(env, gym_environment, ActionEmbedding(env),
                                   policy.v).to(config.device())
    rollout = Rollout(env)
    input_viewer = ImageViewer('input', (320, 480), 'numpyRGB')

    def frame(step):
        """Show the step's screen in the input viewer."""
        input_viewer.update(step.screen)

    rollout.register_before_hook(frame)

    def save(args):
        """Save the action encoder's session; ``args`` is ignored."""
        action_encoder.save_session()

    def action_encoder_frame(step):
        """Record the per-episode file name in ``step.meta`` and pass the step to the encoder."""
        episode = step.meta['episode']
        file_path = config.basepath() / gym_environment / output_dir / str(episode)
        step.meta['filename'] = str(file_path)
        action_encoder.update(step)

    rollout.register_step(action_encoder_frame)
    rollout.register_end_session(save)

    for i_episode in range(start_episode, stop_episode):
        rollout.rollout(policy, max_timesteps=max_timesteps, episode=i_episode)
# NOTE(review): the statements down to the episode loop read as the tail of a
# function body whose header is not visible on this line (they use `rollout`,
# `frame`, `gym_environment`, `output_dir` and `policy` from an enclosing
# scope) - re-indent to match that scope when splicing.
rollout.register_before_hook(frame)


def save(args):
    """Save the action encoder's session; ``args`` is ignored."""
    action_encoder.save_session()


def action_encoder_frame(step):
    """Record the per-episode file name in ``step.meta`` and pass the step to the encoder."""
    episode = step.meta['episode']
    file_path = config.basepath() / gym_environment / output_dir / str(episode)
    step.meta['filename'] = str(file_path)
    action_encoder.update(step)


rollout.register_step(action_encoder_frame)
rollout.register_end_session(save)

for i_episode in range(531, 1000):
    rollout.rollout(policy, max_timesteps=3000, episode=i_episode)

if __name__ == '__main__':
    gym_environment = 'SpaceInvaders-v4'
    # Raw string: '\m' and '\G' are not valid escape sequences, so the plain
    # literal triggers an invalid-escape warning. The path value is unchanged.
    visuals = Storeable.load(r'.\modelzoo\GM53H301W5YS38XH').to(config.device())
    controller = torch.load(r'.\modelzoo\best_model68')
    policy = VCPolicy(visuals, controller)
    output_dir = 'rl_raw_v2'
    main(gym_environment, policy, output_dir)
target_transform=segmentor) co_ord_conv_data_package = DataPackage(co_ord_conv_shots, StandardSelect()) channel_coder = Params(MultiChannelAE) opt = Params(Adam, lr=1e-3) run_fac = SimpleRunFac(increment_run=False) run_fac.run_list.append(Run(channel_coder, None, None, co_ord_conv_data_package, run_name='shots_v1', trainer=SimpleInference())) #run_fac = SimpleRunFac.resume(r'C:\data\runs\549', co_ord_conv_data_package) batch_size = 1 epochs = 30 shot_encoder = Run.load_model(r'c:\data\runs\549\shots_v1\epoch0060.run').eval().to(device=config.device()) player_encoder = Run.load_model(r'c:\data\runs\580\shots_v1\epoch0081.run').eval().to(device=config.device()) invader_encoder = Run.load_model(r'c:\data\runs\587\shots_v1\epoch0030.run').eval().to(device=config.device()) barrier_encoder = Run.load_model(r'c:\data\runs\588\barrier\epoch0019.run').eval().to(device=config.device()) for model, opt, loss_fn, data_package, trainer, tester, run in run_fac: dev, train, test, selector = data_package.loaders(batch_size=batch_size) model.add_ae(shot_encoder, [0, 4, 5], [0]) model.add_ae(player_encoder, [1, 4, 5], [1]) model.add_ae(invader_encoder, [2, 4, 5], [2]) model.add_ae(barrier_encoder, [3, 4, 5], [3]) model.register_forward_hook(input_viewer.view_input) model.register_forward_hook(input2_viewer.view_input) model.register_forward_hook(shot_viewer.view_output)
from viewer import *


def _load_encoder(run_file):
    """Load a saved model, switch it to eval mode and move it to ``config.device()``."""
    return Run.load_model(run_file).eval().to(device=config.device())


# Transforms that are composed into `segmentor` below.
shots = tf.ColorMask(lower=[128, 128, 128], upper=[255, 255, 255], append=True)
player = tf.ColorMask(lower=[30, 100, 40], upper=[70, 180, 70], append=True)
cut_player = tf.SetRange(0, 60, 0, 210, [4])
invader = tf.ColorMask(lower=[120, 125, 25], upper=[140, 140, 130], append=True)
cut_invader = tf.SetRange(0, 30, 0, 210, [5])
barrier = tf.ColorMask(lower=[120, 74, 30], upper=[190, 100, 70], append=True)
select = tf.SelectChannels([3, 4, 5, 6])
# Defined here but not part of the `segmentor` pipeline.
observe = tf.ViewChannels('transform', (320, 480), channels=[0, 1, 2])

segmentor = TVT.Compose([
    shots,
    player,
    cut_player,
    invader,
    cut_invader,
    barrier,
    select,
    TVT.ToTensor(),
    tf.CoordConv(),
])

device = config.device()

# One pre-trained encoder per saved run file.
shot_encoder = _load_encoder(r'.\modelzoo\vision\shots.run')
player_encoder = _load_encoder(r'.\modelzoo\vision\player.run')
invaders_encoder = _load_encoder(r'.\modelzoo\vision\invaders.run')
barrier_encoder = _load_encoder(r'.\modelzoo\vision\barrier.run')

# Register every encoder on the combined model with its two index lists.
visuals = MultiChannelAE()
for _encoder, _first_indices, _second_indices in (
    (shot_encoder, [0, 4, 5], [0]),
    (player_encoder, [1, 4, 5], [1]),
    (invaders_encoder, [2, 4, 5], [2]),
    (barrier_encoder, [3, 4, 5], [3]),
):
    visuals.add_ae(_encoder, _first_indices, _second_indices)

for _hook in (view_decode, view_image):
    visuals.register_forward_hook(_hook)

_policy_runs = config.basepath() / 'SpaceInvaders-v4' / 'policy_runs'
controller_file = _policy_runs / '12' / 'best_model0'
controller = torch.load(controller_file)
import gym
import torch
import pickle
from mentalitystorm.config import config
from mentalitystorm.runners import Run
from mentalitystorm.basemodels import MultiChannelAE
import torchvision.transforms as TVT
import mentalitystorm.transforms as tf
from models import ThreeKeyPolicyNet
from pathlib import Path
from tqdm import tqdm
import gym_wrappers
from viewer import view_image, view_decode
from cma import CMA


def _load_encoder(run_file):
    """Load a saved model, switch it to eval mode and move it to ``config.device()``."""
    return Run.load_model(run_file).eval().to(device=config.device())


device = config.device()

# One pre-trained encoder per saved run file.
shot_encoder = _load_encoder(r'.\modelzoo\vision\shots.run')
player_encoder = _load_encoder(r'.\modelzoo\vision\player.run')
invaders_encoder = _load_encoder(r'.\modelzoo\vision\invaders.run')
barrier_encoder = _load_encoder(r'.\modelzoo\vision\barrier.run')

# Register every encoder on the combined model with its two index lists.
visuals = MultiChannelAE()
for _encoder, _first_indices, _second_indices in (
    (shot_encoder, [0, 4, 5], [0]),
    (player_encoder, [1, 4, 5], [1]),
    (invaders_encoder, [2, 4, 5], [2]),
    (barrier_encoder, [3, 4, 5], [3]),
):
    visuals.add_ae(_encoder, _first_indices, _second_indices)

shots = tf.ColorMask(
    lower=[128, 128, 128],
    upper=[255, 255, 255],
    append=True,
)