def __init__(self, temperature=0.25):
    """Load the toy VAE and TCN models and hot-start the sequence buffers.

    Args:
        temperature: stored as ``self.TEMPERATURE``.

    The constructor builds the models from the JSON files under
    ``~/navrep/models`` and fills ``sequence_z``, ``sequence_action`` and
    ``sequence_restart`` with the first ``SEQLEN`` entries of the recorded
    dataset under ``~/navrep/datasets/M/toy``.
    """
    # constants
    self.TEMPERATURE = temperature
    self.SEQLEN = 99
    self.DT = 0.2  # should be the same as data rnn was trained with
    initial_z_path = os.path.expanduser(
        "~/navrep/datasets/M/toy/000_mus_logvars_robotstates_actions_rewards_dones.npz"
    )
    tcn_model_path = os.path.expanduser("~/navrep/models/M/toytcn.json")
    vae_model_path = os.path.expanduser("~/navrep/models/V/toyvae.json")
    # V + M Models
    reset_graph()
    params = sample_hps_params._replace(max_seq_len=self.SEQLEN + 1)
    self.tcn = MDNTCN(params, gpu_mode=False)
    self.vae = ConvVAE(batch_size=1, is_training=False)
    self.vae.load_json(vae_model_path)
    self.tcn.load_json(tcn_model_path)
    # load initial image encoding; the context manager closes the .npz
    # archive, which np.load would otherwise leave open
    with np.load(initial_z_path) as arrays:
        initial_mus = arrays["mus"][:self.SEQLEN]
        initial_actions = arrays["actions"][:self.SEQLEN]
        initial_dones = arrays["dones"][:self.SEQLEN]
    # other tools
    self.rings_def = generate_rings(64, 64)
    self.viewer = None
    # environment state variables
    self.reset()
    # hot-start the tcn state
    self.sequence_z = initial_mus.reshape((1, self.SEQLEN, _Z))
    self.sequence_action = initial_actions.reshape((1, self.SEQLEN, 3))
    self.sequence_restart = initial_dones.reshape((1, self.SEQLEN))
def __init__(self, temperature=0.25):
    """Load the image VAE and RNN models and hot-start the RNN state.

    Args:
        temperature: stored as ``self.TEMPERATURE``.

    An initial latent ``self.initial_z`` is sampled from the first
    (mu, logvar) pair of the recorded dataset, then the environment is
    stepped 20 times with a zero action while the latent is pinned to it.
    """
    # constants
    self.TEMPERATURE = temperature
    self.DT = 0.2  # should be the same as data rnn was trained with
    initial_z_path = os.path.expanduser(
        "~/navrep/datasets/M/im/corridor_koze_kids_bag_mus_logvars_robotstates_actions_rewards_dones.npz"
    )
    rnn_model_path = os.path.expanduser("~/navrep/models/M/imrnn.json")
    vae_model_path = os.path.expanduser("~/navrep/models/V/imvae.json")
    # V + M Models
    reset_graph()
    self.rnn = MDNRNN(sample_hps_params, gpu_mode=False)
    self.vae = ConvVAE(batch_size=1, is_training=False, channels=3)
    self.vae.load_json(vae_model_path)
    self.rnn.load_json(rnn_model_path)
    # load initial image encoding; the context manager closes the .npz
    # archive, which np.load would otherwise leave open
    with np.load(initial_z_path) as arrays:
        initial_mu = arrays["mus"][0]
        initial_logvar = arrays["logvars"][0]
    # sample z = mu + sigma * eps, with sigma = exp(logvar / 2)
    noise = np.random.randn(*(initial_mu.shape))
    self.initial_z = initial_mu + np.exp(initial_logvar / 2.0) * noise
    # other tools
    self.viewer = None
    # environment state variables
    self.reset()
    # hot-start the rnn state
    for _ in range(20):
        self.step(np.array([0, 0, 0]), override_next_z=self.initial_z)
def __init__(
    self,
    temperature=0.25,
    initial_z_path=os.path.expanduser(
        "~/navrep/datasets/M/ian/000_mus_logvars_robotstates_actions_rewards_dones.npz"
    ),
    rnn_model_path=os.path.expanduser("~/navrep/models/M/rnn.json"),
    vae_model_path=os.path.expanduser("~/navrep/models/V/vae.json"),
):
    """Load the VAE and RNN models and hot-start the RNN state.

    Args:
        temperature: stored as ``self.TEMPERATURE``.
        initial_z_path: ``.npz`` archive providing the ``mus``, ``logvars``
            and ``robotstates`` arrays; only their first entry is used.
        rnn_model_path: JSON file loaded into the MDNRNN.
        vae_model_path: JSON file loaded into the ConvVAE.

    ``self.initial_z`` is the concatenation of a latent sampled from the
    first (mu, logvar) pair and the first two robot-state values divided by
    ``MAX_GOAL_DIST``. The environment is then stepped 20 times with a zero
    action while the latent is pinned to ``self.initial_z``.
    """
    # constants
    self.TEMPERATURE = temperature
    self.DT = 0.5  # should be the same as data rnn was trained with
    # V + M Models
    reset_graph()
    self.rnn = MDNRNN(sample_hps_params, gpu_mode=False)
    self.vae = ConvVAE(batch_size=1, is_training=False)
    self.vae.load_json(vae_model_path)
    self.rnn.load_json(rnn_model_path)
    # load initial image encoding; the context manager closes the .npz
    # archive, which np.load would otherwise leave open
    with np.load(initial_z_path) as arrays:
        initial_mu = arrays["mus"][0]
        initial_logvar = arrays["logvars"][0]
        initial_robotstate = arrays["robotstates"][0]
    # sample z = mu + sigma * eps, with sigma = exp(logvar / 2)
    noise = np.random.randn(*(initial_mu.shape))
    ini_lidar_z = initial_mu + np.exp(initial_logvar / 2.0) * noise
    ini_goal_z = initial_robotstate[:2] / MAX_GOAL_DIST
    self.initial_z = np.concatenate([ini_lidar_z, ini_goal_z], axis=-1)
    # other tools
    self.rings_def = generate_rings(64, 64)
    self.viewer = None
    # environment state variables
    self.reset()
    # hot-start the rnn state
    for _ in range(20):
        self.step(np.array([0, 0, 0]), override_next_z=self.initial_z)
from navrep.tools.rings import generate_rings from navrep.models.vae2d import ConvVAE, reset_graph DEBUG_PLOTTING = True # Parameters for training batch_size = 1 NUM_EPOCH = 100 DATA_DIR = "record" HOME = os.path.expanduser("~") vae_model_path = os.path.expanduser("~/navrep/models/V/vae.json") # create network reset_graph() vae = ConvVAE(batch_size=batch_size, is_training=False) # load vae.load_json(vae_model_path) # create training dataset dataset = archive_to_lidar_dataset("~/navrep/datasets/V/ian", limit=180) if len(dataset) == 0: raise ValueError("no scans found, exiting") print(len(dataset), "scans in dataset.") # split into batches: total_length = len(dataset) num_batches = int(np.floor(total_length / batch_size)) # rings converter
log_path = os.path.expanduser( "~/navrep/logs/V/irlvae_train_log_{}.csv".format(START_TIME)) if common_args.dry_run: model_save_path = model_save_path.replace( os.path.expanduser("~/navrep"), "/tmp/navrep") log_path = log_path.replace(os.path.expanduser("~/navrep"), "/tmp/navrep") make_dir_if_not_exists(os.path.dirname(model_save_path)) make_dir_if_not_exists(os.path.dirname(log_path)) # create network reset_graph() vae = ConvVAE(z_size=_Z, batch_size=batch_size, is_training=True, reuse=False) vae.print_trainable_params() # create training dataset dataset = archive_to_lidar_dataset(dataset_dir) if len(dataset) == 0: raise ValueError("no scans found, exiting") print(len(dataset), "scans in dataset.") # split into batches: total_length = len(dataset) num_batches = int(np.floor(total_length / batch_size)) # rings converter rings_def = generate_rings(64, 64)
time_taken = end - start # print('time taken to create batches', time_taken) batch_state = model.sess.run(model.initial_state) for batch_z, batch_action, batch_done, batch_reward in zip( z_batches, action_batches, done_batches, reward_batches ): if False: # Visually check that the batch is sound from navrep.models.vae2d import ConvVAE import matplotlib.pyplot as plt from navrep.tools.rings import generate_rings reset_graph() vae = ConvVAE(batch_size=1, is_training=False) vae.load_json(vae_model_path) rings_def = generate_rings(64, 64) rings_pred = vae.decode(batch_z[0]) * rings_def["rings_to_bool"] plt.ion() for i, ring in enumerate(rings_pred): rings_def["visualize_rings"](ring, scan=None) plt.ylim([0, 10]) plt.title(str(batch_action[0, i])) plt.pause(0.1) exit() if False: from navrep.models.vae2d import ConvVAE from navrep.tools.render import render_lidar_batch from navrep.tools.rings import generate_rings
from navrep.models.rnn import reset_graph, sample_hps_params, MDNRNN, get_pi_idx from navrep.models.vae2d import ConvVAE # parameters TEMPERATURE = 0.5 _Z = 32 sequence_z_path = os.path.expanduser( "~/navrep/datasets/M/im/corridor_koze_kids_bag_mus_logvars_actions_rewards_dones.npz" ) rnn_model_path = os.path.expanduser("~/navrep/models/M/imrnn.json") vae_model_path = os.path.expanduser("~/navrep/models/V/imvae.json") reset_graph() imrnn = MDNRNN(sample_hps_params, gpu_mode=False) imvae = ConvVAE(batch_size=1, is_training=False, channels=3) imvae.load_json(vae_model_path) imrnn.load_json(rnn_model_path) # load sequence image encoding arrays = np.load(sequence_z_path) sequence_action = arrays["actions"] sequence_mu = arrays["mus"] sequence_logvar = arrays["logvars"] sequence_z = sequence_mu + np.exp( sequence_logvar / 2.0) * np.random.randn(*(sequence_mu.shape)) SEQUENCE_LENGTH = len(sequence_mu) prev_z = sequence_z[0] prev_z_predicted = sequence_z[0]
class ToyTCNDreamEnv(object):
    """Dream environment that predicts the next lidar latent with an MDN-TCN.

    The environment keeps fixed-length buffers (``SEQLEN`` steps) of latents,
    actions and restart flags. Each ``step`` feeds the buffers to the TCN,
    samples the next latent from the predicted mixture for the last
    timestep, and shifts the buffers by one.
    """

    def __init__(self, temperature=0.25):
        """Load the toy VAE and TCN models and hot-start the sequence buffers.

        Args:
            temperature: stored as ``self.TEMPERATURE``; 0 selects the
                deterministic branch of ``step``.
        """
        # constants
        self.TEMPERATURE = temperature
        self.SEQLEN = 99
        self.DT = 0.2  # should be the same as data rnn was trained with
        initial_z_path = os.path.expanduser(
            "~/navrep/datasets/M/toy/000_mus_logvars_robotstates_actions_rewards_dones.npz"
        )
        tcn_model_path = os.path.expanduser("~/navrep/models/M/toytcn.json")
        vae_model_path = os.path.expanduser("~/navrep/models/V/toyvae.json")
        # V + M Models
        reset_graph()
        params = sample_hps_params._replace(max_seq_len=self.SEQLEN + 1)
        self.tcn = MDNTCN(params, gpu_mode=False)
        self.vae = ConvVAE(batch_size=1, is_training=False)
        self.vae.load_json(vae_model_path)
        self.tcn.load_json(tcn_model_path)
        # load initial image encoding; the context manager closes the .npz
        # archive, which np.load would otherwise leave open
        with np.load(initial_z_path) as arrays:
            initial_mus = arrays["mus"][:self.SEQLEN]
            initial_actions = arrays["actions"][:self.SEQLEN]
            initial_dones = arrays["dones"][:self.SEQLEN]
        # other tools
        self.rings_def = generate_rings(64, 64)
        self.viewer = None
        # environment state variables
        self.reset()
        # hot-start the tcn state
        self.sequence_z = initial_mus.reshape((1, self.SEQLEN, _Z))
        self.sequence_action = initial_actions.reshape((1, self.SEQLEN, 3))
        self.sequence_restart = initial_dones.reshape((1, self.SEQLEN))

    def _sample_next_z(self, logmix, mean, logstd):
        """Draw one latent vector from per-dimension Gaussian mixtures.

        Args:
            logmix: (width, num_mixture) unnormalised log mixture weights.
            mean: (width, num_mixture) component means.
            logstd: (width, num_mixture) component log standard deviations.

        Returns:
            (width,) array. With ``TEMPERATURE == 0`` this is the mean of the
            highest-weight component of every dimension; otherwise a
            temperature-scaled random sample.
        """
        width = logmix.shape[0]
        if self.TEMPERATURE == 0:
            # deterministically pick max of MDN distribution
            best_component = np.argmax(logmix, axis=-1)
            return mean[np.arange(width), best_component]
        # adjust temperatures, then softmax over the mixture axis
        mixprob = logmix / self.TEMPERATURE
        mixprob = np.exp(mixprob - mixprob.max())
        mixprob /= mixprob.sum(axis=-1, keepdims=True)
        chosen_mean = np.zeros(width)
        chosen_logstd = np.zeros(width)
        for j in range(width):
            idx = get_pi_idx(np.random.rand(), mixprob[j])
            chosen_mean[j] = mean[j][idx]
            chosen_logstd[j] = logstd[j][idx]
        rand_gaussian = np.random.randn(width) * np.sqrt(self.TEMPERATURE)
        return chosen_mean + np.exp(chosen_logstd) * rand_gaussian

    def step(self, action, override_next_z=None):
        """Advance the dream by one step.

        Args:
            action: length-3 action written into the newest action slot
                before the TCN is evaluated.
            override_next_z: optional latent stored in the buffer instead of
                the predicted one (the prediction is still returned).

        Returns:
            ``(next_z, None, next_restart, {})``; ``next_restart`` is
            always 0 because restart prediction is not used.
        """
        # the newest slot of the action buffer holds the action being applied
        self.sequence_action[0, -1, :] = action
        feed = {
            self.tcn.input_z: np.reshape(
                self.sequence_z[:self.SEQLEN], (1, self.SEQLEN, _Z)),
            self.tcn.input_action: np.reshape(
                self.sequence_action[:self.SEQLEN], (1, self.SEQLEN, 3)),
            self.tcn.input_restart: np.reshape(
                self.sequence_restart[:self.SEQLEN], (1, self.SEQLEN)),
        }
        [logmix, mean, logstd, _logrestart] = self.tcn.sess.run([
            self.tcn.out_logmix,
            self.tcn.out_mean,
            self.tcn.out_logstd,
            self.tcn.out_restart_logits,
        ], feed)
        # Only the prediction for the last timestep is consumed, so only
        # that row is sampled. Sampling all SEQLEN rows left a 2-D array
        # that the old `[0, -1, :]` lookup could not index unless
        # differential_z added a batch axis.
        mix_shape = (self.SEQLEN, _Z, sample_hps_params.num_mixture)
        next_z = self._sample_next_z(
            logmix.reshape(mix_shape)[-1],
            mean.reshape(mix_shape)[-1],
            logstd.reshape(mix_shape)[-1],
        )
        if sample_hps_params.differential_z:
            # the model predicts a delta relative to the latest latent
            next_z = self.sequence_z[0, -1, :] + next_z
        next_restart = 0  # restart logits are fetched but not used
        # update variables: shift every buffer left by one step
        self.sequence_z[0, :-1, :] = self.sequence_z[0, 1:, :]
        self.sequence_action[0, :-1, :] = self.sequence_action[0, 1:, :]
        self.sequence_restart[0, :-1] = self.sequence_restart[0, 1:]
        if override_next_z is None:
            self.sequence_z[0, -1, :] = next_z
        else:
            self.sequence_z[0, -1, :] = override_next_z
        # placeholder, overwritten by the action of the next step
        self.sequence_action[0, -1, :] = np.nan
        self.sequence_restart[0, -1] = next_restart
        # logging-only vars, used for rendering
        self.prev_action = action
        self.episode_step += 1
        return next_z, None, next_restart, {}

    def reset(self):
        """Reset the logging variables (the sequence buffers are untouched)."""
        # logging vars
        self.prev_action = np.array([0.0, 0.0, 0.0])
        self.episode_step = 0

    def render(self, mode="human", close=False):
        """Draw the lidar scan decoded from the newest latent in a pyglet window.

        Args:
            mode: "human" draws to a window; "rgb_array" is not implemented.
            close: accepted for API compatibility; not used.

        Returns:
            ``self.viewer.isopen`` in "human" mode, None for other modes.

        Raises:
            NotImplementedError: if ``mode`` is "rgb_array".
        """
        rings_pred = (self.vae.decode(self.sequence_z[0, -1].reshape(1, _Z)) *
                      self.rings_def["rings_to_bool"])
        predicted_ranges = self.rings_def["rings_to_lidar"](rings_pred, 1080)
        if mode == "rgb_array":
            raise NotImplementedError
        elif mode == "human":
            # Window and viewport size
            WINDOW_W = 256
            WINDOW_H = 256
            M_PER_PX = 25.6 / WINDOW_H
            VP_W = WINDOW_W
            VP_H = WINDOW_H
            from gym.envs.classic_control import rendering
            import pyglet
            from pyglet import gl

            # Create viewer
            if self.viewer is None:
                self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
                self.score_label = pyglet.text.Label(
                    "0000",
                    font_size=12,
                    x=20,
                    y=WINDOW_H * 2.5 / 40.00,
                    anchor_x="left",
                    anchor_y="center",
                    color=(255, 255, 255, 255),
                )
                self.currently_rendering_iteration = 0
                self.image_lock = threading.Lock()

            def make_circle(c, r, res=10):
                """Return (res, 2) vertices of a circle of radius r around c."""
                thetas = np.linspace(0, 2 * np.pi, res + 1)[:-1]
                verts = np.zeros((res, 2))
                verts[:, 0] = c[0] + r * np.cos(thetas)
                verts[:, 1] = c[1] + r * np.sin(thetas)
                return verts

            # Render in pyglet
            with self.image_lock:
                self.currently_rendering_iteration += 1
                self.viewer.draw_circle(r=10, color=(0.3, 0.3, 0.3))
                win = self.viewer.window
                win.switch_to()
                win.dispatch_events()
                win.clear()
                gl.glViewport(0, 0, VP_W, VP_H)
                # colors
                bgcolor = np.array([0.4, 0.8, 0.4])
                nosecolor = np.array([0.3, 0.3, 0.3])
                lidarcolor = np.array([1.0, 0.0, 0.0])
                # Green background
                gl.glBegin(gl.GL_QUADS)
                gl.glColor4f(bgcolor[0], bgcolor[1], bgcolor[2], 1.0)
                gl.glVertex3f(0, VP_H, 0)
                gl.glVertex3f(VP_W, VP_H, 0)
                gl.glVertex3f(VP_W, 0, 0)
                gl.glVertex3f(0, 0, 0)
                gl.glEnd()
                # LIDAR: rays start at the window centre
                i = WINDOW_W / 2.0
                j = WINDOW_H / 2.0
                angle = np.pi / 2.0
                scan = np.squeeze(predicted_ranges)
                lidar_angles = np.linspace(0, 2 * np.pi, len(scan) + 1)[:-1]
                i_ray_ends = i + scan / M_PER_PX * np.cos(lidar_angles)
                j_ray_ends = j + scan / M_PER_PX * np.sin(lidar_angles)
                is_in_fov = np.cos(lidar_angles - angle) >= 0.78
                for end_i, end_j, in_fov in zip(i_ray_ends, j_ray_ends, is_in_fov):
                    gl.glBegin(gl.GL_LINE_LOOP)
                    if in_fov:
                        gl.glColor4f(1.0, 1.0, 0.0, 0.1)
                    else:
                        gl.glColor4f(lidarcolor[0], lidarcolor[1], lidarcolor[2], 0.1)
                    gl.glVertex3f(i, j, 0)
                    gl.glVertex3f(end_i, end_j, 0)
                    gl.glEnd()
                # Agent body
                r = 0.3 / M_PER_PX
                poly = make_circle((i, j), r)
                gl.glBegin(gl.GL_POLYGON)
                color = np.array([1.0, 1.0, 1.0])
                gl.glColor4f(color[0], color[1], color[2], 1)
                for vert in poly:
                    gl.glVertex3f(vert[0], vert[1], 0)
                gl.glEnd()
                # Direction triangle
                inose = i + r * np.cos(angle)
                jnose = j + r * np.sin(angle)
                iright = i + 0.3 * r * -np.sin(angle)
                jright = j + 0.3 * r * np.cos(angle)
                ileft = i - 0.3 * r * -np.sin(angle)
                jleft = j - 0.3 * r * np.cos(angle)
                gl.glBegin(gl.GL_TRIANGLES)
                gl.glColor4f(nosecolor[0], nosecolor[1], nosecolor[2], 1)
                gl.glVertex3f(inose, jnose, 0)
                gl.glVertex3f(iright, jright, 0)
                gl.glVertex3f(ileft, jleft, 0)
                gl.glEnd()
                # Text
                self.score_label.text = "A {:.1f} {:.1f} {:.1f} S {}".format(
                    self.prev_action[0],
                    self.prev_action[1],
                    self.prev_action[2],
                    self.episode_step,
                )
                self.score_label.draw()
                win.flip()
                return self.viewer.isopen
elif backend == "VAELSTM": from navrep.scripts.train_vaelstm import _Z, _H elif backend == "VAE_LSTM": from navrep.scripts.train_vae import _Z from navrep.scripts.train_rnn import _H elif backend == "VAE1D_LSTM": from navrep.scripts.train_vae1d import _Z from navrep.scripts.train_rnn import _H # load W / M model model = None if backend == "VAE_LSTM": vae_model_path = os.path.join(MODELDIR, "V", environment + "vae.json") reset_graph() vae = ConvVAE(z_size=_Z, batch_size=1, is_training=False) vae.load_json(vae_model_path) hps = default_hps() hps = hps._replace(seq_width=_Z + _G, action_width=_A, rnn_size=_H) rnn = MDNRNN(hps, gpu_mode=gpu) rnn.load_json(path) elif backend == "VAE1D_LSTM": vae_model_path = os.path.join(MODELDIR, "V", environment + "vae1d.json") reset_graph() reset_graph() vae = Conv1DVAE(z_size=_Z, batch_size=1, is_training=False) vae.load_json(vae_model_path) hps = default_hps() hps = hps._replace(seq_width=_Z + _G, action_width=_A, rnn_size=_H) rnn = MDNRNN(hps, gpu_mode=gpu)
class ImDreamEnv(object):
    """Dream environment that predicts the next image latent with an MDN-RNN.

    ``step`` feeds the previous latent, the action and the RNN state to the
    model and samples the next latent from the predicted mixture; ``render``
    decodes the current latent with the VAE and shows it in a pyglet window.
    """

    def __init__(self, temperature=0.25):
        """Load the image VAE and RNN models and hot-start the RNN state.

        Args:
            temperature: stored as ``self.TEMPERATURE``; 0 selects the
                deterministic branch of ``step``.
        """
        # constants
        self.TEMPERATURE = temperature
        self.DT = 0.2  # should be the same as data rnn was trained with
        initial_z_path = os.path.expanduser(
            "~/navrep/datasets/M/im/corridor_koze_kids_bag_mus_logvars_robotstates_actions_rewards_dones.npz"
        )
        rnn_model_path = os.path.expanduser("~/navrep/models/M/imrnn.json")
        vae_model_path = os.path.expanduser("~/navrep/models/V/imvae.json")
        # V + M Models
        reset_graph()
        self.rnn = MDNRNN(sample_hps_params, gpu_mode=False)
        self.vae = ConvVAE(batch_size=1, is_training=False, channels=3)
        self.vae.load_json(vae_model_path)
        self.rnn.load_json(rnn_model_path)
        # load initial image encoding; the context manager closes the .npz
        # archive, which np.load would otherwise leave open
        with np.load(initial_z_path) as arrays:
            initial_mu = arrays["mus"][0]
            initial_logvar = arrays["logvars"][0]
        # sample z = mu + sigma * eps, with sigma = exp(logvar / 2)
        noise = np.random.randn(*(initial_mu.shape))
        self.initial_z = initial_mu + np.exp(initial_logvar / 2.0) * noise
        # other tools
        self.viewer = None
        # environment state variables
        self.reset()
        # hot-start the rnn state
        for _ in range(20):
            self.step(np.array([0, 0, 0]), override_next_z=self.initial_z)

    def _sample_next_z(self, logmix, mean, logstd):
        """Draw one latent vector from per-dimension Gaussian mixtures.

        Args:
            logmix: (width, num_mixture) unnormalised log mixture weights.
            mean: (width, num_mixture) component means.
            logstd: (width, num_mixture) component log standard deviations.

        Returns:
            (width,) array. With ``TEMPERATURE == 0`` this is the mean of the
            highest-weight component of every dimension; otherwise a
            temperature-scaled random sample.
        """
        width = logmix.shape[0]
        if self.TEMPERATURE == 0:
            # deterministically pick max of MDN distribution
            best_component = np.argmax(logmix, axis=-1)
            return mean[(range(width), best_component)]
        # sample from modelled MDN distribution
        mixprob = np.copy(logmix) / self.TEMPERATURE  # adjust temperatures
        mixprob -= mixprob.max()
        mixprob = np.exp(mixprob)
        mixprob /= mixprob.sum(axis=1).reshape(width, 1)
        chosen_mean = np.zeros(width)
        chosen_logstd = np.zeros(width)
        for j in range(width):
            idx = get_pi_idx(np.random.rand(), mixprob[j])
            chosen_mean[j] = mean[j][idx]
            chosen_logstd[j] = logstd[j][idx]
        rand_gaussian = np.random.randn(width) * np.sqrt(self.TEMPERATURE)
        return chosen_mean + np.exp(chosen_logstd) * rand_gaussian

    def step(self, action, override_next_z=None):
        """Advance the dream by one step.

        Args:
            action: length-3 action fed to the RNN.
            override_next_z: optional latent that replaces the predicted one
                as the stored state (the prediction is still returned).

        Returns:
            ``(next_z, None, next_restart, {})``; ``next_restart`` is
            always 0 because restart prediction is not used.
        """
        feed = {
            self.rnn.input_z: np.reshape(self.prev_z, (1, 1, _Z)),
            self.rnn.input_action: np.reshape(action, (1, 1, 3)),
            self.rnn.input_restart: np.reshape(self.prev_restart, (1, 1)),
            self.rnn.initial_state: self.rnn_state,
        }
        [logmix, mean, logstd, _logrestart, next_state] = self.rnn.sess.run(
            [
                self.rnn.out_logmix,
                self.rnn.out_mean,
                self.rnn.out_logstd,
                self.rnn.out_restart_logits,
                self.rnn.final_state,
            ],
            feed,
        )
        next_z = self._sample_next_z(logmix, mean, logstd)
        if sample_hps_params.differential_z:
            # the model predicts a delta relative to the previous latent
            next_z = self.prev_z + next_z
        next_restart = 0  # restart logits are fetched but not used
        self.prev_z = next_z if override_next_z is None else override_next_z
        self.prev_restart = next_restart
        self.rnn_state = next_state
        # logging-only vars, used for rendering
        self.prev_action = action
        self.episode_step += 1
        return next_z, None, next_restart, {}

    def reset(self):
        """Restore the initial latent, restart flag and zero RNN state."""
        self.prev_z = self.initial_z
        self.prev_restart = np.array([1])
        self.rnn_state = self.rnn.sess.run(self.rnn.zero_state)
        # logging vars
        self.prev_action = np.array([0.0, 0.0, 0.0])
        self.episode_step = 0

    def render(self, mode="human", close=False):
        """Show the image decoded from the current latent in a pyglet window.

        Args:
            mode: "human" draws to a window; "rgb_array" is not implemented.
            close: accepted for API compatibility; not used.

        Returns:
            ``self.viewer.isopen`` in "human" mode, None for other modes.

        Raises:
            NotImplementedError: if ``mode`` is "rgb_array".
        """
        img_pred = (self.vae.decode(self.prev_z.reshape(1, _Z)) * 255).astype(np.uint8)
        img_pred = img_pred.reshape(_64, _64, 3)
        if mode == "rgb_array":
            raise NotImplementedError
        elif mode == "human":
            # Window and viewport size
            WINDOW_W = 256
            WINDOW_H = 256
            VP_W = WINDOW_W
            VP_H = WINDOW_H
            from gym.envs.classic_control import rendering
            import pyglet
            from pyglet import gl
            from pyglet.gl.gl import GLubyte

            # Create pyglet image from the flat RGB byte sequence
            pixels = img_pred.flatten()
            rawData = (GLubyte * len(pixels))(*pixels)
            image_data = pyglet.image.ImageData(_64, _64, 'RGB', rawData)
            # Create viewer
            if self.viewer is None:
                self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
                self.score_label = pyglet.text.Label(
                    "0000",
                    font_size=12,
                    x=20,
                    y=WINDOW_H * 2.5 / 40.00,
                    anchor_x="left",
                    anchor_y="center",
                    color=(255, 255, 255, 255),
                )
                self.currently_rendering_iteration = 0
                self.image_lock = threading.Lock()
            # Render in pyglet
            with self.image_lock:
                self.currently_rendering_iteration += 1
                self.viewer.draw_circle(r=10, color=(0.3, 0.3, 0.3))
                win = self.viewer.window
                win.switch_to()
                win.dispatch_events()
                win.clear()
                gl.glViewport(0, 0, VP_W, VP_H)
                # Image
                image_data.blit(96, 96)
                # Text
                self.score_label.text = "A {:.1f} {:.1f} {:.1f} S {}".format(
                    self.prev_action[0],
                    self.prev_action[1],
                    self.prev_action[2],
                    self.episode_step,
                )
                self.score_label.draw()
                win.flip()
                return self.viewer.isopen
from __future__ import print_function import numpy as np import os from navrep.models.vae2d import ConvVAE, reset_graph from navrep.tools.rings import generate_rings from pyniel.python_tools.path_tools import make_dir_if_not_exists # create network reset_graph() imvae = ConvVAE( batch_size=1, is_training=True, channels=3, ) imvae.load_json(os.path.expanduser("~/navrep/models/V/imvae.json")) # rings converter rings_def = generate_rings(64, 64) # labels to learn are x, r, d (obs, reward, done) dataset_folder = os.path.expanduser("~/navrep/datasets/V/im") files = [] for dirpath, dirnames, filenames in os.walk(dataset_folder): for filename in [f for f in filenames if f.endswith(".npz")]: files.append(os.path.join(dirpath, filename)) files = sorted(files) for path in files: arrays = np.load(path) images = arrays["images"] rewards = arrays["rewards"]
def __init__(self, backend, encoding,
             rnn_model_path=os.path.expanduser("~/navrep/models/M/rnn.json"),
             rnn1d_model_path=os.path.expanduser("~/navrep/models/M/rnn1d.json"),
             vae_model_path=os.path.expanduser("~/navrep/models/V/vae.json"),
             vae1d_model_path=os.path.expanduser("~/navrep/models/V/vae1d.json"),
             gpt_model_path=os.path.expanduser("~/navrep/models/W/gpt"),
             gpt1d_model_path=os.path.expanduser("~/navrep/models/W/gpt1d"),
             vae1dlstm_model_path=os.path.expanduser("~/navrep/models/W/vae1dlstm"),
             vaelstm_model_path=os.path.expanduser("~/navrep/models/W/vaelstm"),
             gpu=False,
             encoder_to_share_model_with=None,  # another EnvEncoder
             ):
    """Select the backend, size the observation space and load the models.

    Args:
        backend: one of "GPT", "GPT1D", "VAE1DLSTM", "VAELSTM", "VAE_LSTM",
            "VAE1D_LSTM".
        encoding: one of "V_ONLY", "VM", "M_ONLY".
        rnn_model_path, rnn1d_model_path, vae_model_path, vae1d_model_path,
        gpt_model_path, gpt1d_model_path, vae1dlstm_model_path,
        vaelstm_model_path: model files; only those matching ``backend``
            are read.
        gpu: forwarded to the model constructors / checkpoint loader.
        encoder_to_share_model_with: optional encoder whose ``vae`` and
            ``rnn`` are reused instead of loading new models.

    Raises:
        NotImplementedError: for an unknown ``backend`` or ``encoding``.
    """
    LIDAR_NORM_FACTOR = None
    # latent sizes come from the training script of the chosen backend
    if backend == "GPT":
        from navrep.scripts.train_gpt import _Z, _H
    elif backend == "GPT1D":
        from navrep.scripts.train_gpt1d import _Z, _H
        from navrep.tools.wdataset import LIDAR_NORM_FACTOR
    elif backend == "VAE1DLSTM":
        from navrep.scripts.train_vae1dlstm import _Z, _H
        from navrep.tools.wdataset import LIDAR_NORM_FACTOR
    elif backend == "VAELSTM":
        from navrep.scripts.train_vaelstm import _Z, _H
    elif backend == "VAE_LSTM":
        from navrep.scripts.train_vae import _Z
        from navrep.scripts.train_rnn import _H
    elif backend == "VAE1D_LSTM":
        from navrep.scripts.train_vae1d import _Z
        from navrep.scripts.train_rnn import _H
        from navrep.scripts.train_vae1d import MAX_LIDAR_DIST as LIDAR_NORM_FACTOR
    else:
        # fail here with the intended error instead of an UnboundLocalError
        # on _Z a few lines below
        raise NotImplementedError("unknown backend: {}".format(backend))
    self._Z = _Z
    self._H = _H
    self.LIDAR_NORM_FACTOR = LIDAR_NORM_FACTOR
    self.encoding = encoding
    self.backend = backend
    if self.encoding == "V_ONLY":
        self.encoding_dim = _Z + _RS
    elif self.encoding == "VM":
        self.encoding_dim = _Z + _H + _RS
    elif self.encoding == "M_ONLY":
        self.encoding_dim = _H + _RS
    else:
        raise NotImplementedError
    self.observation_space = spaces.Box(low=-np.inf, high=np.inf,
                                        shape=(self.encoding_dim,), dtype=np.float32)
    # V + M Models
    if encoder_to_share_model_with is not None:
        self.vae = encoder_to_share_model_with.vae
        self.rnn = encoder_to_share_model_with.rnn
    else:
        # load world model
        if self.backend == "VAE_LSTM":
            reset_graph()
            self.vae = ConvVAE(z_size=_Z, batch_size=1, is_training=False)
            self.vae.load_json(vae_model_path)
            # always define the attribute so model sharing works for V_ONLY
            self.rnn = None
            if self.encoding in ["VM", "M_ONLY"]:
                hps = sample_hps_params._replace(
                    seq_width=_Z + _G, action_width=_A, rnn_size=_H)
                self.rnn = MDNRNN(hps, gpu_mode=gpu)
                self.rnn.load_json(rnn_model_path)
        elif self.backend == "VAE1D_LSTM":
            reset_graph()
            self.vae = Conv1DVAE(z_size=_Z, batch_size=1, is_training=False)
            self.vae.load_json(vae1d_model_path)
            # always define the attribute so model sharing works for V_ONLY
            self.rnn = None
            if self.encoding in ["VM", "M_ONLY"]:
                hps = sample_hps_params._replace(
                    seq_width=_Z + _G, action_width=_A, rnn_size=_H)
                self.rnn = MDNRNN(hps, gpu_mode=gpu)
                self.rnn.load_json(rnn1d_model_path)
        elif self.backend == "GPT":
            mconf = GPTConfig(BLOCK_SIZE, _H)
            model = GPT(mconf, gpu=gpu)
            load_checkpoint(model, gpt_model_path, gpu=gpu)
            self.vae = model
            self.rnn = model
        elif self.backend == "GPT1D":
            mconf = GPTConfig(BLOCK_SIZE, _H)
            model = GPT1D(mconf, gpu=gpu)
            load_checkpoint(model, gpt1d_model_path, gpu=gpu)
            self.vae = model
            self.rnn = model
        elif self.backend == "VAELSTM":
            mconf = VAELSTMConfig(_Z, _H)
            model = VAELSTM(mconf, gpu=gpu)
            load_checkpoint(model, vaelstm_model_path, gpu=gpu)
            self.vae = model
            self.rnn = model
        elif self.backend == "VAE1DLSTM":
            mconf = VAE1DLSTMConfig(_Z, _H)
            model = VAE1DLSTM(mconf, gpu=gpu)
            load_checkpoint(model, vae1dlstm_model_path, gpu=gpu)
            self.vae = model
            self.rnn = model
        else:
            raise NotImplementedError
    # other tools
    self.rings_def = generate_rings(_64, _64)
    self.viewer = None
    # environment state variables
    self.reset()
class EnvEncoder(object):
    """ Generic class to encode the observations of an environment,
    look at EncodedEnv to see how it is typically used """

    def __init__(self, backend, encoding,
                 rnn_model_path=os.path.expanduser("~/navrep/models/M/rnn.json"),
                 rnn1d_model_path=os.path.expanduser("~/navrep/models/M/rnn1d.json"),
                 vae_model_path=os.path.expanduser("~/navrep/models/V/vae.json"),
                 vae1d_model_path=os.path.expanduser("~/navrep/models/V/vae1d.json"),
                 gpt_model_path=os.path.expanduser("~/navrep/models/W/gpt"),
                 gpt1d_model_path=os.path.expanduser("~/navrep/models/W/gpt1d"),
                 vae1dlstm_model_path=os.path.expanduser("~/navrep/models/W/vae1dlstm"),
                 vaelstm_model_path=os.path.expanduser("~/navrep/models/W/vaelstm"),
                 gpu=False,
                 encoder_to_share_model_with=None,  # another EnvEncoder
                 ):
        """Select the backend, size the observation space and load the models.

        Args:
            backend: one of "GPT", "GPT1D", "VAE1DLSTM", "VAELSTM",
                "VAE_LSTM", "VAE1D_LSTM".
            encoding: one of "V_ONLY", "VM", "M_ONLY".
            rnn_model_path, rnn1d_model_path, vae_model_path,
            vae1d_model_path, gpt_model_path, gpt1d_model_path,
            vae1dlstm_model_path, vaelstm_model_path: model files; only
                those matching ``backend`` are read.
            gpu: forwarded to the model constructors / checkpoint loader.
            encoder_to_share_model_with: optional encoder whose ``vae`` and
                ``rnn`` are reused instead of loading new models.

        Raises:
            NotImplementedError: for an unknown ``backend`` or ``encoding``.
        """
        LIDAR_NORM_FACTOR = None
        # latent sizes come from the training script of the chosen backend
        if backend == "GPT":
            from navrep.scripts.train_gpt import _Z, _H
        elif backend == "GPT1D":
            from navrep.scripts.train_gpt1d import _Z, _H
            from navrep.tools.wdataset import LIDAR_NORM_FACTOR
        elif backend == "VAE1DLSTM":
            from navrep.scripts.train_vae1dlstm import _Z, _H
            from navrep.tools.wdataset import LIDAR_NORM_FACTOR
        elif backend == "VAELSTM":
            from navrep.scripts.train_vaelstm import _Z, _H
        elif backend == "VAE_LSTM":
            from navrep.scripts.train_vae import _Z
            from navrep.scripts.train_rnn import _H
        elif backend == "VAE1D_LSTM":
            from navrep.scripts.train_vae1d import _Z
            from navrep.scripts.train_rnn import _H
            from navrep.scripts.train_vae1d import MAX_LIDAR_DIST as LIDAR_NORM_FACTOR
        else:
            # fail here with the intended error instead of an
            # UnboundLocalError on _Z a few lines below
            raise NotImplementedError("unknown backend: {}".format(backend))
        self._Z = _Z
        self._H = _H
        self.LIDAR_NORM_FACTOR = LIDAR_NORM_FACTOR
        self.encoding = encoding
        self.backend = backend
        if self.encoding == "V_ONLY":
            self.encoding_dim = _Z + _RS
        elif self.encoding == "VM":
            self.encoding_dim = _Z + _H + _RS
        elif self.encoding == "M_ONLY":
            self.encoding_dim = _H + _RS
        else:
            raise NotImplementedError
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf,
                                            shape=(self.encoding_dim,), dtype=np.float32)
        # V + M Models
        if encoder_to_share_model_with is not None:
            self.vae = encoder_to_share_model_with.vae
            self.rnn = encoder_to_share_model_with.rnn
        else:
            # load world model
            if self.backend == "VAE_LSTM":
                reset_graph()
                self.vae = ConvVAE(z_size=_Z, batch_size=1, is_training=False)
                self.vae.load_json(vae_model_path)
                # always define the attribute so model sharing works for V_ONLY
                self.rnn = None
                if self.encoding in ["VM", "M_ONLY"]:
                    hps = sample_hps_params._replace(
                        seq_width=_Z + _G, action_width=_A, rnn_size=_H)
                    self.rnn = MDNRNN(hps, gpu_mode=gpu)
                    self.rnn.load_json(rnn_model_path)
            elif self.backend == "VAE1D_LSTM":
                reset_graph()
                self.vae = Conv1DVAE(z_size=_Z, batch_size=1, is_training=False)
                self.vae.load_json(vae1d_model_path)
                # always define the attribute so model sharing works for V_ONLY
                self.rnn = None
                if self.encoding in ["VM", "M_ONLY"]:
                    hps = sample_hps_params._replace(
                        seq_width=_Z + _G, action_width=_A, rnn_size=_H)
                    self.rnn = MDNRNN(hps, gpu_mode=gpu)
                    self.rnn.load_json(rnn1d_model_path)
            elif self.backend == "GPT":
                mconf = GPTConfig(BLOCK_SIZE, _H)
                model = GPT(mconf, gpu=gpu)
                load_checkpoint(model, gpt_model_path, gpu=gpu)
                self.vae = model
                self.rnn = model
            elif self.backend == "GPT1D":
                mconf = GPTConfig(BLOCK_SIZE, _H)
                model = GPT1D(mconf, gpu=gpu)
                load_checkpoint(model, gpt1d_model_path, gpu=gpu)
                self.vae = model
                self.rnn = model
            elif self.backend == "VAELSTM":
                mconf = VAELSTMConfig(_Z, _H)
                model = VAELSTM(mconf, gpu=gpu)
                load_checkpoint(model, vaelstm_model_path, gpu=gpu)
                self.vae = model
                self.rnn = model
            elif self.backend == "VAE1DLSTM":
                mconf = VAE1DLSTMConfig(_Z, _H)
                model = VAE1DLSTM(mconf, gpu=gpu)
                load_checkpoint(model, vae1dlstm_model_path, gpu=gpu)
                self.vae = model
                self.rnn = model
            else:
                raise NotImplementedError
        # other tools
        self.rings_def = generate_rings(_64, _64)
        self.viewer = None
        # environment state variables
        self.reset()

    @staticmethod
    def _latest_window(sequence, max_len):
        """Return the last ``max_len`` items of ``sequence`` (``max_len`` > 0)."""
        return sequence[-max_len:]

    def _close_viewer(self):
        """Close the current viewer, if any.

        The viewer is either an object with a ``close`` method or the
        matplotlib figure stored by ``_render_rings_polar``, which has to be
        closed through pyplot.
        """
        if self.viewer is None:
            return
        if hasattr(self.viewer, "close"):
            self.viewer.close()
        else:
            import matplotlib.pyplot as plt
            plt.close(self.viewer)

    def reset(self):
        """Reset the recurrent state / step history and the stored lidar latent."""
        if self.encoding in ["VM", "M_ONLY"]:
            if self.backend in ["VAE_LSTM", "VAE1D_LSTM"]:
                self.state = rnn_init_state(self.rnn)
            elif self.backend in ["GPT", "VAELSTM", "VAE1DLSTM", "GPT1D"]:
                self.gpt_sequence = []
        self.lidar_z = np.zeros(self._Z)

    def close(self):
        """Close the viewer if one was created."""
        self._close_viewer()

    def _get_last_decoded_scan(self):
        """Decode the stored lidar latent back into a flat scan of ``_L`` ranges."""
        obs_pred = self.vae.decode(self.lidar_z.reshape((1, self._Z)))
        if self.backend in ["VAE1DLSTM", "GPT1D", "VAE1D_LSTM"]:
            decoded_scan = (obs_pred * self.LIDAR_NORM_FACTOR).reshape((_L))
        else:
            rings_pred = obs_pred * self.rings_def["rings_to_bool"]
            decoded_scan = self.rings_def["rings_to_lidar"](rings_pred, _L).reshape((_L))
        return decoded_scan

    def _encode_obs(self, obs, action):
        """
        obs is (lidar, other_obs)
        where lidar is (time_samples, ray, channel)
        and other_obs is (5,) - [goal_x, goal_y, vel_x, vel_y, vel_theta] all in robot frame

        h is (32+2+512), i.e. concat[lidar_z, robotstate, h rnn state]
        lidar_z is -inf, inf
        h rnn state is ?
        other_obs is -inf, inf
        """
        # convert lidar scan to obs
        lidar_scan = obs[0]  # latest scan only obs (buffer, ray, channel)
        lidar_scan = lidar_scan.reshape(1, _L).astype(np.float32)
        lidar_mode = "scans" if "1D" in self.backend else "rings"
        lidar_obs = scans_to_lidar_obs(lidar_scan, lidar_mode, self.rings_def, channel_first=False)
        self.last_lidar_obs = lidar_obs  # for rendering purposes
        # obs to z, mu, logvar
        mu, logvar = self.vae.encode_mu_logvar(lidar_obs)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        if NO_VAE_VAR:
            lidar_z = mu * 1.
        else:
            lidar_z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
        # encode obs through V + M
        self.lidar_z = lidar_z
        if self.encoding == "V_ONLY":
            encoded_obs = np.concatenate([self.lidar_z, obs[1]], axis=0)
        elif self.encoding in ["VM", "M_ONLY"]:
            # get h
            if self.backend in ["VAE_LSTM", "VAE1D_LSTM"]:
                goal_z = obs[1][:2] / MAX_GOAL_DIST
                rnn_z = np.concatenate([lidar_z, goal_z], axis=-1)
                self.state = rnn_next_state(self.rnn, rnn_z, action, self.state)
                h = self.state.h[0]
            elif self.backend in ["GPT", "VAELSTM", "VAE1DLSTM", "GPT1D"]:
                self.gpt_sequence.append(dict(obs=lidar_obs[0], state=obs[1][:2], action=action))
                # Keep the most recent BLOCK_SIZE steps. Slicing from the
                # front kept the oldest ones, so every new step was dropped
                # once the window was full.
                self.gpt_sequence = self._latest_window(self.gpt_sequence, BLOCK_SIZE)
                h = self.rnn.get_h(self.gpt_sequence)
            # encoded obs
            if self.encoding == "VM":
                encoded_obs = np.concatenate([self.lidar_z, obs[1], h], axis=0)
            elif self.encoding == "M_ONLY":
                encoded_obs = np.concatenate([obs[1], h], axis=0)
        return encoded_obs

    def _render_rings_polar(self, close, save_to_file=False):
        """Plot the last rings observation and its reconstruction with matplotlib.

        Args:
            close: if true, close the viewer (if any) and return.
            save_to_file: if true, save both figures as PNG files in /tmp.

        Returns:
            False for the 1D backends (nothing is drawn), otherwise None.
        """
        if close:
            self._close_viewer()
            return
        # rendering
        if self.backend in ["VAE1DLSTM", "GPT1D", "VAE1D_LSTM"]:
            return False
        else:
            last_rings_obs = self.last_lidar_obs.reshape((_64, _64, 1))
            last_rings_pred = self.vae.decode(self.lidar_z.reshape((1, self._Z))).reshape((_64, _64, 1))
            import matplotlib.pyplot as plt
            plt.ion()
            fig, (ax1, ax2) = plt.subplots(
                1, 2, subplot_kw=dict(projection="polar"), num="rings"
            )
            ax1.clear()
            ax2.clear()
            if self.viewer is None:
                self.rendering_iteration = 0
                self.viewer = fig
            self.rings_def["visualize_rings"](last_rings_obs, scan=None, fig=fig, ax=ax1)
            self.rings_def["visualize_rings"](last_rings_pred, scan=None, fig=fig, ax=ax2)
            ax1.set_ylim([0, 10])
            ax1.set_title("ground truth")
            ax2.set_ylim([0, 10])
            ax2.set_title("lidar reconstruction")
            # rings box viz
            fig2, (ax1, ax2) = plt.subplots(1, 2, num="2d")
            ax1.clear()
            ax2.clear()
            ax1.imshow(np.squeeze(last_rings_obs), cmap=plt.cm.Greys)
            ax2.imshow(np.squeeze(last_rings_pred), cmap=plt.cm.Greys)
            ax1.set_title("ground truth")
            ax2.set_title("lidar reconstruction")
            # update
            plt.pause(0.01)
            self.rendering_iteration += 1
            if save_to_file:
                fig.savefig(
                    "/tmp/encodedenv_polar{:04d}.png".format(self.rendering_iteration))
                fig2.savefig(
                    "/tmp/encodedenv_box{:04d}.png".format(self.rendering_iteration))

    def _render_rings(self, close, save_to_file=False):
        """Draw the last rings observation and its reconstruction with pyglet.

        Args:
            close: if true, close the viewer (if any) and return.
            save_to_file: if true, save the color buffer as a PNG in /tmp.

        Returns:
            None when closing, False for the 1D backends (nothing is drawn),
            otherwise ``self.viewer.isopen``.
        """
        if close:
            self._close_viewer()
            return
        # rendering
        if self.backend in ["VAE1DLSTM", "GPT1D", "VAE1D_LSTM"]:
            return False
        else:
            last_rings_obs = self.last_lidar_obs.reshape((_64, _64))
            last_rings_pred = self.vae.decode(self.lidar_z.reshape((1, self._Z))).reshape((_64, _64))
            # Window and viewport size
            ring_size = _64  # grid cells
            padding = 4  # grid cells
            grid_size = 1  # px per grid cell
            WINDOW_W = (2 * ring_size + 3 * padding) * grid_size
            WINDOW_H = (1 * ring_size + 2 * padding) * grid_size
            VP_W = WINDOW_W
            VP_H = WINDOW_H
            from gym.envs.classic_control import rendering
            import pyglet
            from pyglet import gl
            # Create viewer
            if self.viewer is None:
                self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
                self.rendering_iteration = 0
            # Render in pyglet
            win = self.viewer.window
            win.switch_to()
            win.dispatch_events()
            win.clear()
            gl.glViewport(0, 0, VP_W, VP_H)
            # colors
            bgcolor = np.array([0.4, 0.8, 0.4])
            # Green background
            gl.glBegin(gl.GL_QUADS)
            gl.glColor4f(bgcolor[0], bgcolor[1], bgcolor[2], 1.0)
            gl.glVertex3f(0, VP_H, 0)
            gl.glVertex3f(VP_W, VP_H, 0)
            gl.glVertex3f(VP_W, 0, 0)
            gl.glVertex3f(0, 0, 0)
            gl.glEnd()
            # rings - observation on the left, reconstruction on the right
            w_offset = 0
            for rings in [last_rings_obs, last_rings_pred]:
                for i in range(ring_size):
                    for j in range(ring_size):
                        cell_color = 1 - rings[i, j]
                        cell_y = (padding + i) * grid_size  # px
                        cell_x = (padding + j + w_offset) * grid_size  # px
                        gl.glBegin(gl.GL_QUADS)
                        gl.glColor4f(cell_color, cell_color, cell_color, 1.0)
                        gl.glVertex3f(cell_x, cell_y + grid_size, 0)
                        gl.glVertex3f(cell_x + grid_size, cell_y + grid_size, 0)
                        gl.glVertex3f(cell_x + grid_size, cell_y, 0)
                        gl.glVertex3f(cell_x, cell_y, 0)
                        gl.glEnd()
                w_offset += ring_size + padding
            if save_to_file:
                pyglet.image.get_buffer_manager().get_color_buffer().save(
                    "/tmp/encodeder_rings{:04d}.png".format(self.rendering_iteration))
            # actualize
            win.flip()
            self.rendering_iteration += 1
            return self.viewer.isopen
from navrep.models.rnn import (
    reset_graph,
    sample_hps_params,
    MDNRNN,
    get_pi_idx,
)
from navrep.models.vae2d import ConvVAE

# ---- parameters ----
TEMPERATURE = 0.5
_Z = 32

# ---- file locations (dataset archive, then M and V model weights) ----
sequence_z_path = os.path.expanduser(
    "~/navrep/datasets/M/ian/000_mus_logvars_robotstates_actions_rewards_dones.npz"
)
rnn_model_path, vae_model_path = (
    os.path.expanduser(location)
    for location in ("~/navrep/models/M/rnn.json", "~/navrep/models/V/vae.json")
)

# ---- models: build both on a fresh graph, then restore their weights ----
reset_graph()
rnn = MDNRNN(sample_hps_params, gpu_mode=False)
vae = ConvVAE(batch_size=1, is_training=False)
vae.load_json(vae_model_path)
rnn.load_json(rnn_model_path)

rings_def = generate_rings(64, 64)

# ---- recorded sequence: actions and the per-step (mu, logvar) encodings ----
arrays = np.load(sequence_z_path)
sequence_action, sequence_mu, sequence_logvar = (
    arrays[key] for key in ("actions", "mus", "logvars")
)
SEQUENCE_LENGTH = len(sequence_mu)
# draw one z per step: mu + exp(logvar / 2) * standard normal noise
sequence_z = sequence_mu + np.exp(sequence_logvar / 2.0) * np.random.randn(
    *sequence_mu.shape)
# Per-variant settings: where the trained VAE lives and which V / M dataset
# folders go with it. Only the variants visible here are handled in this span.
if VARIANT == "marktwo":
    SS = None
    vae_path = os.path.expanduser("~/navrep/models/V/marktwovae.json")
    V_dataset_folder = os.path.expanduser("~/navrep/datasets/V/marktwo")
    M_dataset_folder = os.path.expanduser("~/navrep/datasets/M/marktwo")
if VARIANT == "navreptrain":
    SS = None
    vae_path = os.path.expanduser("~/navrep/models/V/navreptrainvae.json")
    V_dataset_folder = os.path.expanduser("~/navrep/datasets/V/navreptrain")
    M_dataset_folder = os.path.expanduser("~/navrep/datasets/M/navreptrain")
# create network (inference mode, batch of one) and restore its weights
reset_graph()
vae = ConvVAE(
    z_size=_Z,
    batch_size=1,
    is_training=False,
)
vae.load_json(vae_path)
# rings converter
rings_def = generate_rings(64, 64)
# labels to learn are x, r, d (obs, reward, done)
# Recursively collect every .npz archive below the V dataset folder.
files = []
for dirpath, dirnames, filenames in os.walk(V_dataset_folder):
    for filename in [f for f in filenames if f.endswith(".npz")]:
        files.append(os.path.join(dirpath, filename))
# sorted so the archives are visited in a reproducible path order
files = sorted(files)
for path in files:
# ---- training hyper-parameters ----
batch_size = 100
NUM_EPOCH = 1000  # 10
DATA_DIR = "record"

# ---- where the trained weights are written (directory created on demand) ----
HOME = os.path.expanduser("~")
model_save_dir = HOME + "/navrep/models/V"
if not os.path.exists(model_save_dir):
    os.makedirs(model_save_dir)
model_save_path = os.path.join(model_save_dir, "imvae.json")

# ---- network: fresh graph, training mode, 3 input channels ----
reset_graph()
vae = ConvVAE(batch_size=batch_size, is_training=True, reuse=False, channels=3)

# ---- training data: images extracted from the recorded rosbag ----
dataset, _, _, _ = rosbag_to_image_dataset(
    "~/rosbags/openlab_rosbags/corridor_koze_kids.bag"
)
total_length = len(dataset)
if total_length == 0:
    raise ValueError("no images found, exiting")

# only whole batches are used; a trailing partial batch is not counted
num_batches = total_length // batch_size

# header row for the values logged by the train loop
print("train", "step", "loss", "recon_loss", "kl_loss")
class DreamEnv(object):
    """Environment-style wrapper that rolls an MDN-RNN forward in latent space.

    The state is a latent vector of width ``_Z + _G``: the first ``_Z``
    entries are decoded by the VAE for rendering, the remaining ``_G``
    entries are the goal scaled by ``MAX_GOAL_DIST``. ``step`` asks the RNN
    for the next latent instead of simulating anything.
    """

    def __init__(
        self,
        temperature=0.25,
        initial_z_path=os.path.expanduser(
            "~/navrep/datasets/M/ian/000_mus_logvars_robotstates_actions_rewards_dones.npz"
        ),
        rnn_model_path=os.path.expanduser("~/navrep/models/M/rnn.json"),
        vae_model_path=os.path.expanduser("~/navrep/models/V/vae.json"),
    ):
        """Load the V and M models and warm up the RNN state.

        Args:
            temperature: sampling temperature used by ``step``; ``0`` selects
                the deterministic branch.
            initial_z_path: ``.npz`` archive whose first ``mus`` / ``logvars``
                / ``robotstates`` entries define the initial latent.
            rnn_model_path: JSON weights loaded into the MDN-RNN.
            vae_model_path: JSON weights loaded into the VAE.
        """
        # constants
        self.TEMPERATURE = temperature
        self.DT = 0.5  # should be the same as data rnn was trained with
        # V + M Models
        reset_graph()
        self.rnn = MDNRNN(sample_hps_params, gpu_mode=False)
        self.vae = ConvVAE(batch_size=1, is_training=False)
        self.vae.load_json(vae_model_path)
        self.rnn.load_json(rnn_model_path)
        # load initial image encoding
        arrays = np.load(initial_z_path)
        initial_mu = arrays["mus"][0]
        initial_logvar = arrays["logvars"][0]
        initial_robotstate = arrays["robotstates"][0]
        # sample the lidar latent from the stored gaussian (std = exp(logvar / 2))
        ini_lidar_z = initial_mu + np.exp(
            initial_logvar / 2.0) * np.random.randn(*(initial_mu.shape))
        ini_goal_z = initial_robotstate[:2] / MAX_GOAL_DIST
        self.initial_z = np.concatenate([ini_lidar_z, ini_goal_z], axis=-1)
        # other tools
        self.rings_def = generate_rings(64, 64)
        self.viewer = None
        # environment state variables
        self.reset()
        # hot-start the rnn state: feed the initial latent with a zero action
        # repeatedly, overriding whatever the rnn predicts
        for _ in range(20):
            self.step(np.array([0, 0, 0]), override_next_z=self.initial_z)

    def step(self, action, override_next_z=None):
        """Advance the dream by one RNN step.

        Args:
            action: 3-element action, reshaped to ``(1, 1, 3)`` for the RNN.
            override_next_z: if not ``None``, stored as the new current latent
                instead of the prediction (the prediction is still returned).

        Returns:
            ``(next_z, None, next_restart, {})`` where ``next_z`` is the
            predicted latent and ``next_restart`` is always ``0``.
        """
        feed = {
            self.rnn.input_z: np.reshape(self.prev_z, (1, 1, _Z + _G)),
            self.rnn.input_action: np.reshape(action, (1, 1, 3)),
            self.rnn.input_restart: np.reshape(self.prev_restart, (1, 1)),
            self.rnn.initial_state: self.rnn_state,
        }
        # the restart logits are fetched but not used (see next_restart below)
        [logmix, mean, logstd, _logrestart, next_state] = self.rnn.sess.run(
            [
                self.rnn.out_logmix,
                self.rnn.out_mean,
                self.rnn.out_logstd,
                self.rnn.out_restart_logits,
                self.rnn.final_state,
            ],
            feed,
        )
        OUTWIDTH = _Z + _G
        if self.TEMPERATURE == 0:
            # deterministically pick max of MDN distribution
            mixture_idx = np.argmax(logmix, axis=-1)
            next_z = mean[(range(OUTWIDTH), mixture_idx)]
        else:
            # sample from modelled MDN distribution
            mixprob = np.copy(logmix) / self.TEMPERATURE  # adjust temperatures
            mixprob -= mixprob.max()
            mixprob = np.exp(mixprob)
            mixprob /= mixprob.sum(axis=1).reshape(OUTWIDTH, 1)
            chosen_mean = np.zeros(OUTWIDTH)
            chosen_logstd = np.zeros(OUTWIDTH)
            for j in range(OUTWIDTH):
                idx = get_pi_idx(np.random.rand(), mixprob[j])
                chosen_mean[j] = mean[j][idx]
                chosen_logstd[j] = logstd[j][idx]
            rand_gaussian = np.random.randn(OUTWIDTH) * np.sqrt(
                self.TEMPERATURE)
            next_z = chosen_mean + np.exp(chosen_logstd) * rand_gaussian
        if sample_hps_params.differential_z:
            # the rnn predicts a change in z rather than z itself
            next_z = self.prev_z + next_z
        # restart prediction is disabled: the dream never ends an episode itself
        next_restart = 0
        self.prev_z = next_z
        if override_next_z is not None:
            self.prev_z = override_next_z
        self.prev_restart = next_restart
        self.rnn_state = next_state
        # logging-only vars, used for rendering
        self.prev_action = action
        self.episode_step += 1
        return next_z, None, next_restart, {}

    def reset(self):
        """Return to the initial latent with a zeroed RNN state."""
        self.prev_z = self.initial_z
        self.prev_restart = np.array([1])
        self.rnn_state = self.rnn.sess.run(self.rnn.zero_state)
        # logging vars
        self.prev_action = np.array([0.0, 0.0, 0.0])
        self.episode_step = 0

    def render(self, mode="human", close=False):
        """Draw the decoded current latent (lidar rays, robot, goal).

        Args:
            mode: ``"human"`` draws into a pyglet window; ``"rgb_array"``
                raises ``NotImplementedError``.
            close: when true, close the window (if any) and return.

        Returns:
            ``self.viewer.isopen`` in ``"human"`` mode, ``None`` otherwise.
        """
        if close:
            if self.viewer is not None:
                self.viewer.close()
                # forget the closed window so that a later render() creates a
                # fresh one instead of drawing into a dead viewer
                self.viewer = None
            return
        # get last z decoding
        rings_pred = (
            self.vae.decode(self.prev_z.reshape(1, _Z + _G)[:, :_Z])
            * self.rings_def["rings_to_bool"])
        predicted_ranges = self.rings_def["rings_to_lidar"](rings_pred, 1080)
        goal_pred = self.prev_z.reshape((_Z + _G, ))[_Z:] * MAX_GOAL_DIST
        if mode == "rgb_array":
            raise NotImplementedError
        elif mode == "human":
            # Window and viewport size
            WINDOW_W = 256
            WINDOW_H = 256
            M_PER_PX = 25.6 / WINDOW_H
            VP_W = WINDOW_W
            VP_H = WINDOW_H
            # imported lazily so the class can be used without a display stack
            from gym.envs.classic_control import rendering
            import pyglet
            from pyglet import gl
            # Create viewer
            if self.viewer is None:
                self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
                self.score_label = pyglet.text.Label(
                    "0000",
                    font_size=12,
                    x=20,
                    y=WINDOW_H * 2.5 / 40.00,
                    anchor_x="left",
                    anchor_y="center",
                    color=(255, 255, 255, 255),
                )
                self.currently_rendering_iteration = 0
                self.image_lock = threading.Lock()

            # Render in pyglet
            def make_circle(c, r, res=10):
                """Return ``res`` vertices on the circle of center c, radius r."""
                thetas = np.linspace(0, 2 * np.pi, res + 1)[:-1]
                verts = np.zeros((res, 2))
                verts[:, 0] = c[0] + r * np.cos(thetas)
                verts[:, 1] = c[1] + r * np.sin(thetas)
                return verts

            with self.image_lock:
                self.currently_rendering_iteration += 1
                # NOTE(review): this method never calls self.viewer.render(),
                # so this queued geom does not appear to be drawn - confirm
                # whether the call is still needed.
                self.viewer.draw_circle(r=10, color=(0.3, 0.3, 0.3))
                win = self.viewer.window
                win.switch_to()
                win.dispatch_events()
                win.clear()
                gl.glViewport(0, 0, VP_W, VP_H)
                # colors
                bgcolor = np.array([0.4, 0.8, 0.4])
                nosecolor = np.array([0.3, 0.3, 0.3])
                lidarcolor = np.array([1.0, 0.0, 0.0])
                # Green background
                gl.glBegin(gl.GL_QUADS)
                gl.glColor4f(bgcolor[0], bgcolor[1], bgcolor[2], 1.0)
                gl.glVertex3f(0, VP_H, 0)
                gl.glVertex3f(VP_W, VP_H, 0)
                gl.glVertex3f(VP_W, 0, 0)
                gl.glVertex3f(0, 0, 0)
                gl.glEnd()
                # LIDAR: the robot sits at the window center
                i = WINDOW_W / 2.0
                j = WINDOW_H / 2.0
                angle = np.pi / 2.0
                scan = np.squeeze(predicted_ranges)
                lidar_angles = np.linspace(0, 2 * np.pi, len(scan) + 1)[:-1]
                lidar_angles = lidar_angles + np.pi / 2.  # make robot face up
                i_ray_ends = i + scan / M_PER_PX * np.cos(lidar_angles)
                j_ray_ends = j + scan / M_PER_PX * np.sin(lidar_angles)
                is_in_fov = np.cos(lidar_angles - angle) >= 0.78
                for ray_idx in range(len(scan)):
                    end_i = i_ray_ends[ray_idx]
                    end_j = j_ray_ends[ray_idx]
                    gl.glBegin(gl.GL_LINE_LOOP)
                    if is_in_fov[ray_idx]:
                        gl.glColor4f(1.0, 1.0, 0.0, 0.1)
                    else:
                        gl.glColor4f(lidarcolor[0], lidarcolor[1],
                                     lidarcolor[2], 0.1)
                    gl.glVertex3f(i, j, 0)
                    gl.glVertex3f(end_i, end_j, 0)
                    gl.glEnd()
                # Agent body (same center and heading as the lidar origin)
                r = 0.3 / M_PER_PX
                poly = make_circle((i, j), r)
                gl.glBegin(gl.GL_POLYGON)
                color = np.array([1.0, 1.0, 1.0])
                gl.glColor4f(color[0], color[1], color[2], 1)
                for vert in poly:
                    gl.glVertex3f(vert[0], vert[1], 0)
                gl.glEnd()
                # Direction triangle
                inose = i + r * np.cos(angle)
                jnose = j + r * np.sin(angle)
                iright = i + 0.3 * r * -np.sin(angle)
                jright = j + 0.3 * r * np.cos(angle)
                ileft = i - 0.3 * r * -np.sin(angle)
                jleft = j - 0.3 * r * np.cos(angle)
                gl.glBegin(gl.GL_TRIANGLES)
                gl.glColor4f(nosecolor[0], nosecolor[1], nosecolor[2], 1)
                gl.glVertex3f(inose, jnose, 0)
                gl.glVertex3f(iright, jright, 0)
                gl.glVertex3f(ileft, jleft, 0)
                gl.glEnd()
                # Goal
                goalcolor = np.array([1., 1., 0.3])
                px_goal = goal_pred / M_PER_PX
                igoal = i - px_goal[1]  # rotate 90deg to face up
                jgoal = j + px_goal[0]
                # Goal line
                gl.glBegin(gl.GL_LINE_LOOP)
                gl.glColor4f(goalcolor[0], goalcolor[1], goalcolor[2], 1)
                gl.glVertex3f(i, j, 0)
                gl.glVertex3f(igoal, jgoal, 0)
                gl.glEnd()
                # Goal markers
                gl.glBegin(gl.GL_TRIANGLES)
                gl.glColor4f(goalcolor[0], goalcolor[1], goalcolor[2], 1)
                triangle = make_circle((igoal, jgoal), r / 3., res=3)
                for vert in triangle:
                    gl.glVertex3f(vert[0], vert[1], 0)
                gl.glEnd()
                # Text
                self.score_label.text = "A {:.1f} {:.1f} {:.1f} S {}".format(
                    self.prev_action[0],
                    self.prev_action[1],
                    self.prev_action[2],
                    self.episode_step,
                )
                self.score_label.draw()
                win.flip()
                return self.viewer.isopen

    def close(self):
        """Close the rendering window, if one is open."""
        self.render(close=True)

    def _get_dt(self):
        """Return the time step ``DT`` of the dream."""
        return self.DT

    def _get_viewer(self):
        """Return the current viewer (``None`` when no window is open)."""
        return self.viewer
lidar_e = None state_e = None if step % 200 == 0: # load VAE if VAE_TYPE == "1d": if vae is None: vae = Conv1DVAE(z_size=_Z, batch_size=model.hps.max_seq_len - 1, is_training=False) vae.load_json(vae_model_path) lidar_e, state_e = vae1d_rnn_worldmodel_error( model, test_dataset_folder, vae) else: if vae is None: vae = ConvVAE(z_size=_Z, batch_size=model.hps.max_seq_len - 1, is_training=False) vae.load_json(vae_model_path) lidar_e, state_e = rnn_worldmodel_error( model, test_dataset_folder, vae) print("Test: lidar error {}, state error {}".format( lidar_e, state_e)) model.save_json(model_path) if step % 20 == 0 and step > 0: end = time.time() time_taken = end - start start = time.time() output_log = ( "step: %d, lr: %.6f, cost: %.4f, z_cost: %.4f, r_cost: %.4f, train_time_taken: %.4f"
from navrep.models.tcn import (
    reset_graph,
    sample_hps_params,
    MDNTCN,
    get_pi_idx,
)
from navrep.models.vae2d import ConvVAE

# ---- parameters ----
TEMPERATURE = 0.5
_Z = 32

# ---- file locations (dataset archive, then M and V model weights) ----
sequence_z_path = os.path.expanduser(
    "~/navrep/datasets/M/ian/000_mus_logvars_robotstates_actions_rewards_dones.npz"
)
# the "rnn" name is kept although the file holds the TCN weights
rnn_model_path, vae_model_path = (
    os.path.expanduser(location)
    for location in ("~/navrep/models/M/tcn.json", "~/navrep/models/V/vae.json")
)

# ---- models: build both on a fresh graph, then restore their weights ----
reset_graph()
tcn = MDNTCN(sample_hps_params, gpu_mode=False)
vae = ConvVAE(batch_size=1, is_training=False)
vae.load_json(vae_model_path)
tcn.load_json(rnn_model_path)

rings_def = generate_rings(64, 64)

# ---- recorded sequence: actions, (mu, logvar) encodings and done flags ----
arrays = np.load(sequence_z_path)
sequence_action, sequence_mu, sequence_logvar, sequence_restart = (
    arrays[key] for key in ("actions", "mus", "logvars", "dones")
)
# draw one z per step: mu + exp(logvar / 2) * standard normal noise
sequence_z = sequence_mu + np.exp(sequence_logvar / 2.0) * np.random.randn(
    *sequence_mu.shape)