Beispiel #1
0
 def __init__(self, temperature=0.25):
     """Load the V (VAE) and M (TCN) models and the hot-start history.

     Model parameters are read from JSON files under ``~/navrep/models`` and
     the initial input history (latent codes, actions, restart flags) is
     taken from the first ``SEQLEN`` steps of a recorded ``.npz`` sequence.

     Args:
         temperature: stored as ``self.TEMPERATURE``.
     """
     # constants
     self.TEMPERATURE = temperature
     self.SEQLEN = 99
     self.DT = 0.2  # should be the same as data rnn was trained with
     initial_z_path = os.path.expanduser(
         "~/navrep/datasets/M/toy/000_mus_logvars_robotstates_actions_rewards_dones.npz"
     )
     tcn_model_path = os.path.expanduser("~/navrep/models/M/toytcn.json")
     vae_model_path = os.path.expanduser("~/navrep/models/V/toyvae.json")
     # V + M Models
     reset_graph()
     params = sample_hps_params._replace(max_seq_len=self.SEQLEN + 1)
     self.tcn = MDNTCN(params, gpu_mode=False)
     self.vae = ConvVAE(batch_size=1, is_training=False)
     self.vae.load_json(vae_model_path)
     self.tcn.load_json(tcn_model_path)
     # load the recorded sequence; the context manager closes the .npz
     # archive (an open file handle) once the needed arrays are in memory
     with np.load(initial_z_path) as arrays:
         recorded_z = arrays["mus"][:self.SEQLEN]
         recorded_action = arrays["actions"][:self.SEQLEN]
         recorded_restart = arrays["dones"][:self.SEQLEN]
     # other tools
     self.rings_def = generate_rings(64, 64)
     self.viewer = None
     # environment state variables
     self.reset()
     # hot-start the tcn state (assigned after reset(), as before)
     self.sequence_z = recorded_z.reshape((1, self.SEQLEN, _Z))
     self.sequence_action = recorded_action.reshape((1, self.SEQLEN, 3))
     self.sequence_restart = recorded_restart.reshape((1, self.SEQLEN))
Beispiel #2
0
 def __init__(self, temperature=0.25):
     """Load the V (VAE) and M (RNN) models and warm up the RNN state.

     The initial latent code is sampled from the first recorded posterior
     (``mu + exp(logvar / 2) * eps``) and fed to the RNN for 20 steps with a
     zero action so that its hidden state is no longer the zero state.

     Args:
         temperature: stored as ``self.TEMPERATURE``.
     """
     # constants
     self.TEMPERATURE = temperature
     self.DT = 0.2  # should be the same as data rnn was trained with
     initial_z_path = os.path.expanduser(
         "~/navrep/datasets/M/im/corridor_koze_kids_bag_mus_logvars_robotstates_actions_rewards_dones.npz"
     )
     rnn_model_path = os.path.expanduser("~/navrep/models/M/imrnn.json")
     vae_model_path = os.path.expanduser("~/navrep/models/V/imvae.json")
     # V + M Models
     reset_graph()
     self.rnn = MDNRNN(sample_hps_params, gpu_mode=False)
     self.vae = ConvVAE(batch_size=1, is_training=False, channels=3)
     self.vae.load_json(vae_model_path)
     self.rnn.load_json(rnn_model_path)
     # load initial image encoding; the context manager closes the .npz
     # archive (an open file handle) once the needed arrays are in memory
     with np.load(initial_z_path) as arrays:
         initial_mu = arrays["mus"][0]
         initial_logvar = arrays["logvars"][0]
     noise = np.random.randn(*(initial_mu.shape))
     self.initial_z = initial_mu + np.exp(initial_logvar / 2.0) * noise
     # other tools
     self.viewer = None
     # environment state variables
     self.reset()
     # hot-start the rnn state
     for _ in range(20):
         self.step(np.array([0, 0, 0]), override_next_z=self.initial_z)
Beispiel #3
0
 def __init__(
         self,
         temperature=0.25,
         initial_z_path=os.path.expanduser(
             "~/navrep/datasets/M/ian/000_mus_logvars_robotstates_actions_rewards_dones.npz"
         ),
         rnn_model_path=os.path.expanduser("~/navrep/models/M/rnn.json"),
         vae_model_path=os.path.expanduser("~/navrep/models/V/vae.json"),
 ):
     """Load the V (VAE) and M (RNN) models and warm up the RNN state.

     The initial state fed to the RNN is the concatenation of a latent code
     sampled from the first recorded posterior and the first two robot-state
     entries divided by ``MAX_GOAL_DIST``.

     Args:
         temperature: stored as ``self.TEMPERATURE``.
         initial_z_path: ``.npz`` archive holding ``mus``, ``logvars`` and
             ``robotstates`` arrays.
         rnn_model_path: JSON file passed to ``MDNRNN.load_json``.
         vae_model_path: JSON file passed to ``ConvVAE.load_json``.
     """
     # constants
     self.TEMPERATURE = temperature
     self.DT = 0.5  # should be the same as data rnn was trained with
     # V + M Models
     reset_graph()
     self.rnn = MDNRNN(sample_hps_params, gpu_mode=False)
     self.vae = ConvVAE(batch_size=1, is_training=False)
     self.vae.load_json(vae_model_path)
     self.rnn.load_json(rnn_model_path)
     # load initial image encoding; the context manager closes the .npz
     # archive (an open file handle) once the needed arrays are in memory
     with np.load(initial_z_path) as arrays:
         initial_mu = arrays["mus"][0]
         initial_logvar = arrays["logvars"][0]
         initial_robotstate = arrays["robotstates"][0]
     noise = np.random.randn(*(initial_mu.shape))
     ini_lidar_z = initial_mu + np.exp(initial_logvar / 2.0) * noise
     ini_goal_z = initial_robotstate[:2] / MAX_GOAL_DIST
     self.initial_z = np.concatenate([ini_lidar_z, ini_goal_z], axis=-1)
     # other tools
     self.rings_def = generate_rings(64, 64)
     self.viewer = None
     # environment state variables
     self.reset()
     # hot-start the rnn state
     for _ in range(20):
         self.step(np.array([0, 0, 0]), override_next_z=self.initial_z)
Beispiel #4
0
from navrep.tools.rings import generate_rings
from navrep.models.vae2d import ConvVAE, reset_graph

# Script setup: builds a ConvVAE in inference mode, loads its parameters and
# loads a lidar dataset that the rest of the script iterates over in batches.
DEBUG_PLOTTING = True

# Parameters for training
batch_size = 1
NUM_EPOCH = 100
DATA_DIR = "record"
HOME = os.path.expanduser("~")

# JSON file the VAE parameters are loaded from
vae_model_path = os.path.expanduser("~/navrep/models/V/vae.json")

# create network
reset_graph()
vae = ConvVAE(batch_size=batch_size, is_training=False)

# load
vae.load_json(vae_model_path)

# create training dataset
# NOTE(review): the "~" is not expanded here; presumably
# archive_to_lidar_dataset expands it itself - confirm.
dataset = archive_to_lidar_dataset("~/navrep/datasets/V/ian", limit=180)
if len(dataset) == 0:
    raise ValueError("no scans found, exiting")
print(len(dataset), "scans in dataset.")

# split into batches:
# only full batches are counted; a trailing partial batch is dropped
total_length = len(dataset)
num_batches = int(np.floor(total_length / batch_size))

# rings converter
Beispiel #5
0
        log_path = os.path.expanduser(
            "~/navrep/logs/V/irlvae_train_log_{}.csv".format(START_TIME))

    if common_args.dry_run:
        model_save_path = model_save_path.replace(
            os.path.expanduser("~/navrep"), "/tmp/navrep")
        log_path = log_path.replace(os.path.expanduser("~/navrep"),
                                    "/tmp/navrep")

    make_dir_if_not_exists(os.path.dirname(model_save_path))
    make_dir_if_not_exists(os.path.dirname(log_path))

    # create network
    reset_graph()
    vae = ConvVAE(z_size=_Z,
                  batch_size=batch_size,
                  is_training=True,
                  reuse=False)
    vae.print_trainable_params()

    # create training dataset
    dataset = archive_to_lidar_dataset(dataset_dir)
    if len(dataset) == 0:
        raise ValueError("no scans found, exiting")
    print(len(dataset), "scans in dataset.")

    # split into batches:
    total_length = len(dataset)
    num_batches = int(np.floor(total_length / batch_size))

    # rings converter
    rings_def = generate_rings(64, 64)
Beispiel #6
0
    time_taken = end - start
    #     print('time taken to create batches', time_taken)

    batch_state = model.sess.run(model.initial_state)

    for batch_z, batch_action, batch_done, batch_reward in zip(
        z_batches, action_batches, done_batches, reward_batches
    ):

        if False:  # Visually check that the batch is sound
            from navrep.models.vae2d import ConvVAE
            import matplotlib.pyplot as plt
            from navrep.tools.rings import generate_rings

            reset_graph()
            vae = ConvVAE(batch_size=1, is_training=False)
            vae.load_json(vae_model_path)
            rings_def = generate_rings(64, 64)
            rings_pred = vae.decode(batch_z[0]) * rings_def["rings_to_bool"]
            plt.ion()
            for i, ring in enumerate(rings_pred):
                rings_def["visualize_rings"](ring, scan=None)
                plt.ylim([0, 10])
                plt.title(str(batch_action[0, i]))
                plt.pause(0.1)
            exit()
        if False:
            from navrep.models.vae2d import ConvVAE
            from navrep.tools.render import render_lidar_batch
            from navrep.tools.rings import generate_rings
Beispiel #7
0
from navrep.models.rnn import reset_graph, sample_hps_params, MDNRNN, get_pi_idx
from navrep.models.vae2d import ConvVAE

# parameters
TEMPERATURE = 0.5
_Z = 32

sequence_z_path = os.path.expanduser(
    "~/navrep/datasets/M/im/corridor_koze_kids_bag_mus_logvars_actions_rewards_dones.npz"
)
rnn_model_path = os.path.expanduser("~/navrep/models/M/imrnn.json")
vae_model_path = os.path.expanduser("~/navrep/models/V/imvae.json")

reset_graph()
imrnn = MDNRNN(sample_hps_params, gpu_mode=False)
imvae = ConvVAE(batch_size=1, is_training=False, channels=3)

imvae.load_json(vae_model_path)
imrnn.load_json(rnn_model_path)

# load sequence image encoding
arrays = np.load(sequence_z_path)
sequence_action = arrays["actions"]
sequence_mu = arrays["mus"]
sequence_logvar = arrays["logvars"]
sequence_z = sequence_mu + np.exp(
    sequence_logvar / 2.0) * np.random.randn(*(sequence_mu.shape))
SEQUENCE_LENGTH = len(sequence_mu)

prev_z = sequence_z[0]
prev_z_predicted = sequence_z[0]
Beispiel #8
0
class ToyTCNDreamEnv(object):
    """Dream environment whose dynamics come from a learned TCN world model.

    The environment keeps a fixed-length history (``SEQLEN`` steps) of latent
    codes, actions and restart flags. Each ``step`` feeds that history to the
    TCN, samples the next latent code from the predicted mixture of Gaussians
    and shifts the history by one. ``render`` decodes the newest latent code
    with the VAE and draws it as a lidar scan.
    """

    def __init__(self, temperature=0.25):
        """Load the V (VAE) and M (TCN) models and the hot-start history.

        Args:
            temperature: sampling temperature for the mixture output. ``0``
                picks the most likely mixture component deterministically.
        """
        # constants
        self.TEMPERATURE = temperature
        self.SEQLEN = 99
        self.DT = 0.2  # should be the same as data rnn was trained with
        initial_z_path = os.path.expanduser(
            "~/navrep/datasets/M/toy/000_mus_logvars_robotstates_actions_rewards_dones.npz"
        )
        tcn_model_path = os.path.expanduser("~/navrep/models/M/toytcn.json")
        vae_model_path = os.path.expanduser("~/navrep/models/V/toyvae.json")
        # V + M Models
        reset_graph()
        params = sample_hps_params._replace(max_seq_len=self.SEQLEN + 1)
        self.tcn = MDNTCN(params, gpu_mode=False)
        self.vae = ConvVAE(batch_size=1, is_training=False)
        self.vae.load_json(vae_model_path)
        self.tcn.load_json(tcn_model_path)
        # load the recorded sequence; the context manager closes the .npz
        # archive (an open file handle) once the needed arrays are in memory
        with np.load(initial_z_path) as arrays:
            recorded_z = arrays["mus"][:self.SEQLEN]
            recorded_action = arrays["actions"][:self.SEQLEN]
            recorded_restart = arrays["dones"][:self.SEQLEN]
        # other tools
        self.rings_def = generate_rings(64, 64)
        self.viewer = None
        # environment state variables
        self.reset()
        # hot-start the tcn state
        self.sequence_z = recorded_z.reshape((1, self.SEQLEN, _Z))
        self.sequence_action = recorded_action.reshape((1, self.SEQLEN, 3))
        self.sequence_restart = recorded_restart.reshape((1, self.SEQLEN))

    @staticmethod
    def _sample_mixture(logmix, mean, logstd, temperature):
        """Sample one value per (step, dim) from a mixture of Gaussians.

        Args:
            logmix: mixture log-weights, shape (steps, width, mixtures).
            mean: component means, same shape as ``logmix``.
            logstd: component log standard deviations, same shape.
            temperature: ``0`` returns the mean of the most likely component;
                otherwise the log-weights are divided by it before the
                softmax and the Gaussian noise is scaled by its square root.

        Returns:
            Array of shape (steps, width).
        """
        if temperature == 0:
            # deterministic: mean of the highest-weight component
            best = np.argmax(logmix, axis=-1)
            step_idx, dim_idx = np.indices(best.shape)
            return mean[step_idx, dim_idx, best]

        # temperature-adjusted softmax over the mixture axis; subtracting the
        # per-row max keeps every row's exponentials away from underflow
        mixprob = logmix / temperature
        mixprob = mixprob - mixprob.max(axis=-1, keepdims=True)
        mixprob = np.exp(mixprob)
        mixprob /= mixprob.sum(axis=-1, keepdims=True)

        n_steps, width = mixprob.shape[:2]
        chosen_mean = np.zeros((n_steps, width))
        chosen_logstd = np.zeros((n_steps, width))
        for i in range(n_steps):
            for j in range(width):
                idx = get_pi_idx(np.random.rand(), mixprob[i, j])
                chosen_mean[i, j] = mean[i, j][idx]
                chosen_logstd[i, j] = logstd[i, j][idx]

        rand_gaussian = np.random.randn(n_steps, width) * np.sqrt(temperature)
        return chosen_mean + np.exp(chosen_logstd) * rand_gaussian

    def step(self, action, override_next_z=None):
        """Advance the dream by one step.

        Args:
            action: 3-element action written into the newest history slot.
            override_next_z: accepted but currently ignored by this
                environment. NOTE(review): confirm whether it should replace
                the newest latent code.

        Returns:
            Tuple ``(next_z, None, next_restart, {})`` where ``next_z`` is the
            sampled latent code and ``next_restart`` is always 0.
        """
        # predict over the fixed-length history
        self.sequence_action[0, -1, :] = action
        feed = {
            self.tcn.input_z:
            np.reshape(self.sequence_z, (1, self.SEQLEN, _Z)),
            self.tcn.input_action:
            np.reshape(self.sequence_action, (1, self.SEQLEN, 3)),
            self.tcn.input_restart:
            np.reshape(self.sequence_restart, (1, self.SEQLEN)),
        }

        # the restart logits are fetched but not used yet (see below)
        [logmix, mean, logstd, _logrestart] = self.tcn.sess.run([
            self.tcn.out_logmix, self.tcn.out_mean, self.tcn.out_logstd,
            self.tcn.out_restart_logits
        ], feed)

        mix_shape = (self.SEQLEN, _Z, sample_hps_params.num_mixture)
        seq_z_predicted = self._sample_mixture(
            logmix.reshape(mix_shape),
            mean.reshape(mix_shape),
            logstd.reshape(mix_shape),
            self.TEMPERATURE,
        )
        if sample_hps_params.differential_z:
            # the model predicts the change in z, not z itself
            seq_z_predicted = self.sequence_z[0] + seq_z_predicted

        # pick last output; seq_z_predicted is (SEQLEN, _Z) in both modes
        next_z = seq_z_predicted[-1]

        next_restart = 0
        #         if logrestart[0] > 0:
        #             next_restart = 1

        # update variables: shift the history left by one, append the new step
        self.sequence_z[0, :-1, :] = self.sequence_z[0, 1:, :]
        self.sequence_action[0, :-1, :] = self.sequence_action[0, 1:, :]
        self.sequence_restart[0, :-1] = self.sequence_restart[0, 1:]
        self.sequence_z[0, -1, :] = next_z
        # placeholder, overwritten with the real action on the next step()
        self.sequence_action[0, -1, :] = np.nan
        self.sequence_restart[0, -1] = next_restart

        # logging-only vars, used for rendering
        self.prev_action = action
        self.episode_step += 1

        return next_z, None, next_restart, {}

    def reset(self):
        """Reset the logging variables; the input history is left untouched."""
        # logging vars
        self.prev_action = np.array([0.0, 0.0, 0.0])
        self.episode_step = 0

    def render(self, mode="human", close=False):
        """Draw the newest latent code, decoded to a lidar scan, with pyglet.

        Args:
            mode: ``"human"`` opens/updates a window; ``"rgb_array"`` raises
                ``NotImplementedError``.
            close: unused.

        Returns:
            ``self.viewer.isopen`` in ``"human"`` mode.
        """
        rings_pred = (self.vae.decode(self.sequence_z[0, -1].reshape(1, _Z)) *
                      self.rings_def["rings_to_bool"])
        predicted_ranges = self.rings_def["rings_to_lidar"](rings_pred, 1080)

        if mode == "rgb_array":
            raise NotImplementedError
        elif mode == "human":
            # Window and viewport size
            WINDOW_W = 256
            WINDOW_H = 256
            M_PER_PX = 25.6 / WINDOW_H
            VP_W = WINDOW_W
            VP_H = WINDOW_H
            # imported lazily so the class can be used without a display
            from gym.envs.classic_control import rendering
            import pyglet
            from pyglet import gl

            # Create viewer
            if self.viewer is None:
                self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
                self.score_label = pyglet.text.Label(
                    "0000",
                    font_size=12,
                    x=20,
                    y=WINDOW_H * 2.5 / 40.00,
                    anchor_x="left",
                    anchor_y="center",
                    color=(255, 255, 255, 255),
                )
                self.currently_rendering_iteration = 0
                self.image_lock = threading.Lock()

            # Render in pyglet
            def make_circle(c, r, res=10):
                """Return `res` vertices of a circle of radius r around c."""
                thetas = np.linspace(0, 2 * np.pi, res + 1)[:-1]
                verts = np.zeros((res, 2))
                verts[:, 0] = c[0] + r * np.cos(thetas)
                verts[:, 1] = c[1] + r * np.sin(thetas)
                return verts

            with self.image_lock:
                self.currently_rendering_iteration += 1
                self.viewer.draw_circle(r=10, color=(0.3, 0.3, 0.3))
                win = self.viewer.window
                win.switch_to()
                win.dispatch_events()
                win.clear()
                gl.glViewport(0, 0, VP_W, VP_H)
                # colors
                bgcolor = np.array([0.4, 0.8, 0.4])
                nosecolor = np.array([0.3, 0.3, 0.3])
                lidarcolor = np.array([1.0, 0.0, 0.0])
                # Green background
                gl.glBegin(gl.GL_QUADS)
                gl.glColor4f(bgcolor[0], bgcolor[1], bgcolor[2], 1.0)
                gl.glVertex3f(0, VP_H, 0)
                gl.glVertex3f(VP_W, VP_H, 0)
                gl.glVertex3f(VP_W, 0, 0)
                gl.glVertex3f(0, 0, 0)
                gl.glEnd()
                # LIDAR: one ray per range, drawn from the window centre
                i = WINDOW_W / 2.0
                j = WINDOW_H / 2.0
                angle = np.pi / 2.0
                scan = np.squeeze(predicted_ranges)
                lidar_angles = np.linspace(0, 2 * np.pi, len(scan) + 1)[:-1]
                i_ray_ends = i + scan / M_PER_PX * np.cos(lidar_angles)
                j_ray_ends = j + scan / M_PER_PX * np.sin(lidar_angles)
                # rays whose direction is close to `angle` get another colour
                is_in_fov = np.cos(lidar_angles - angle) >= 0.78
                for ray_idx in range(len(scan)):
                    end_i = i_ray_ends[ray_idx]
                    end_j = j_ray_ends[ray_idx]
                    gl.glBegin(gl.GL_LINE_LOOP)
                    if is_in_fov[ray_idx]:
                        gl.glColor4f(1.0, 1.0, 0.0, 0.1)
                    else:
                        gl.glColor4f(lidarcolor[0], lidarcolor[1],
                                     lidarcolor[2], 0.1)
                    gl.glVertex3f(i, j, 0)
                    gl.glVertex3f(end_i, end_j, 0)
                    gl.glEnd()
                # Agent body
                i = WINDOW_W / 2.0
                j = WINDOW_H / 2.0
                r = 0.3 / M_PER_PX
                angle = np.pi / 2.0
                poly = make_circle((i, j), r)
                gl.glBegin(gl.GL_POLYGON)
                color = np.array([1.0, 1.0, 1.0])
                gl.glColor4f(color[0], color[1], color[2], 1)
                for vert in poly:
                    gl.glVertex3f(vert[0], vert[1], 0)
                gl.glEnd()
                # Direction triangle
                inose = i + r * np.cos(angle)
                jnose = j + r * np.sin(angle)
                iright = i + 0.3 * r * -np.sin(angle)
                jright = j + 0.3 * r * np.cos(angle)
                ileft = i - 0.3 * r * -np.sin(angle)
                jleft = j - 0.3 * r * np.cos(angle)
                gl.glBegin(gl.GL_TRIANGLES)
                gl.glColor4f(nosecolor[0], nosecolor[1], nosecolor[2], 1)
                gl.glVertex3f(inose, jnose, 0)
                gl.glVertex3f(iright, jright, 0)
                gl.glVertex3f(ileft, jleft, 0)
                gl.glEnd()
                # Text: last action and step counter
                self.score_label.text = "A {:.1f} {:.1f} {:.1f} S {}".format(
                    self.prev_action[0],
                    self.prev_action[1],
                    self.prev_action[2],
                    self.episode_step,
                )
                self.score_label.draw()
                win.flip()
                return self.viewer.isopen
Beispiel #9
0
        elif backend == "VAELSTM":
            from navrep.scripts.train_vaelstm import _Z, _H
        elif backend == "VAE_LSTM":
            from navrep.scripts.train_vae import _Z
            from navrep.scripts.train_rnn import _H
        elif backend == "VAE1D_LSTM":
            from navrep.scripts.train_vae1d import _Z
            from navrep.scripts.train_rnn import _H

        # load W / M model
        model = None
        if backend == "VAE_LSTM":
            vae_model_path = os.path.join(MODELDIR, "V",
                                          environment + "vae.json")
            reset_graph()
            vae = ConvVAE(z_size=_Z, batch_size=1, is_training=False)
            vae.load_json(vae_model_path)
            hps = default_hps()
            hps = hps._replace(seq_width=_Z + _G, action_width=_A, rnn_size=_H)
            rnn = MDNRNN(hps, gpu_mode=gpu)
            rnn.load_json(path)
        elif backend == "VAE1D_LSTM":
            vae_model_path = os.path.join(MODELDIR, "V",
                                          environment + "vae1d.json")
            reset_graph()
            reset_graph()
            vae = Conv1DVAE(z_size=_Z, batch_size=1, is_training=False)
            vae.load_json(vae_model_path)
            hps = default_hps()
            hps = hps._replace(seq_width=_Z + _G, action_width=_A, rnn_size=_H)
            rnn = MDNRNN(hps, gpu_mode=gpu)
Beispiel #10
0
class ImDreamEnv(object):
    """Dream environment whose dynamics come from a learned MDN-RNN.

    Each ``step`` feeds the previous latent code, the action and the previous
    restart flag to the RNN, samples the next latent code from the predicted
    mixture of Gaussians and carries the RNN state forward. ``render`` decodes
    the current latent code with the VAE and blits it as an RGB image.
    """

    def __init__(self, temperature=0.25):
        """Load the V (VAE) and M (RNN) models and warm up the RNN state.

        Args:
            temperature: sampling temperature for the mixture output. ``0``
                picks the most likely mixture component deterministically.
        """
        # constants
        self.TEMPERATURE = temperature
        self.DT = 0.2  # should be the same as data rnn was trained with
        initial_z_path = os.path.expanduser(
            "~/navrep/datasets/M/im/corridor_koze_kids_bag_mus_logvars_robotstates_actions_rewards_dones.npz"
        )
        rnn_model_path = os.path.expanduser("~/navrep/models/M/imrnn.json")
        vae_model_path = os.path.expanduser("~/navrep/models/V/imvae.json")
        # V + M Models
        reset_graph()
        self.rnn = MDNRNN(sample_hps_params, gpu_mode=False)
        self.vae = ConvVAE(batch_size=1, is_training=False, channels=3)
        self.vae.load_json(vae_model_path)
        self.rnn.load_json(rnn_model_path)
        # load initial image encoding; the context manager closes the .npz
        # archive (an open file handle) once the needed arrays are in memory
        with np.load(initial_z_path) as arrays:
            initial_mu = arrays["mus"][0]
            initial_logvar = arrays["logvars"][0]
        # sample z = mu + sigma * eps with sigma = exp(logvar / 2)
        noise = np.random.randn(*(initial_mu.shape))
        self.initial_z = initial_mu + np.exp(initial_logvar / 2.0) * noise
        # other tools
        self.viewer = None
        # environment state variables
        self.reset()
        # hot-start the rnn state
        for _ in range(20):
            self.step(np.array([0, 0, 0]), override_next_z=self.initial_z)

    @staticmethod
    def _sample_next_z(logmix, mean, logstd, temperature):
        """Sample one value per latent dimension from a mixture of Gaussians.

        Args:
            logmix: mixture log-weights, shape (width, mixtures).
            mean: component means, same shape as ``logmix``.
            logstd: component log standard deviations, same shape.
            temperature: ``0`` returns the mean of the most likely component;
                otherwise the log-weights are divided by it before the
                softmax and the Gaussian noise is scaled by its square root.

        Returns:
            Array of shape (width,).
        """
        width = logmix.shape[0]
        if temperature == 0:  # deterministically pick max of MDN distribution
            mixture_idx = np.argmax(logmix, axis=-1)
            return mean[np.arange(width), mixture_idx]

        # sample from modelled MDN distribution; subtracting the per-row max
        # keeps every row's exponentials away from underflow
        mixprob = logmix / temperature
        mixprob = mixprob - mixprob.max(axis=-1, keepdims=True)
        mixprob = np.exp(mixprob)
        mixprob /= mixprob.sum(axis=-1, keepdims=True)

        chosen_mean = np.zeros(width)
        chosen_logstd = np.zeros(width)
        for j in range(width):
            idx = get_pi_idx(np.random.rand(), mixprob[j])
            chosen_mean[j] = mean[j][idx]
            chosen_logstd[j] = logstd[j][idx]
        rand_gaussian = np.random.randn(width) * np.sqrt(temperature)
        return chosen_mean + np.exp(chosen_logstd) * rand_gaussian

    def step(self, action, override_next_z=None):
        """Advance the dream by one step.

        Args:
            action: 3-element action fed to the RNN.
            override_next_z: if given, stored as the latent code used for the
                next step instead of the sampled one. The sampled code is
                still what gets returned.

        Returns:
            Tuple ``(next_z, None, next_restart, {})`` where ``next_z`` is the
            sampled latent code and ``next_restart`` is always 0.
        """
        feed = {
            self.rnn.input_z: np.reshape(self.prev_z, (1, 1, _Z)),
            self.rnn.input_action: np.reshape(action, (1, 1, 3)),
            self.rnn.input_restart: np.reshape(self.prev_restart, (1, 1)),
            self.rnn.initial_state: self.rnn_state,
        }

        # the restart logits are fetched but not used yet (see below)
        [logmix, mean, logstd, _logrestart, next_state] = self.rnn.sess.run(
            [
                self.rnn.out_logmix,
                self.rnn.out_mean,
                self.rnn.out_logstd,
                self.rnn.out_restart_logits,
                self.rnn.final_state,
            ],
            feed,
        )

        next_z = self._sample_next_z(logmix, mean, logstd, self.TEMPERATURE)
        if sample_hps_params.differential_z:
            # the model predicts the change in z, not z itself
            next_z = self.prev_z + next_z

        next_restart = 0
        #         if logrestart[0] > 0:
        #             next_restart = 1

        self.prev_z = next_z if override_next_z is None else override_next_z
        self.prev_restart = next_restart
        self.rnn_state = next_state
        # logging-only vars, used for rendering
        self.prev_action = action
        self.episode_step += 1

        return next_z, None, next_restart, {}

    def reset(self):
        """Return to the initial latent code and a zero RNN state."""
        self.prev_z = self.initial_z
        self.prev_restart = np.array([1])
        self.rnn_state = self.rnn.sess.run(self.rnn.zero_state)
        # logging vars
        self.prev_action = np.array([0.0, 0.0, 0.0])
        self.episode_step = 0

    def render(self, mode="human", close=False):
        """Decode the current latent code to an RGB image and show it.

        Args:
            mode: ``"human"`` opens/updates a window; ``"rgb_array"`` raises
                ``NotImplementedError``.
            close: unused.

        Returns:
            ``self.viewer.isopen`` in ``"human"`` mode.
        """
        # VAE output is scaled by 255 and converted to 8-bit pixels
        img_pred = (self.vae.decode(self.prev_z.reshape(1, _Z)) * 255).astype(np.uint8)
        img_pred = img_pred.reshape(_64, _64, 3)

        if mode == "rgb_array":
            raise NotImplementedError
        elif mode == "human":
            # Window and viewport size
            WINDOW_W = 256
            WINDOW_H = 256
            VP_W = WINDOW_W
            VP_H = WINDOW_H
            # imported lazily so the class can be used without a display
            from gym.envs.classic_control import rendering
            import pyglet
            from pyglet import gl
            from pyglet.gl.gl import GLubyte

            # Create pyglet image from the flat R, G, B, R, G, B, ... bytes
            pixels = img_pred.flatten()
            rawData = (GLubyte * len(pixels))(*pixels)
            image_data = pyglet.image.ImageData(_64, _64, 'RGB', rawData)

            # Create viewer
            if self.viewer is None:
                self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
                self.score_label = pyglet.text.Label(
                    "0000",
                    font_size=12,
                    x=20,
                    y=WINDOW_H * 2.5 / 40.00,
                    anchor_x="left",
                    anchor_y="center",
                    color=(255, 255, 255, 255),
                )
                self.currently_rendering_iteration = 0
                self.image_lock = threading.Lock()
            # Render in pyglet
            with self.image_lock:
                self.currently_rendering_iteration += 1
                self.viewer.draw_circle(r=10, color=(0.3, 0.3, 0.3))
                win = self.viewer.window
                win.switch_to()
                win.dispatch_events()
                win.clear()
                gl.glViewport(0, 0, VP_W, VP_H)
                # Image
                image_data.blit(96, 96)
                # Text: last action and step counter
                self.score_label.text = "A {:.1f} {:.1f} {:.1f} S {}".format(
                    self.prev_action[0],
                    self.prev_action[1],
                    self.prev_action[2],
                    self.episode_step,
                )
                self.score_label.draw()
                win.flip()
                return self.viewer.isopen
Beispiel #11
0
from __future__ import print_function
import numpy as np
import os
from navrep.models.vae2d import ConvVAE, reset_graph
from navrep.tools.rings import generate_rings
from pyniel.python_tools.path_tools import make_dir_if_not_exists

# Script setup: loads the 3-channel image VAE, then walks the image dataset
# folder and processes every .npz archive in sorted path order.

# create network
# NOTE(review): the network is built with is_training=True although its
# parameters are loaded from disk right after - confirm this is intended.
reset_graph()
imvae = ConvVAE(
    batch_size=1,
    is_training=True,
    channels=3,
)
imvae.load_json(os.path.expanduser("~/navrep/models/V/imvae.json"))

# rings converter
rings_def = generate_rings(64, 64)

# labels to learn are x, r, d (obs, reward, done)
dataset_folder = os.path.expanduser("~/navrep/datasets/V/im")

# collect every .npz file below dataset_folder, recursively
files = []
for dirpath, dirnames, filenames in os.walk(dataset_folder):
    for filename in [f for f in filenames if f.endswith(".npz")]:
        files.append(os.path.join(dirpath, filename))
# sort so that archives are processed in a reproducible order
files = sorted(files)
for path in files:
    arrays = np.load(path)
    images = arrays["images"]
    rewards = arrays["rewards"]
Beispiel #12
0
 def __init__(self,
              backend, encoding,
              rnn_model_path=os.path.expanduser("~/navrep/models/M/rnn.json"),
              rnn1d_model_path=os.path.expanduser("~/navrep/models/M/rnn1d.json"),
              vae_model_path=os.path.expanduser("~/navrep/models/V/vae.json"),
              vae1d_model_path=os.path.expanduser("~/navrep/models/V/vae1d.json"),
              gpt_model_path=os.path.expanduser("~/navrep/models/W/gpt"),
              gpt1d_model_path=os.path.expanduser("~/navrep/models/W/gpt1d"),
              vae1dlstm_model_path=os.path.expanduser("~/navrep/models/W/vae1dlstm"),
              vaelstm_model_path=os.path.expanduser("~/navrep/models/W/vaelstm"),
              gpu=False,
              encoder_to_share_model_with=None,  # another EnvEncoder
              ):
     """Pick the latent sizes for `backend`, then load or share the models.

     Args:
         backend: one of "GPT", "GPT1D", "VAE1DLSTM", "VAELSTM", "VAE_LSTM",
             "VAE1D_LSTM".
         encoding: "V_ONLY", "VM" or "M_ONLY"; determines ``encoding_dim``.
         rnn_model_path, rnn1d_model_path, vae_model_path, vae1d_model_path,
         gpt_model_path, gpt1d_model_path, vae1dlstm_model_path,
         vaelstm_model_path: where the model for the matching backend is
             loaded from; only the paths of the selected backend are used.
         gpu: forwarded to the model constructors / checkpoint loader.
         encoder_to_share_model_with: if given, its ``vae`` and ``rnn`` are
             reused and nothing is loaded from disk.

     Raises:
         NotImplementedError: for an unknown `backend` or `encoding`.
     """
     # The latent sizes live in the training script of each backend and are
     # imported lazily, so only the selected backend's module gets imported.
     LIDAR_NORM_FACTOR = None
     if backend == "GPT":
         from navrep.scripts.train_gpt import _Z, _H
     elif backend == "GPT1D":
         from navrep.scripts.train_gpt1d import _Z, _H
         from navrep.tools.wdataset import LIDAR_NORM_FACTOR
     elif backend == "VAE1DLSTM":
         from navrep.scripts.train_vae1dlstm import _Z, _H
         from navrep.tools.wdataset import LIDAR_NORM_FACTOR
     elif backend == "VAELSTM":
         from navrep.scripts.train_vaelstm import _Z, _H
     elif backend == "VAE_LSTM":
         from navrep.scripts.train_vae import _Z
         from navrep.scripts.train_rnn import _H
     elif backend == "VAE1D_LSTM":
         from navrep.scripts.train_vae1d import _Z
         from navrep.scripts.train_rnn import _H
         from navrep.scripts.train_vae1d import MAX_LIDAR_DIST as LIDAR_NORM_FACTOR
     else:
         # without this, an unknown backend surfaced as an UnboundLocalError
         # on _Z a few lines further down
         raise NotImplementedError("unknown backend: {}".format(backend))
     self._Z = _Z
     self._H = _H
     self.LIDAR_NORM_FACTOR = LIDAR_NORM_FACTOR
     self.encoding = encoding
     self.backend = backend
     # size of the encoded observation: latent part(s) plus robot state
     if self.encoding == "V_ONLY":
         self.encoding_dim = _Z + _RS
     elif self.encoding == "VM":
         self.encoding_dim = _Z + _H + _RS
     elif self.encoding == "M_ONLY":
         self.encoding_dim = _H + _RS
     else:
         raise NotImplementedError
     self.observation_space = spaces.Box(low=-np.inf, high=np.inf,
                                         shape=(self.encoding_dim,), dtype=np.float32)
     # V + M Models
     if encoder_to_share_model_with is not None:
         self.vae = encoder_to_share_model_with.vae
         self.rnn = encoder_to_share_model_with.rnn
     elif self.backend in ("VAE_LSTM", "VAE1D_LSTM"):
         # separate V and M models; the 1D variant differs only in the VAE
         # class and in which paths are used
         is_1d = self.backend == "VAE1D_LSTM"
         reset_graph()
         vae_class = Conv1DVAE if is_1d else ConvVAE
         self.vae = vae_class(z_size=_Z, batch_size=1, is_training=False)
         self.vae.load_json(vae1d_model_path if is_1d else vae_model_path)
         # always define the attribute, so that a V_ONLY encoder can still be
         # passed as encoder_to_share_model_with
         self.rnn = None
         if self.encoding in ["VM", "M_ONLY"]:
             hps = sample_hps_params._replace(seq_width=_Z+_G, action_width=_A, rnn_size=_H)
             self.rnn = MDNRNN(hps, gpu_mode=gpu)
             self.rnn.load_json(rnn1d_model_path if is_1d else rnn_model_path)
     elif self.backend == "GPT":
         mconf = GPTConfig(BLOCK_SIZE, _H)
         model = GPT(mconf, gpu=gpu)
         load_checkpoint(model, gpt_model_path, gpu=gpu)
         # joint world model: the same object serves as V and M
         self.vae = model
         self.rnn = model
     elif self.backend == "GPT1D":
         mconf = GPTConfig(BLOCK_SIZE, _H)
         model = GPT1D(mconf, gpu=gpu)
         load_checkpoint(model, gpt1d_model_path, gpu=gpu)
         self.vae = model
         self.rnn = model
     elif self.backend == "VAELSTM":
         mconf = VAELSTMConfig(_Z, _H)
         model = VAELSTM(mconf, gpu=gpu)
         load_checkpoint(model, vaelstm_model_path, gpu=gpu)
         self.vae = model
         self.rnn = model
     elif self.backend == "VAE1DLSTM":
         mconf = VAE1DLSTMConfig(_Z, _H)
         model = VAE1DLSTM(mconf, gpu=gpu)
         load_checkpoint(model, vae1dlstm_model_path, gpu=gpu)
         self.vae = model
         self.rnn = model
     else:
         raise NotImplementedError
     # other tools
     self.rings_def = generate_rings(_64, _64)
     self.viewer = None
     # environment state variables
     self.reset()
Beispiel #13
0
class EnvEncoder(object):
    """ Generic class to encode the observations of an environment,
    look at EncodedEnv to see how it is typically used.

    An observation (lidar, robotstate) is turned into a flat vector made of the VAE latent
    of the latest scan ("V"), the hidden state of the sequence model ("M"), or both,
    always concatenated with the raw robotstate.

    backend selects the world-model implementation:
        "VAE_LSTM", "VAE1D_LSTM": a VAE and (for VM / M_ONLY) an MDNRNN, loaded from json
        "GPT", "GPT1D", "VAELSTM", "VAE1DLSTM": one checkpointed model used as both V and M
    encoding selects the content of the encoded observation:
        "V_ONLY": [lidar_z, robotstate]
        "VM":     [lidar_z, robotstate, h]
        "M_ONLY": [robotstate, h]
    """
    def __init__(self,
                 backend, encoding,
                 rnn_model_path=os.path.expanduser("~/navrep/models/M/rnn.json"),
                 rnn1d_model_path=os.path.expanduser("~/navrep/models/M/rnn1d.json"),
                 vae_model_path=os.path.expanduser("~/navrep/models/V/vae.json"),
                 vae1d_model_path=os.path.expanduser("~/navrep/models/V/vae1d.json"),
                 gpt_model_path=os.path.expanduser("~/navrep/models/W/gpt"),
                 gpt1d_model_path=os.path.expanduser("~/navrep/models/W/gpt1d"),
                 vae1dlstm_model_path=os.path.expanduser("~/navrep/models/W/vae1dlstm"),
                 vaelstm_model_path=os.path.expanduser("~/navrep/models/W/vaelstm"),
                 gpu=False,
                 encoder_to_share_model_with=None,  # another EnvEncoder
                 ):
        """ Select latent sizes for the backend, then load (or borrow) the V and M models.

        backend, encoding: see class docstring
        *_model_path: where the model of the matching backend is loaded from
        gpu: forwarded to the model constructors / checkpoint loader
        encoder_to_share_model_with: if given, its vae (and rnn, if it has one) are reused
            instead of loading new models

        raises NotImplementedError for an unknown backend or encoding
        """
        # latent sizes (_Z, _H) are defined by the training script of each backend
        LIDAR_NORM_FACTOR = None
        if backend == "GPT":
            from navrep.scripts.train_gpt import _Z, _H
        elif backend == "GPT1D":
            from navrep.scripts.train_gpt1d import _Z, _H
            from navrep.tools.wdataset import LIDAR_NORM_FACTOR
        elif backend == "VAE1DLSTM":
            from navrep.scripts.train_vae1dlstm import _Z, _H
            from navrep.tools.wdataset import LIDAR_NORM_FACTOR
        elif backend == "VAELSTM":
            from navrep.scripts.train_vaelstm import _Z, _H
        elif backend == "VAE_LSTM":
            from navrep.scripts.train_vae import _Z
            from navrep.scripts.train_rnn import _H
        elif backend == "VAE1D_LSTM":
            from navrep.scripts.train_vae1d import _Z
            from navrep.scripts.train_rnn import _H
            from navrep.scripts.train_vae1d import MAX_LIDAR_DIST as LIDAR_NORM_FACTOR
        else:
            # without this branch an unknown backend fails below with an obscure
            # UnboundLocalError on _Z, also when a shared model is supplied
            raise NotImplementedError("unknown backend: {}".format(backend))
        self._Z = _Z
        self._H = _H
        self.LIDAR_NORM_FACTOR = LIDAR_NORM_FACTOR
        self.encoding = encoding
        self.backend = backend
        if self.encoding == "V_ONLY":
            self.encoding_dim = _Z + _RS
        elif self.encoding == "VM":
            self.encoding_dim = _Z + _H + _RS
        elif self.encoding == "M_ONLY":
            self.encoding_dim = _H + _RS
        else:
            raise NotImplementedError
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf,
                                            shape=(self.encoding_dim,), dtype=np.float32)
        # V + M Models
        if encoder_to_share_model_with is not None:
            self.vae = encoder_to_share_model_with.vae
            # a V_ONLY encoder on the VAE_LSTM / VAE1D_LSTM backends never creates an rnn,
            # so only borrow it when it exists
            if hasattr(encoder_to_share_model_with, "rnn"):
                self.rnn = encoder_to_share_model_with.rnn
        else:
            # load world model
            if self.backend == "VAE_LSTM":
                reset_graph()
                self.vae = ConvVAE(z_size=_Z, batch_size=1, is_training=False)
                self.vae.load_json(vae_model_path)
                if self.encoding in ["VM", "M_ONLY"]:
                    hps = sample_hps_params._replace(seq_width=_Z+_G, action_width=_A, rnn_size=_H)
                    self.rnn = MDNRNN(hps, gpu_mode=gpu)
                    self.rnn.load_json(rnn_model_path)
            elif self.backend == "VAE1D_LSTM":
                reset_graph()
                self.vae = Conv1DVAE(z_size=_Z, batch_size=1, is_training=False)
                self.vae.load_json(vae1d_model_path)
                if self.encoding in ["VM", "M_ONLY"]:
                    hps = sample_hps_params._replace(seq_width=_Z+_G, action_width=_A, rnn_size=_H)
                    self.rnn = MDNRNN(hps, gpu_mode=gpu)
                    self.rnn.load_json(rnn1d_model_path)
            elif self.backend == "GPT":
                mconf = GPTConfig(BLOCK_SIZE, _H)
                model = GPT(mconf, gpu=gpu)
                load_checkpoint(model, gpt_model_path, gpu=gpu)
                self.vae = model
                self.rnn = model
            elif self.backend == "GPT1D":
                mconf = GPTConfig(BLOCK_SIZE, _H)
                model = GPT1D(mconf, gpu=gpu)
                load_checkpoint(model, gpt1d_model_path, gpu=gpu)
                self.vae = model
                self.rnn = model
            elif self.backend == "VAELSTM":
                mconf = VAELSTMConfig(_Z, _H)
                model = VAELSTM(mconf, gpu=gpu)
                load_checkpoint(model, vaelstm_model_path, gpu=gpu)
                self.vae = model
                self.rnn = model
            elif self.backend == "VAE1DLSTM":
                mconf = VAE1DLSTMConfig(_Z, _H)
                model = VAE1DLSTM(mconf, gpu=gpu)
                load_checkpoint(model, vae1dlstm_model_path, gpu=gpu)
                self.vae = model
                self.rnn = model
            else:
                raise NotImplementedError
        # other tools
        self.rings_def = generate_rings(_64, _64)
        self.viewer = None
        # environment state variables
        self.reset()

    def reset(self):
        """ Clear the recurrent memory (rnn state or observation sequence) and the last latent """
        if self.encoding in ["VM", "M_ONLY"]:
            if self.backend in ["VAE_LSTM", "VAE1D_LSTM"]:
                self.state = rnn_init_state(self.rnn)
            elif self.backend in ["GPT", "VAELSTM", "VAE1DLSTM", "GPT1D"]:
                self.gpt_sequence = []
        self.lidar_z = np.zeros(self._Z)

    def close(self):
        """ Close the viewer, if one was created by a render call """
        # NOTE(review): _render_rings_polar stores a matplotlib figure in self.viewer;
        # confirm that object actually provides close()
        if self.viewer is not None:
            self.viewer.close()

    def _get_last_decoded_scan(self):
        """ Decode the latest lidar latent back into a scan of _L ranges """
        obs_pred = self.vae.decode(self.lidar_z.reshape((1,self._Z)))
        if self.backend in ["VAE1DLSTM", "GPT1D", "VAE1D_LSTM"]:
            # 1D backends decode to a normalized scan
            decoded_scan = (obs_pred * self.LIDAR_NORM_FACTOR).reshape((_L))
        else:
            # 2D backends decode to rings, which are converted back to ranges
            rings_pred = obs_pred * self.rings_def["rings_to_bool"]
            decoded_scan = self.rings_def["rings_to_lidar"](rings_pred, _L).reshape((_L))
        return decoded_scan

    def _encode_obs(self, obs, action):
        """ Encode one observation, and advance the sequence model for VM / M_ONLY.

        obs is (lidar, other_obs)
        where lidar is (time_samples, ray, channel)
        and other_obs is (5,) - [goal_x, goal_y, vel_x, vel_y, vel_theta] all in robot frame
        action is the action passed on to the sequence model together with this observation

        returns a 1D array, depending on the encoding:
            V_ONLY: concat[lidar_z, other_obs]
            VM:     concat[lidar_z, other_obs, h]
            M_ONLY: concat[other_obs, h]

        lidar_z is -inf, inf
        h rnn state is ?
        other_obs is -inf, inf
        """
        # convert lidar scan to obs
        lidar_scan = obs[0]  # latest scan only obs (buffer, ray, channel)
        lidar_scan = lidar_scan.reshape(1, _L).astype(np.float32)
        lidar_mode = "scans" if "1D" in self.backend else "rings"
        lidar_obs = scans_to_lidar_obs(lidar_scan, lidar_mode, self.rings_def, channel_first=False)
        self.last_lidar_obs = lidar_obs  # for rendering purposes

        # obs to z, mu, logvar
        mu, logvar = self.vae.encode_mu_logvar(lidar_obs)
        mu = mu[0]
        logvar = logvar[0]
        s = logvar.shape
        if NO_VAE_VAR:
            lidar_z = mu * 1.
        else:
            lidar_z = mu + np.exp(logvar / 2.0) * np.random.randn(*s)

        # encode obs through V + M
        self.lidar_z = lidar_z
        if self.encoding == "V_ONLY":
            encoded_obs = np.concatenate([self.lidar_z, obs[1]], axis=0)
        elif self.encoding in ["VM", "M_ONLY"]:
            # get h
            if self.backend in ["VAE_LSTM", "VAE1D_LSTM"]:
                goal_z = obs[1][:2] / MAX_GOAL_DIST
                rnn_z = np.concatenate([lidar_z, goal_z], axis=-1)
                self.state = rnn_next_state(self.rnn, rnn_z, action, self.state)
                h = self.state.h[0]
            elif self.backend in ["GPT", "VAELSTM", "VAE1DLSTM", "GPT1D"]:
                self.gpt_sequence.append(dict(obs=lidar_obs[0], state=obs[1][:2], action=action))
                # keep the most recent BLOCK_SIZE steps. Slicing from the front would drop
                # every new step once the window is full, and h would stop changing.
                self.gpt_sequence = self.gpt_sequence[-BLOCK_SIZE:]
                h = self.rnn.get_h(self.gpt_sequence)
            # encoded obs
            if self.encoding == "VM":
                encoded_obs = np.concatenate([self.lidar_z, obs[1], h], axis=0)
            elif self.encoding == "M_ONLY":
                encoded_obs = np.concatenate([obs[1], h], axis=0)
        return encoded_obs

    def _render_rings_polar(self, close, save_to_file=False):
        """ Plot the last rings observation and its VAE reconstruction with matplotlib.

        close: close the viewer (if any) instead of rendering
        save_to_file: also save both figures as numbered png files in /tmp

        returns False for 1D backends (which have no rings), None otherwise
        """
        if close:
            # nothing to close if nothing was rendered yet
            if self.viewer is not None:
                self.viewer.close()
            return
        # rendering
        if self.backend in ["VAE1DLSTM", "GPT1D", "VAE1D_LSTM"]:
            return False
        else:
            last_rings_obs = self.last_lidar_obs.reshape((_64, _64, 1))
            last_rings_pred = self.vae.decode(self.lidar_z.reshape((1,self._Z))).reshape((_64, _64, 1))
            import matplotlib.pyplot as plt
            plt.ion()
            fig, (ax1, ax2) = plt.subplots(
                1, 2, subplot_kw=dict(projection="polar"), num="rings"
            )
            ax1.clear()
            ax2.clear()
            if self.viewer is None:
                self.rendering_iteration = 0
            self.viewer = fig
            self.rings_def["visualize_rings"](last_rings_obs, scan=None, fig=fig, ax=ax1)
            self.rings_def["visualize_rings"](last_rings_pred, scan=None, fig=fig, ax=ax2)
            ax1.set_ylim([0, 10])
            ax1.set_title("ground truth")
            ax2.set_ylim([0, 10])
            ax2.set_title("lidar reconstruction")
            # rings box viz
            fig2, (ax1, ax2) = plt.subplots(1, 2, num="2d")
            ax1.clear()
            ax2.clear()
            ax1.imshow(np.squeeze(last_rings_obs), cmap=plt.cm.Greys)
            ax2.imshow(np.squeeze(last_rings_pred), cmap=plt.cm.Greys)
            ax1.set_title("ground truth")
            ax2.set_title("lidar reconstruction")
            # update
            plt.pause(0.01)
            self.rendering_iteration += 1
            if save_to_file:
                fig.savefig(
                    "/tmp/encodedenv_polar{:04d}.png".format(self.rendering_iteration))
                fig2.savefig(
                    "/tmp/encodedenv_box{:04d}.png".format(self.rendering_iteration))

    def _render_rings(self, close, save_to_file=False):
        """ Draw the last rings observation next to its VAE reconstruction in a pyglet window.

        close: close the viewer (if any) instead of rendering
        save_to_file: also save the rendered frame as a numbered png file in /tmp

        returns False for 1D backends (which have no rings), otherwise viewer.isopen
        (None when close is set)
        """
        if close:
            # nothing to close if nothing was rendered yet
            if self.viewer is not None:
                self.viewer.close()
            return
        # rendering
        if self.backend in ["VAE1DLSTM", "GPT1D", "VAE1D_LSTM"]:
            return False
        else:
            last_rings_obs = self.last_lidar_obs.reshape((_64, _64))
            last_rings_pred = self.vae.decode(self.lidar_z.reshape((1,self._Z))).reshape((_64, _64))
            # Window and viewport size
            ring_size = _64  # grid cells
            padding = 4  # grid cells
            grid_size = 1  # px per grid cell
            WINDOW_W = (2 * ring_size + 3 * padding) * grid_size
            WINDOW_H = (1 * ring_size + 2 * padding) * grid_size
            VP_W = WINDOW_W
            VP_H = WINDOW_H
            from gym.envs.classic_control import rendering
            import pyglet
            from pyglet import gl
            # Create viewer
            if self.viewer is None:
                self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
                self.rendering_iteration = 0
            # Render in pyglet
            win = self.viewer.window
            win.switch_to()
            win.dispatch_events()
            win.clear()
            gl.glViewport(0, 0, VP_W, VP_H)
            # colors
            bgcolor = np.array([0.4, 0.8, 0.4])
            # Green background
            gl.glBegin(gl.GL_QUADS)
            gl.glColor4f(bgcolor[0], bgcolor[1], bgcolor[2], 1.0)
            gl.glVertex3f(0, VP_H, 0)
            gl.glVertex3f(VP_W, VP_H, 0)
            gl.glVertex3f(VP_W, 0, 0)
            gl.glVertex3f(0, 0, 0)
            gl.glEnd()
            # rings - observation (left), then reconstruction (right)
            w_offset = 0
            for rings in [last_rings_obs, last_rings_pred]:
                for i in range(ring_size):
                    for j in range(ring_size):
                        cell_color = 1 - rings[i, j]
                        cell_y = (padding + i) * grid_size  # px
                        cell_x = (padding + j + w_offset) * grid_size  # px
                        gl.glBegin(gl.GL_QUADS)
                        gl.glColor4f(cell_color, cell_color, cell_color, 1.0)
                        gl.glVertex3f(cell_x+       0,  cell_y+grid_size, 0)  # noqa
                        gl.glVertex3f(cell_x+grid_size, cell_y+grid_size, 0)  # noqa
                        gl.glVertex3f(cell_x+grid_size, cell_y+        0, 0)  # noqa
                        gl.glVertex3f(cell_x+        0, cell_y+        0, 0)  # noqa
                        gl.glEnd()
                w_offset += ring_size + padding
            if save_to_file:
                pyglet.image.get_buffer_manager().get_color_buffer().save(
                    "/tmp/encodeder_rings{:04d}.png".format(self.rendering_iteration))
            # actualize
            win.flip()
            self.rendering_iteration += 1
            return self.viewer.isopen
Beispiel #14
0
from navrep.models.rnn import reset_graph, sample_hps_params, MDNRNN, get_pi_idx
from navrep.models.vae2d import ConvVAE

# sampling temperature and size of the lidar latent
_Z = 32
TEMPERATURE = 0.5

# saved models and the pre-encoded sequence to replay
vae_model_path = os.path.expanduser("~/navrep/models/V/vae.json")
rnn_model_path = os.path.expanduser("~/navrep/models/M/rnn.json")
sequence_z_path = os.path.expanduser(
    "~/navrep/datasets/M/ian/000_mus_logvars_robotstates_actions_rewards_dones.npz"
)

# reset_graph() is called first, then both models are built and their weights loaded
reset_graph()
rnn = MDNRNN(sample_hps_params, gpu_mode=False)
vae = ConvVAE(batch_size=1, is_training=False)
vae.load_json(vae_model_path)
rnn.load_json(rnn_model_path)

rings_def = generate_rings(64, 64)

# load the encoded sequence (latent means, log-variances, actions)
arrays = np.load(sequence_z_path)
sequence_mu = arrays["mus"]
sequence_logvar = arrays["logvars"]
sequence_action = arrays["actions"]
SEQUENCE_LENGTH = len(sequence_mu)
# sample z = mu + sigma * eps, with sigma = exp(logvar / 2) and eps ~ N(0, 1)
sequence_z = sequence_mu + np.random.randn(*sequence_mu.shape) * np.exp(
    sequence_logvar / 2.0)
Beispiel #15
0
# model and dataset locations for the selected variant
if VARIANT == "marktwo":
    vae_path = os.path.expanduser("~/navrep/models/V/marktwovae.json")
    V_dataset_folder = os.path.expanduser("~/navrep/datasets/V/marktwo")
    M_dataset_folder = os.path.expanduser("~/navrep/datasets/M/marktwo")
    SS = None
elif VARIANT == "navreptrain":
    vae_path = os.path.expanduser("~/navrep/models/V/navreptrainvae.json")
    V_dataset_folder = os.path.expanduser("~/navrep/datasets/V/navreptrain")
    M_dataset_folder = os.path.expanduser("~/navrep/datasets/M/navreptrain")
    SS = None

# create network
reset_graph()
vae = ConvVAE(z_size=_Z, batch_size=1, is_training=False)
vae.load_json(vae_path)

# rings converter
rings_def = generate_rings(64, 64)

# labels to learn are x, r, d (obs, reward, done)

# collect every .npz file below the V dataset folder, in sorted order
files = []
for dirpath, dirnames, filenames in os.walk(V_dataset_folder):
    for filename in (f for f in filenames if f.endswith(".npz")):
        files.append(os.path.join(dirpath, filename))
files = sorted(files)
for path in files:
Beispiel #16
0
# Parameters for training
NUM_EPOCH = 1000  # 10
batch_size = 100
DATA_DIR = "record"
HOME = os.path.expanduser("~")

# where the trained model gets saved (the directory is created if missing)
model_save_dir = HOME + "/navrep/models/V"
if not os.path.exists(model_save_dir):
    os.makedirs(model_save_dir)
model_save_path = os.path.join(model_save_dir, "imvae.json")

# create network
reset_graph()
vae = ConvVAE(batch_size=batch_size, is_training=True, reuse=False, channels=3)

# create training dataset
dataset, _, _, _ = rosbag_to_image_dataset(
    "~/rosbags/openlab_rosbags/corridor_koze_kids.bag")

# split into batches (samples beyond the last full batch are not counted):
total_length = len(dataset)
if total_length == 0:
    raise ValueError("no images found, exiting")
num_batches = total_length // batch_size

# train loop:
print("train", "step", "loss", "recon_loss", "kl_loss")
Beispiel #17
0
class DreamEnv(object):
    """ Environment-like object whose next states are predicted by the MDNRNN world model.

    The state is a latent vector z of size _Z + _G: the first _Z values are decoded by
    the VAE for rendering, the remaining values are the goal divided by MAX_GOAL_DIST.
    """
    def __init__(
            self,
            temperature=0.25,
            initial_z_path=os.path.
        expanduser(
            "~/navrep/datasets/M/ian/000_mus_logvars_robotstates_actions_rewards_dones.npz"
        ),
            rnn_model_path=os.path.expanduser("~/navrep/models/M/rnn.json"),
            vae_model_path=os.path.expanduser("~/navrep/models/V/vae.json"),
    ):
        """ Load the V and M models, build the initial latent state and hot-start the rnn.

        temperature: sampling temperature used in step() (0 selects the deterministic branch)
        initial_z_path: npz file from which the first "mus", "logvars" and "robotstates"
            entries are read to build the initial latent
        rnn_model_path, vae_model_path: json files the models are loaded from
        """
        # constants
        self.TEMPERATURE = temperature
        self.DT = 0.5  # should be the same as data rnn was trained with
        # V + M Models
        reset_graph()
        self.rnn = MDNRNN(sample_hps_params, gpu_mode=False)
        self.vae = ConvVAE(batch_size=1, is_training=False)
        self.vae.load_json(vae_model_path)
        self.rnn.load_json(rnn_model_path)
        # load initial image encoding
        arrays = np.load(initial_z_path)
        initial_mu = arrays["mus"][0]
        initial_logvar = arrays["logvars"][0]
        initial_robotstate = arrays["robotstates"][0]
        # sample the initial lidar latent: z = mu + exp(logvar / 2) * eps
        ini_lidar_z = initial_mu + np.exp(
            initial_logvar / 2.0) * np.random.randn(*(initial_mu.shape))
        # first two robotstate values, scaled by MAX_GOAL_DIST, form the goal part of z
        ini_goal_z = initial_robotstate[:2] / MAX_GOAL_DIST
        self.initial_z = np.concatenate([ini_lidar_z, ini_goal_z], axis=-1)
        # other tools
        self.rings_def = generate_rings(64, 64)
        self.viewer = None
        # environment state variables
        self.reset()
        # hot-start the rnn state
        # (zero action, z forced back to initial_z; note that each call also
        # increments self.episode_step)
        for i in range(20):
            self.step(np.array([0, 0, 0]), override_next_z=self.initial_z)

    def step(self, action, override_next_z=None):
        """ Advance the dream by one step using the rnn prediction.

        action: reshaped to (1, 1, 3) and fed to the rnn
        override_next_z: if given, stored as the new latent state instead of the prediction

        returns (next_z, None, next_restart, {}), where next_z is the predicted latent
        (also when override_next_z is given) and next_restart is always 0
        """
        feed = {
            self.rnn.input_z: np.reshape(self.prev_z, (1, 1, _Z + _G)),
            self.rnn.input_action: np.reshape(action, (1, 1, 3)),
            self.rnn.input_restart: np.reshape(self.prev_restart, (1, 1)),
            self.rnn.initial_state: self.rnn_state,
        }

        [logmix, mean, logstd, logrestart, next_state] = self.rnn.sess.run(
            [
                self.rnn.out_logmix,
                self.rnn.out_mean,
                self.rnn.out_logstd,
                self.rnn.out_restart_logits,
                self.rnn.final_state,
            ],
            feed,
        )
        # number of latent dimensions, each with its own mixture
        # (logmix / mean / logstd are indexed [dimension][mixture component] below)
        OUTWIDTH = _Z + _G

        if self.TEMPERATURE == 0:  # deterministically pick max of MDN distribution
            mixture_idx = np.argmax(logmix, axis=-1)
            chosen_mean = mean[(range(OUTWIDTH), mixture_idx)]
            chosen_logstd = logstd[(range(OUTWIDTH), mixture_idx)]
            next_z = chosen_mean
        else:  # sample from modelled MDN distribution
            # temperature-scaled softmax over the mixture weights of each dimension
            mixprob = np.copy(logmix) / self.TEMPERATURE  # adjust temperatures
            mixprob -= mixprob.max()
            mixprob = np.exp(mixprob)
            mixprob /= mixprob.sum(axis=1).reshape(OUTWIDTH, 1)

            mixture_idx = np.zeros(OUTWIDTH)
            chosen_mean = np.zeros(OUTWIDTH)
            chosen_logstd = np.zeros(OUTWIDTH)
            # pick one mixture component per latent dimension
            for j in range(OUTWIDTH):
                idx = get_pi_idx(np.random.rand(), mixprob[j])
                mixture_idx[j] = idx
                chosen_mean[j] = mean[j][idx]
                chosen_logstd[j] = logstd[j][idx]
            # gaussian noise is scaled by sqrt(temperature)
            rand_gaussian = np.random.randn(OUTWIDTH) * np.sqrt(
                self.TEMPERATURE)
            next_z = chosen_mean + np.exp(chosen_logstd) * rand_gaussian
        # with differential_z the sampled value is added to the previous z
        if sample_hps_params.differential_z:
            next_z = self.prev_z + next_z

        # the restart prediction (logrestart) is currently ignored
        next_restart = 0
        #         if logrestart[0] > 0:
        #             next_restart = 1

        self.prev_z = next_z
        if override_next_z is not None:
            self.prev_z = override_next_z
        self.prev_restart = next_restart
        self.rnn_state = next_state
        # logging-only vars, used for rendering
        self.prev_action = action
        self.episode_step += 1

        return next_z, None, next_restart, {}

    def reset(self):
        """ Go back to the initial latent, with a zeroed rnn state and the restart flag set """
        self.prev_z = self.initial_z
        self.prev_restart = np.array([1])
        self.rnn_state = self.rnn.sess.run(self.rnn.zero_state)
        # logging vars
        self.prev_action = np.array([0.0, 0.0, 0.0])
        self.episode_step = 0

    def render(self, mode="human", close=False):
        """ Draw the decoded lidar scan, the robot and the predicted goal.

        mode: "human" draws into a pyglet window, "rgb_array" raises NotImplementedError
        close: close the viewer (if any) instead of rendering

        returns viewer.isopen in "human" mode, None otherwise
        """
        if close:
            if self.viewer is not None:
                self.viewer.close()
            return

        # get last z decoding
        # (first _Z values of z -> rings -> 1080 lidar ranges; remaining values -> goal)
        rings_pred = (
            self.vae.decode(self.prev_z.reshape(1, _Z + _G)[:, :_Z]) *
            self.rings_def["rings_to_bool"])
        predicted_ranges = self.rings_def["rings_to_lidar"](rings_pred, 1080)
        goal_pred = self.prev_z.reshape((_Z + _G, ))[_Z:] * MAX_GOAL_DIST

        if mode == "rgb_array":
            raise NotImplementedError
        elif mode == "human":
            # Window and viewport size
            WINDOW_W = 256
            WINDOW_H = 256
            M_PER_PX = 25.6 / WINDOW_H
            VP_W = WINDOW_W
            VP_H = WINDOW_H
            from gym.envs.classic_control import rendering
            import pyglet
            from pyglet import gl

            # Create viewer
            if self.viewer is None:
                self.viewer = rendering.Viewer(WINDOW_W, WINDOW_H)
                self.score_label = pyglet.text.Label(
                    "0000",
                    font_size=12,
                    x=20,
                    y=WINDOW_H * 2.5 / 40.00,
                    anchor_x="left",
                    anchor_y="center",
                    color=(255, 255, 255, 255),
                )
                #                 self.transform = rendering.Transform()
                self.currently_rendering_iteration = 0
                self.image_lock = threading.Lock()
            # Render in pyglet
            def make_circle(c, r, res=10):
                """ Vertices (res, 2) of a regular polygon with center c and radius r """
                thetas = np.linspace(0, 2 * np.pi, res + 1)[:-1]
                verts = np.zeros((res, 2))
                verts[:, 0] = c[0] + r * np.cos(thetas)
                verts[:, 1] = c[1] + r * np.sin(thetas)
                return verts

            with self.image_lock:
                self.currently_rendering_iteration += 1
                # NOTE(review): purpose of this draw_circle call is not evident from here,
                # the window is cleared right after - confirm it is needed
                self.viewer.draw_circle(r=10, color=(0.3, 0.3, 0.3))
                win = self.viewer.window
                win.switch_to()
                win.dispatch_events()
                win.clear()
                gl.glViewport(0, 0, VP_W, VP_H)
                # colors
                bgcolor = np.array([0.4, 0.8, 0.4])
                nosecolor = np.array([0.3, 0.3, 0.3])
                lidarcolor = np.array([1.0, 0.0, 0.0])
                # Green background
                gl.glBegin(gl.GL_QUADS)
                gl.glColor4f(bgcolor[0], bgcolor[1], bgcolor[2], 1.0)
                gl.glVertex3f(0, VP_H, 0)
                gl.glVertex3f(VP_W, VP_H, 0)
                gl.glVertex3f(VP_W, 0, 0)
                gl.glVertex3f(0, 0, 0)
                gl.glEnd()
                # LIDAR
                # rays start at the window center, lengths are converted from m to px
                i = WINDOW_W / 2.0
                j = WINDOW_H / 2.0
                angle = np.pi / 2.0
                scan = np.squeeze(predicted_ranges)
                lidar_angles = np.linspace(0, 2 * np.pi, len(scan) + 1)[:-1]
                lidar_angles = lidar_angles + np.pi / 2.  # make robot face up
                i_ray_ends = i + scan / M_PER_PX * np.cos(lidar_angles)
                j_ray_ends = j + scan / M_PER_PX * np.sin(lidar_angles)
                # rays close to the heading direction are drawn in a different color
                is_in_fov = np.cos(lidar_angles - angle) >= 0.78
                for ray_idx in range(len(scan)):
                    end_i = i_ray_ends[ray_idx]
                    end_j = j_ray_ends[ray_idx]
                    gl.glBegin(gl.GL_LINE_LOOP)
                    if is_in_fov[ray_idx]:
                        gl.glColor4f(1.0, 1.0, 0.0, 0.1)
                    else:
                        gl.glColor4f(lidarcolor[0], lidarcolor[1],
                                     lidarcolor[2], 0.1)
                    gl.glVertex3f(i, j, 0)
                    gl.glVertex3f(end_i, end_j, 0)
                    gl.glEnd()
                # Agent body
                i = WINDOW_W / 2.0
                j = WINDOW_H / 2.0
                r = 0.3 / M_PER_PX
                angle = np.pi / 2.0
                poly = make_circle((i, j), r)
                gl.glBegin(gl.GL_POLYGON)
                color = np.array([1.0, 1.0, 1.0])
                gl.glColor4f(color[0], color[1], color[2], 1)
                for vert in poly:
                    gl.glVertex3f(vert[0], vert[1], 0)
                gl.glEnd()
                # Direction triangle
                inose = i + r * np.cos(angle)
                jnose = j + r * np.sin(angle)
                iright = i + 0.3 * r * -np.sin(angle)
                jright = j + 0.3 * r * np.cos(angle)
                ileft = i - 0.3 * r * -np.sin(angle)
                jleft = j - 0.3 * r * np.cos(angle)
                gl.glBegin(gl.GL_TRIANGLES)
                gl.glColor4f(nosecolor[0], nosecolor[1], nosecolor[2], 1)
                gl.glVertex3f(inose, jnose, 0)
                gl.glVertex3f(iright, jright, 0)
                gl.glVertex3f(ileft, jleft, 0)
                gl.glEnd()
                # Goal
                goalcolor = np.array([1., 1., 0.3])
                px_goal = goal_pred / M_PER_PX
                igoal = i - px_goal[1]  # rotate 90deg to face up
                jgoal = j + px_goal[0]
                # Goal line
                gl.glBegin(gl.GL_LINE_LOOP)
                gl.glColor4f(goalcolor[0], goalcolor[1], goalcolor[2], 1)
                gl.glVertex3f(i, j, 0)
                gl.glVertex3f(igoal, jgoal, 0)
                gl.glEnd()
                # Goal markers
                gl.glBegin(gl.GL_TRIANGLES)
                gl.glColor4f(goalcolor[0], goalcolor[1], goalcolor[2], 1)
                triangle = make_circle((igoal, jgoal), r / 3., res=3)
                for vert in triangle:
                    gl.glVertex3f(vert[0], vert[1], 0)
                gl.glEnd()
                # Text
                # last action and step counter
                self.score_label.text = "A {:.1f} {:.1f} {:.1f} S {}".format(
                    self.prev_action[0],
                    self.prev_action[1],
                    self.prev_action[2],
                    self.episode_step,
                )
                self.score_label.draw()
                win.flip()
                return self.viewer.isopen

    def close(self):
        """ Close the viewer, if any """
        self.render(close=True)

    def _get_dt(self):
        """ Return DT, as set in __init__ """
        return self.DT

    def _get_viewer(self):
        """ Return the viewer (None until the first render call) """
        return self.viewer
Beispiel #18
0
            lidar_e = None
            state_e = None
            if step % 200 == 0:
                # load VAE
                if VAE_TYPE == "1d":
                    if vae is None:
                        vae = Conv1DVAE(z_size=_Z,
                                        batch_size=model.hps.max_seq_len - 1,
                                        is_training=False)
                        vae.load_json(vae_model_path)
                    lidar_e, state_e = vae1d_rnn_worldmodel_error(
                        model, test_dataset_folder, vae)
                else:
                    if vae is None:
                        vae = ConvVAE(z_size=_Z,
                                      batch_size=model.hps.max_seq_len - 1,
                                      is_training=False)
                        vae.load_json(vae_model_path)
                    lidar_e, state_e = rnn_worldmodel_error(
                        model, test_dataset_folder, vae)

                print("Test: lidar error {}, state error {}".format(
                    lidar_e, state_e))
                model.save_json(model_path)

            if step % 20 == 0 and step > 0:
                end = time.time()
                time_taken = end - start
                start = time.time()
                output_log = (
                    "step: %d, lr: %.6f, cost: %.4f, z_cost: %.4f, r_cost: %.4f, train_time_taken: %.4f"
Beispiel #19
0
from navrep.models.tcn import reset_graph, sample_hps_params, MDNTCN, get_pi_idx
from navrep.models.vae2d import ConvVAE

# sampling temperature and size of the lidar latent
_Z = 32
TEMPERATURE = 0.5

# saved models and the pre-encoded sequence to replay
# (rnn_model_path keeps its name, but points at the tcn model)
vae_model_path = os.path.expanduser("~/navrep/models/V/vae.json")
rnn_model_path = os.path.expanduser("~/navrep/models/M/tcn.json")
sequence_z_path = os.path.expanduser(
    "~/navrep/datasets/M/ian/000_mus_logvars_robotstates_actions_rewards_dones.npz"
)

# reset_graph() is called first, then both models are built and their weights loaded
reset_graph()
tcn = MDNTCN(sample_hps_params, gpu_mode=False)
vae = ConvVAE(batch_size=1, is_training=False)
vae.load_json(vae_model_path)
tcn.load_json(rnn_model_path)

rings_def = generate_rings(64, 64)

# load the encoded sequence (latent means, log-variances, actions, dones)
arrays = np.load(sequence_z_path)
sequence_mu = arrays["mus"]
sequence_logvar = arrays["logvars"]
sequence_action = arrays["actions"]
sequence_restart = arrays["dones"]
# sample z = mu + sigma * eps, with sigma = exp(logvar / 2) and eps ~ N(0, 1)
sequence_z = sequence_mu + np.random.randn(*sequence_mu.shape) * np.exp(
    sequence_logvar / 2.0)