def plot_visitation(traj, mdp, args, filename='visitation.pdf'):
    """Scatter the positions visited along ``traj`` and save the figure.

    The task is selected from ``args.tasktype`` / ``args.task``:

    * pinball: plots ``s.data[0]`` against ``s.data[1]`` and fills every
      obstacle polygon of ``mdp.domain.environment.obstacles`` in black.
    * PointMaze-v0 / AntMaze-v0: plots only the states whose first two
      coordinates lie in ``[-4, 20]`` and draws a hard-coded 5x5 maze.
    * MontezumaRevenge-ram-v0: draws ``./montezuma.jpg`` as background and
      plots the first two entries of ``Monte().feature(s, 0)`` in red.

    Any other task produces an empty figure.  The current pyplot figure is
    written to ``filename`` and then closed.
    """
    # TODO: it only works for Pinball.
    if args.tasktype == 'pinball':
        visited_x = [state.data[0] for state in traj]
        visited_y = [state.data[1] for state in traj]
        plt.plot(visited_x, visited_y, 'b.', alpha=0.5)

        for obstacle in mdp.domain.environment.obstacles:
            corners = [(pt[0], pt[1]) for pt in obstacle.points]
            plt.fill([cx for cx, _ in corners],
                     [cy for _, cy in corners], 'k')

    elif args.task in ('PointMaze-v0', 'AntMaze-v0'):
        lower = -4.0
        upper = -4.0 + 8.0 * 3.0
        # Keep only the states inside the square region covered by the maze.
        inside = [
            state for state in traj
            if lower <= state.data[0] <= upper
            and lower <= state.data[1] <= upper
        ]
        visited_x = [state.data[0] for state in inside]
        visited_y = [state.data[1] for state in inside]
        plt.plot(visited_x, visited_y, 'b.', alpha=0.5)

        # TODO: (x,y) coordinates start at 0, 0.
        #       How is the coordinates signed?
        maze = [[1, 1, 1, 1, 1],
                [1, 0, 0, 0, 1],
                [1, 1, 1, 0, 1],
                [1, 0, 0, 0, 1],
                [1, 1, 1, 1, 1]]
        cell = 8.0
        for row, walls in enumerate(maze):
            for col, is_wall in enumerate(walls):
                if is_wall != 1:
                    continue
                # Shift by 1.5 cells: the (0, 0)-coordinate sits at the
                # (1, 1) position of the maze grid.
                left = cell * (col - 1.5)
                bottom = cell * (row - 1.5)
                plt.fill([left, left + cell, left + cell, left],
                         [bottom, bottom, bottom + cell, bottom + cell],
                         'k')

    elif args.task == 'MontezumaRevenge-ram-v0':
        # TODO: Show the background of the Monte?
        background = imread('./montezuma.jpg')
        plt.imshow(background, zorder=0, extent=[0, 160, 0, 210])

        extractor = Monte()
        positions = [extractor.feature(state, 0) for state in traj]
        plt.xlim([0, 160])
        plt.ylim([0, 210])
        plt.plot([pos[0] for pos in positions],
                 [pos[1] for pos in positions],
                 'r.', alpha=0.5)

    plt.savefig(filename)
    plt.close()
def setup_networks(self):
    """Create the f-function and the low-level DQN agent.

    ``self.f_func`` selects the f-function:

    * ``'nn'``   - plain ``SpectrumNetwork``
    * ``'nnf'``  - ``SpectrumNetwork`` on ``Monte()`` features
    * ``'nns'``  - ``SpectrumNetwork`` on a ``Subset`` of indices ``[0, 1]``
    * ``'nnc'``  - ``SpectrumNetwork`` with ``conv=True``
    * ``'rand'`` - no f-function (``self.f_function`` is ``None``)

    A non-``None`` f-function is initialised.  ``self.agent`` is always a
    ``DQNAgent`` with ``gamma=0.99`` and is reset before returning.

    Raises:
        AssertionError: if ``self.f_func`` is not one of the values above.
    """
    kind = self.f_func

    def build_spectrum(**extra):
        # Every variant shares the session, input size, width and name.
        return SpectrumNetwork(self.sess, obs_dim=self.obs_dim,
                               n_units=self.n_units, name=self.name, **extra)

    if kind == 'nn':
        self.f_function = build_spectrum()
    elif kind == 'nnf':
        self.f_function = build_spectrum(feature=Monte())
    elif kind == 'nns':
        # TODO: parameterize
        self.f_function = build_spectrum(
            feature=Subset(state_dim=self.obs_dim, feature_indices=[0, 1]))
    elif kind == 'nnc':
        # Convolutions
        self.f_function = build_spectrum(conv=True)
    elif kind == 'rand':
        self.f_function = None
    else:
        print('f_func =', kind)
        # Raise explicitly: a bare ``assert False`` is stripped by
        # ``python -O`` and the method would then fail later with an
        # unrelated AttributeError on ``self.f_function``.
        raise AssertionError('unknown f_func: {!r}'.format(kind))

    if self.f_function is not None:
        self.f_function.initialize()

    self.agent = DQNAgent(self.sess, obs_dim=self.obs_dim,
                          num_actions=self.num_actions, gamma=0.99,
                          name=self.name)
    self.agent.reset()
def plot_eigenfunction(op, args, xind=0, yind=1, filename='visualize_ef.pdf'):
    """Scatter-plot the f-value of ``op.f_function`` over 2-D positions.

    Positions come either from a restored experience buffer
    (``args.restoretraj`` truthy) or from uniform sampling inside the state
    bounds returned by ``get_mdp_params(args)``.  Each point is coloured by
    its f-value with the 'plasma' colormap; ``op.lower_th`` is drawn as a
    line on the colorbar and the number of points below/above it is printed.
    Task-specific scenery (pinball obstacles, the hard-coded maze, or the
    Montezuma background image) is drawn on top, and the current figure is
    saved to ``filename`` and closed.

    Args:
        op: object exposing ``f_function`` and ``lower_th``.
        args: parsed options; reads ``task``, ``tasktype``, ``restoretraj``,
            ``ffunction`` and, when restoring, ``basedir``, ``noptions``,
            ``ffuncnunit`` and ``rseed``.
        xind: index of the state dimension plotted on the x axis.
        yind: index of the state dimension plotted on the y axis.
        filename: output path handed to ``plt.savefig``.
    """
    # Pinball
    mdp, state_dim, state_bound, _, _, _ = get_mdp_params(args)
    n_samples = 2000

    # NOTE(review): these alias state_bound[0] / state_bound[1], so the
    # overrides below write into the object returned by get_mdp_params.
    low_bound = state_bound[0]
    up_bound = state_bound[1]
    if args.task == 'AntMaze-v0' or args.task == 'PointMaze-v0':
        low_bound[xind] = 0.0
        low_bound[yind] = 0.0
        up_bound[xind] = 8.0 * 3.0
        up_bound[yind] = 8.0 * 3.0
    if args.tasktype == 'atari':
        low_bound[xind] = 0.0
        low_bound[yind] = 0.0
        up_bound[xind] = 160.0
        up_bound[yind] = 210.0

    # TODO: Implement a script to plot the f-value of the states
    #       visited by the agent instead of sampling uniform randomly.
    # TODO: bounds can be infinite for some tasks; estimating them from
    #       sampled trajectories was tried before and is currently disabled.

    # Montezuma and the 'nns' f-function are evaluated on a 2-D feature
    # vector; everything else is evaluated on a full state.
    uses_features = (args.task == 'MontezumaRevenge-ram-v0'
                     or args.ffunction == 'nns')

    if args.restoretraj:
        # Build the path once so the log line shows the location that is
        # actually read (the old message omitted args.basedir).
        traj_path = (args.basedir + '/vis/' + args.task + 'option'
                     + str(args.noptions - 1) + '_' + str(args.ffuncnunit)
                     + '_' + str(args.rseed) + '/' + 'traj')
        print('restoring buffer from ' + traj_path)
        bfr = ExperienceBuffer()
        bfr.restore(traj_path)
        print('bfr_size=', bfr.size())

        # TODO: parameter?
        samples, _, _, _, _ = bfr.sample(n_samples)

        if uses_features:
            if args.task == 'MontezumaRevenge-ram-v0':
                feature = Monte()
            else:
                feature = Subset(state_dim, [0, 1])
            # Evaluate the feature once per sample and split afterwards.
            points = [feature.feature(s, 0) for s in samples]
            xs = [p[0] for p in points]
            ys = [p[1] for p in points]
        else:
            xs = [s.data[xind] for s in samples]
            ys = [s.data[yind] for s in samples]
    else:
        xs = [random.uniform(low_bound[xind], up_bound[xind])
              for _ in range(n_samples)]
        ys = [random.uniform(low_bound[yind], up_bound[yind])
              for _ in range(n_samples)]

    fs = []
    for x, y in zip(xs, ys):
        if uses_features:
            obs = np.array([[x, y]])  # shape (1, 2)
            f_value = op.f_function.f_from_features(obs)[0][0]
        else:
            # Overwrite the two plotted dimensions of an initial state.
            s = mdp.get_init_state()
            s.data[xind] = x
            s.data[yind] = y
            f_value = op.f_function(s)[0][0]
        fs.append(f_value)

    # TODO: What is the best colormap for all people (including color
    #       blinds?) but still appealing for majority?
    #       bwr looks useful, but may be misleading?.
    cmap = matplotlib.cm.get_cmap('plasma')
    normalize = matplotlib.colors.Normalize(vmin=min(fs), vmax=max(fs))
    colors = [cmap(normalize(value)) for value in fs]

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.scatter(x=xs, y=ys, c=colors)

    cax, _ = matplotlib.colorbar.make_axes(ax)
    matplotlib.colorbar.ColorbarBase(cax, cmap=cmap, norm=normalize)

    # Mark the termination threshold on the colorbar.
    term_th = op.lower_th
    cax.plot([0, 1], [term_th] * 2, 'k')

    term = sum(1 for f in fs if f < term_th)
    nonterm = len(fs) - term
    print(term, 'terms', nonterm, 'nonterms')

    # TODO: Only for pinball domains. What to do for MuJoCo?
    # Obstacles
    if args.tasktype == 'pinball':
        for obstacle in mdp.domain.environment.obstacles:
            corners = [(p[0], p[1]) for p in obstacle.points]
            ax.fill([cx for cx, _ in corners],
                    [cy for _, cy in corners], 'k')
    elif args.task == 'PointMaze-v0' or args.task == 'AntMaze-v0':
        # TODO: (x,y) coordinates start at 0, 0.
        #       How is the coordinates signed?
        maze = [[1, 1, 1, 1, 1],
                [1, 0, 0, 0, 1],
                [1, 1, 1, 0, 1],
                [1, 0, 0, 0, 1],
                [1, 1, 1, 1, 1]]
        scale = 8.0
        for row, walls in enumerate(maze):
            for col, is_wall in enumerate(walls):
                if is_wall != 1:
                    continue
                # Shift by one cell: the (0, 0)-coordinate is set at the
                # (1, 1) position in the maze, matching the [0, 24] bounds
                # used for sampling above.
                xbase = scale * (col - 1)
                ybase = scale * (row - 1)
                ax.fill([xbase, xbase + scale, xbase + scale, xbase],
                        [ybase, ybase, ybase + scale, ybase + scale], 'k')
    elif args.task == 'MontezumaRevenge-ram-v0':
        # TODO: Show the background of the Monte?
        img = imread('./montezuma.jpg')
        ax.imshow(img, zorder=0, extent=[0, 160, 0, 210])

    plt.savefig(filename)
    plt.close()
def plot_trajectory(traj, mdp, args, filename='trajectory.pdf'):
    """Draw one trajectory as a green line and save the figure.

    The first state is marked with a blue dot and the last one with a red
    dot.  Depending on ``args.tasktype`` / ``args.task`` the pinball
    obstacles, the hard-coded 5x5 maze, or the Montezuma background image
    are drawn as well.  For Montezuma the path is cut so that it starts
    right after the last step whose integer position equals the initial
    one.  Unknown tasks yield an empty figure.  The current pyplot figure
    is written to ``filename`` and closed.
    """

    def draw_path(px, py):
        # Path in green, start in blue, end in red.
        plt.plot(px, py, 'g')
        plt.plot(px[0], py[0], 'bo')
        plt.plot(px[-1], py[-1], 'ro')

    if args.tasktype == 'pinball':
        draw_path([state.x for state in traj],
                  [state.y for state in traj])

        for obstacle in mdp.domain.environment.obstacles:
            corners = [(pt[0], pt[1]) for pt in obstacle.points]
            plt.fill([cx for cx, _ in corners],
                     [cy for _, cy in corners], 'k')

    elif args.task in ('PointMaze-v0', 'AntMaze-v0'):
        path_x = [state.data[0] for state in traj]
        path_y = [state.data[1] for state in traj]
        print('x =', min(path_x), ' to ', max(path_x))
        print('y =', min(path_y), ' to ', max(path_y))
        draw_path(path_x, path_y)

        # TODO: (x,y) coordinates start at 0, 0.
        #       How is the coordinates signed?
        maze = [[1, 1, 1, 1, 1],
                [1, 0, 0, 0, 1],
                [1, 1, 1, 0, 1],
                [1, 0, 0, 0, 1],
                [1, 1, 1, 1, 1]]
        cell = 8.0
        for row, walls in enumerate(maze):
            for col, is_wall in enumerate(walls):
                if is_wall != 1:
                    continue
                # Shift by 1.5 cells: the (0, 0)-coordinate sits at the
                # (1, 1) position of the maze grid.
                left = cell * (col - 1.5)
                bottom = cell * (row - 1.5)
                plt.fill([left, left + cell, left + cell, left],
                         [bottom, bottom, bottom + cell, bottom + cell],
                         'k')

    elif args.task == 'MontezumaRevenge-ram-v0':
        extractor = Monte()
        path_x = [extractor.feature(state, 0)[0] for state in traj]
        path_y = [extractor.feature(state, 0)[1] for state in traj]

        # Duplicate detection: index of the last step whose integer
        # position coincides with the initial position (0 if none).
        dup = max(
            (idx for idx in range(1, len(path_x))
             if int(path_x[idx]) == int(path_x[0])
             and int(path_y[idx]) == int(path_y[0])),
            default=0,
        )
        print('dup=', dup)

        # Start after that step, but always keep at least the last point.
        path_x = path_x[min(dup + 1, len(path_x) - 1):]
        path_y = path_y[min(dup + 1, len(path_y) - 1):]
        draw_path(path_x, path_y)

        # TODO: Show the background of the Monte?
        background = imread('./montezuma.jpg')
        plt.imshow(background, zorder=0, extent=[0, 160, 0, 210])

    plt.savefig(filename)
    plt.close()
def setup_networks(self):
    """Create the f-function and the low-level agent.

    ``self.f_func`` selects the f-function:

    * ``'fourier'`` - ``SpectrumFourier`` on order-4 ``Fourier`` features
    * ``'nn'``      - plain ``SpectrumNetwork``
    * ``'nnf'``     - ``SpectrumNetwork`` on ``Monte()`` features
    * ``'nns'``     - ``SpectrumNetwork`` on a ``Subset`` of indices ``[0, 1]``
    * ``'nnc'``     - ``SpectrumNetwork`` with ``conv=True``
    * ``'rand'``    - no f-function (``self.f_function`` is ``None``)
    * ``'agent'``   - ``SpectrumFourier`` on ``AgentPos(game='Freeway')``

    ``self.low_method`` selects the agent: ``'linear'`` (``LinearQAgent`` on
    order-3 Fourier features), ``'ddpg'``, ``'dqn'`` (``gamma=0.99``) or
    ``'rand'`` (continuous random agent when ``self.num_actions`` is
    ``None``, discrete otherwise).  A non-``None`` f-function is
    initialised and the agent is reset before returning.

    Raises:
        AssertionError: if ``self.f_func`` or ``self.low_method`` is not
            one of the values listed above.
    """
    kind = self.f_func
    print('f_func=', kind)

    def build_spectrum(**extra):
        # Every network variant shares the session, input size, width and name.
        return SpectrumNetwork(self.sess, obs_dim=self.obs_dim,
                               n_units=self.n_units, name=self.name, **extra)

    if kind == 'fourier':
        features = Fourier(state_dim=self.obs_dim, bound=self.obs_bound,
                           order=4)
        self.f_function = SpectrumFourier(obs_dim=self.obs_dim,
                                          feature=features, name=self.name)
    elif kind == 'nn':
        self.f_function = build_spectrum()
    elif kind == 'nnf':
        self.f_function = build_spectrum(feature=Monte())
    elif kind == 'nns':
        # TODO: parameterize
        self.f_function = build_spectrum(
            feature=Subset(state_dim=self.obs_dim, feature_indices=[0, 1]))
    elif kind == 'nnc':
        # Convolutions
        self.f_function = build_spectrum(conv=True)
    elif kind == 'rand':
        self.f_function = None
    elif kind == 'agent':
        features = AgentPos(game='Freeway')
        self.f_function = SpectrumFourier(obs_dim=self.obs_dim,
                                          feature=features, name=self.name)
    else:
        print('f_func =', kind)
        # Raise explicitly: a bare ``assert False`` is stripped by
        # ``python -O`` and the method would then fail later with an
        # unrelated AttributeError on ``self.f_function``.
        raise AssertionError('unknown f_func: {!r}'.format(kind))

    if self.f_function is not None:
        self.f_function.initialize()

    method = self.low_method
    if method == 'linear':
        features = Fourier(state_dim=self.obs_dim, bound=self.obs_bound,
                           order=3)
        self.agent = LinearQAgent(actions=range(self.num_actions),
                                  feature=features, name=self.name)
    elif method == 'ddpg':
        # TODO: Using on-policy method is not good for options?
        #       is DDPG off-policy?
        self.agent = DDPGAgent(self.sess, obs_dim=self.obs_dim,
                               action_dim=self.action_dim,
                               action_bound=self.action_bound,
                               name=self.name)
    elif method == 'dqn':
        self.agent = DQNAgent(self.sess, obs_dim=self.obs_dim,
                              num_actions=self.num_actions, gamma=0.99,
                              name=self.name)
    elif method == 'rand':
        # ``num_actions is None`` is treated as a continuous action space.
        if self.num_actions is None:
            self.agent = RandomContAgent(action_dim=self.action_dim,
                                         action_bound=self.action_bound,
                                         name=self.name)
        else:
            self.agent = RandomAgent(range(self.num_actions), name=self.name)
    else:
        print('low_method=', method)
        # Explicit raise for the same reason as above: without it
        # ``self.agent.reset()`` would fail on a missing attribute under -O.
        raise AssertionError('unknown low_method: {!r}'.format(method))

    self.agent.reset()
def plot_op(op, args, mdp, state_bound, filename):
    """Visualise where an option is active as a coloured scatter plot.

    2-D positions are either taken from an experience buffer
    (``args.restoretraj`` truthy) or sampled uniformly inside task-specific
    bounds.  Each position is evaluated with ``op.f_function`` and coloured
    with the 'Blues' colormap; positions on the far side of the termination
    threshold are drawn in dark grey.  With ``args.reverse`` the threshold
    is ``op.upper_th`` and values at or above it are greyed out; otherwise
    the threshold is ``op.lower_th`` and values at or below it are greyed
    out.  The background is delegated to ``plot_bg`` and the figure is saved
    to ``filename`` and closed.

    Args:
        op: object exposing ``f_function``, ``lower_th`` and ``upper_th``.
        args: parsed options; reads ``task``, ``tasktype``, ``restoretraj``,
            ``ffunction`` and ``reverse``.
        mdp: environment wrapper; used for bounds and initial states.
        state_bound: ``(low, high)`` pair indexed as ``[0][i]`` / ``[1][i]``;
            used only when no task-specific bounds apply.
        filename: output path handed to ``plt.savefig``.
    """
    print('visop')
    n_samples = 2000

    # Montezuma and the 'nns' f-function are evaluated on a 2-D feature
    # vector; everything else is evaluated on a full state.
    uses_features = (args.task == 'MontezumaRevenge-ram-v0'
                     or args.ffunction == 'nns')

    # TODO: Visualize the options according to the direction
    if args.restoretraj:
        # NOTE(review): ``bfr`` (and ``state_dim`` below) are not defined
        # in this function; they are presumably module-level names -
        # confirm before relying on this path.
        # The result was previously bound to ``sample`` while every use
        # below read ``samples``, which raised NameError.
        samples, _, _, _, _ = bfr.sample(n_samples)
        if uses_features:
            if args.task == 'MontezumaRevenge-ram-v0':
                feature = Monte()
            else:
                feature = Subset(state_dim, [0, 1])
            xs = [feature.feature(s, 0)[0] for s in samples]
            ys = [feature.feature(s, 0)[1] for s in samples]
        else:
            xs = [s.data[0] for s in samples]
            ys = [s.data[1] for s in samples]
    else:
        # TODO: bounds should be implemented inside the tasks.
        if 'Ant' in args.task or 'Point' in args.task:
            up_bound_x, low_bound_x, up_bound_y, low_bound_y = \
                util.bounds(mdp)
        elif args.tasktype == 'atari':
            low_bound_x = 0.0
            low_bound_y = 0.0
            up_bound_x = 160.0
            up_bound_y = 210.0
        else:
            low_bound_x = state_bound[0][0]
            low_bound_y = state_bound[0][1]
            up_bound_x = state_bound[1][0]
            up_bound_y = state_bound[1][1]
        xs = [random.uniform(low_bound_x, up_bound_x)
              for _ in range(n_samples)]
        ys = [random.uniform(low_bound_y, up_bound_y)
              for _ in range(n_samples)]

    fs = []
    for x, y in zip(xs, ys):
        if uses_features:
            obs = np.array([[x, y]])  # shape (1, 2)
            f_value = op.f_function.f_from_features(obs)[0][0]
        else:
            # Overwrite the first two dimensions of an initial state.
            s = mdp.get_init_state()
            s.data[0] = x
            s.data[1] = y
            f_value = op.f_function(s)[0][0]
        fs.append(f_value)

    # TODO: Find the best color mapping for visualization.
    # TODO: What is the best thing we can do for color blinds?
    #       Intensity of the plot?
    cmap = matplotlib.cm.get_cmap('Blues')

    # Dark grey for masked-out points, written as RGBA so every entry of
    # ``colors`` has the same length as the tuples returned by ``cmap``.
    masked = (0.15, 0.15, 0.15, 1.0)

    if args.reverse:
        term_th = op.upper_th
        normalize = matplotlib.colors.Normalize(vmin=min(fs), vmax=term_th)
        # TODO: Does this give us an inverse direction?
        colors = [cmap(normalize(value)) if value < term_th else masked
                  for value in fs]
    else:
        term_th = op.lower_th
        normalize = matplotlib.colors.Normalize(vmin=term_th, vmax=max(fs))
        # NOTE(review): the colormap is inverted here but not in the
        # colorbar below - confirm that this is intended.
        colors = [cmap(1.0 - normalize(value)) if value > term_th else masked
                  for value in fs]

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.scatter(x=xs, y=ys, c=colors)

    cax, _ = matplotlib.colorbar.make_axes(ax)
    matplotlib.colorbar.ColorbarBase(cax, cmap=cmap, norm=normalize)

    # NOTE(review): the count uses ``f < term_th`` in both directions,
    # whereas the reverse branch greys out ``f >= term_th`` - confirm that
    # the labels are right when args.reverse is set.
    term = sum(1 for f in fs if f < term_th)
    nonterm = len(fs) - term
    print(term, 'terms', nonterm, 'nonterms')

    plot_bg(mdp, args, fig, ax)

    plt.savefig(filename, bbox_inches='tight', pad_inches=0)
    plt.close()
def plot_traj(traj, mdp, args, filename='trajectory'):
    """Plot one trajectory on top of the task background and save it.

    The path is drawn in green with a blue dot at its first point and a red
    dot at its last point, on a figure whose background comes from
    ``plot_bg``.  The result is saved as
    ``<filename>_<len(traj)>.pdf``.

    Task-specific behaviour:

    * pinball: positions are ``s.x`` / ``s.y``.
    * Point / Ant tasks: positions are
      ``maze_width(mdp) - s.data[1]`` / ``maze_height(mdp) - s.data[0]``;
      additionally every third state is replayed in the environment and the
      rendered frame is written to ``<task>_<NN>.png``.
    * MontezumaRevenge-ram-v0: positions are the first two entries of
      ``Monte().feature(s, 0)``; the path starts after the last step whose
      integer position equals the initial one.
    * tasktype 'atari': only the first and last frame are saved as
      ``<filename>_init.pdf`` / ``<filename>_goal.pdf`` and the function
      returns early.
    * tasktype 'atariram': the position is the first pixel (scanning
      column by column) whose colour equals the sprite colour of the
      task; frames without such a pixel are skipped.

    Raises:
        AssertionError: for an 'atariram' task without a known sprite
            colour (after printing the colours found in each frame).
    """
    fig, ax = plt.subplots(figsize=(8, 6))

    # Default to an empty path so unsupported tasks no longer hit a
    # NameError when the plotting code below runs.
    xs, ys = [], []

    if args.tasktype == 'pinball':
        xs = [s.x for s in traj]
        ys = [s.y for s in traj]
    elif 'Point' in args.task or 'Ant' in args.task:
        xs = [maze_width(mdp) - s.data[1] for s in traj]
        ys = [maze_height(mdp) - s.data[0] for s in traj]
        if xs:
            print('x =', min(xs), ' to ', max(xs))
            print('y =', min(ys), ' to ', max(ys))

        import time
        # Plot every three states in a trajectory (otherwise it gets
        # unreadable).  Stepping with range() keeps the index inside the
        # trajectory; the former ``int(len / 3) + 1`` bound ran one step
        # past the end whenever the length was a multiple of three.
        for frame_no, idx in enumerate(range(0, len(xs), 3)):
            mdp.env.wrapped_env.set_xy([xs[idx], ys[idx]])
            mdp.env.wrapped_env.set_ori(traj[idx].data[2])
            mdp.env.step([0, 0])
            time.sleep(0.1)
            img = mdp.env.render(mode='rgb_array')
            # Frame number is zero-padded to two digits.
            imsave('{}_{:02d}.png'.format(args.task, frame_no), img)
    elif args.task == 'MontezumaRevenge-ram-v0':
        feature = Monte()
        xs = [feature.feature(s, 0)[0] for s in traj]
        ys = [feature.feature(s, 0)[1] for s in traj]
        # Duplicate detection: last step at the initial integer position.
        dup = 0
        for i in range(1, len(xs)):
            if int(xs[i]) == int(xs[0]) and int(ys[i]) == int(ys[0]):
                dup = i
        print('dup=', dup)
        # Start after that step, but always keep at least the last point.
        xs = xs[min(dup + 1, len(xs) - 1):]
        ys = ys[min(dup + 1, len(ys) - 1):]
    elif args.tasktype == 'atari':
        # Frames are stored flat; view them as 105 x 80 RGB images.
        init_frame = np.reshape(traj[0].data, (105, 80, 3))
        plt.imshow(init_frame, vmin=0, vmax=255)
        plt.savefig(filename + '_init.pdf', bbox_inches='tight',
                    pad_inches=0)

        goal_frame = np.reshape(traj[-1].data, (105, 80, 3))
        plt.imshow(goal_frame, vmin=0, vmax=255)
        plt.savefig(filename + '_goal.pdf', bbox_inches='tight',
                    pad_inches=0)
        # Close the figure before leaving; it used to be leaked here.
        plt.close()
        return
    elif args.tasktype == 'atariram':
        if args.task == "MsPacman-ram-v0":
            target = (210, 164, 74)  # Color of the pacman.
        elif args.task == "Freeway-ram-v0":
            target = (252, 252, 84)  # Chicken
        else:
            # Unknown game: print the first occurrence of every colour in
            # each frame to help pick a sprite colour, then abort.
            for s in traj:
                frame = np.asarray(s.data)
                shape = frame.shape
                colors = set()
                pos = []  # was None, which made pos.append() fail
                for x in range(shape[1]):
                    for y in range(shape[0]):
                        pixel = tuple(frame[y][x])
                        if pixel not in colors:
                            colors.add(pixel)
                            pos.append((x, y) + pixel)
                print('pos=', pos)
            # Explicit raise: ``assert False`` is stripped by ``python -O``.
            raise AssertionError(
                'no sprite colour known for task {!r}'.format(args.task))

        for s in traj:
            # Use a separate name so the Figure in ``fig`` survives for
            # the plot_bg() call below.
            frame = np.asarray(s.data)
            shape = frame.shape
            pos = None
            for x in range(shape[1]):
                for y in range(shape[0]):
                    if tuple(frame[y][x]) == target:
                        # Flip vertically (image rows grow downwards).
                        pos = (x, 210 - y)
                        break
                if pos is not None:
                    break
            if pos is not None:
                xs.append(pos[0])
                ys.append(pos[1])
    else:
        print('bg Not implemented')

    plot_bg(mdp, args, fig, ax)

    plt.plot(xs, ys, 'g')
    if xs:
        # Start / end markers need at least one point.
        plt.plot(xs[0], ys[0], 'bo')
        plt.plot(xs[-1], ys[-1], 'ro')

    plt.savefig(filename + '_' + str(len(traj)) + '.pdf',
                bbox_inches='tight', pad_inches=0)
    plt.close()
def plot_vis(traj, args, mdp, filename):
    """Scatter the positions visited in ``traj`` over the task background.

    Positions are extracted per task, the background is drawn by
    ``plot_bg`` and the points are plotted as translucent blue dots.  The
    figure is saved as both ``<filename>.pdf`` and ``<filename>.png``.

    * pinball: ``s.data[0]`` / ``s.data[1]``.
    * Point / Ant tasks: same, restricted to the box from
      ``util.bounds(mdp)``.
    * MontezumaRevenge-ram-v0: first two entries of
      ``Monte().feature(s, 0)``; axes limited to 160 x 210.
    * tasktype 'atariram' (Freeway / MsPacman): first pixel (scanning
      column by column) matching the sprite colour, with the row flipped as
      ``210 - y``; axes limited to 160 x 210.

    Raises:
        AssertionError: for tasktype 'atari', for an 'atariram' task
            without a known sprite colour, and for any other unsupported
            task type.  Nothing is plotted or saved in these cases.
    """
    # Axis limits to apply once the axes exist; None keeps autoscaling.
    limits = None

    if args.tasktype == 'pinball':
        xs = [s.data[0] for s in traj]
        ys = [s.data[1] for s in traj]
    elif 'Point' in args.task or 'Ant' in args.task:
        up_bound_x, low_bound_x, up_bound_y, low_bound_y = util.bounds(mdp)
        xs = []
        ys = []
        for s in traj:
            if (low_bound_x <= s.data[0] <= up_bound_x
                    and low_bound_y <= s.data[1] <= up_bound_y):
                xs.append(s.data[0])
                ys.append(s.data[1])
    elif args.task == 'MontezumaRevenge-ram-v0':
        feature = Monte()
        pairs = [feature.feature(s, 0) for s in traj]
        xs = [p[0] for p in pairs]
        ys = [p[1] for p in pairs]
        limits = ((0, 160), (0, 210))
    elif args.tasktype == 'atari':
        print('not implemented yet')
        # Explicit raise: ``assert False`` is stripped by ``python -O``.
        raise AssertionError('plot_vis is not implemented for atari tasks')
    elif args.tasktype == 'atariram':
        sprite_colors = {
            'Freeway-ram-v0': (252, 252, 84),   # Chicken
            'MsPacman-ram-v0': (210, 164, 74),  # Color of the pacman.
        }
        if args.task not in sprite_colors:
            # Fail here instead of with a NameError on ``xs`` further down.
            raise AssertionError(
                'no sprite colour known for task {!r}'.format(args.task))
        target = sprite_colors[args.task]

        xs = []
        ys = []
        for s in traj:
            frame = np.asarray(s.data)
            shape = frame.shape
            pos = None
            for x in range(shape[1]):
                for y in range(shape[0]):
                    if tuple(frame[y][x]) == target:
                        # Flip vertically (image rows grow downwards).
                        pos = (x, 210 - y)
                        break
                if pos is not None:
                    break
            if pos is not None:
                xs.append(pos[0])
                ys.append(pos[1])
        limits = ((0, 160), (0, 210))
    else:
        raise AssertionError(
            'unsupported task {!r} (tasktype {!r})'.format(
                args.task, args.tasktype))

    fig, ax = plt.subplots(figsize=(8, 6))
    plot_bg(mdp, args, fig, ax)

    # Red is better for visualizing in Monte
    ax.plot(xs, ys, 'b.', alpha=0.5)

    if limits is not None:
        # Apply the limits to the axes that are actually saved.  They used
        # to be set through pyplot before this figure existed, which put
        # them on an implicit figure that was never closed.
        ax.set_xlim(limits[0])
        ax.set_ylim(limits[1])

    plt.savefig(filename + ".pdf", bbox_inches='tight', pad_inches=0)
    plt.savefig(filename + ".png", bbox_inches='tight', pad_inches=0)
    plt.close()