Python Monte Examples, simple_rl.agents.func_approx.Features.Monte Python Examples

Example #1

0

Show file

File: visualize_visitation.py Project: sguo28/covering-options

def plot_visitation(traj, mdp, args, filename='visitation.pdf'):
    """Plot every position visited in ``traj`` as a dot and save the figure.

    Three cases are handled: pinball (selected by ``args.tasktype``), the
    Point/Ant mazes and Montezuma's Revenge (selected by ``args.task``).
    For any other task nothing is drawn and only the save/close runs.

    Args:
        traj: iterable of states; positions are read from ``s.data`` or,
            for Montezuma, from ``Monte().feature(s, 0)``.
        mdp: only used for pinball, to reach the obstacle polygons.
        args: options object providing ``tasktype`` and ``task``.
        filename: path handed to ``plt.savefig``.
    """
    if args.tasktype == 'pinball':
        xs = [state.data[0] for state in traj]
        ys = [state.data[1] for state in traj]
        plt.plot(xs, ys, 'b.', alpha=0.5)

        # Fill each obstacle from the corner points it exposes.
        for obstacle in mdp.domain.environment.obstacles:
            corners = obstacle.points
            plt.fill([c[0] for c in corners], [c[1] for c in corners], 'k')

    elif args.task in ('PointMaze-v0', 'AntMaze-v0'):
        # Keep only the states that fall inside the square drawn below.
        lower = -4.0
        upper = -4.0 + 8.0 * 3.0
        inside = [
            state for state in traj
            if lower <= state.data[0] <= upper
            and lower <= state.data[1] <= upper
        ]
        xs = [state.data[0] for state in inside]
        ys = [state.data[1] for state in inside]
        plt.plot(xs, ys, 'b.', alpha=0.5)

        # TODO: (x,y) coordinates start at 0, 0.
        #       How are the coordinates signed?
        walls = [
            [1, 1, 1, 1, 1],
            [1, 0, 0, 0, 1],
            [1, 1, 1, 0, 1],
            [1, 0, 0, 0, 1],
            [1, 1, 1, 1, 1],
        ]
        cell = 8.0
        for row_idx, row in enumerate(walls):
            for col_idx, occupied in enumerate(row):
                if occupied != 1:
                    continue
                # Shift by 1.5 cells so that maze cell (1, 1) is centred on
                # the (0, 0) coordinate.
                left = cell * (col_idx - 1.5)
                bottom = cell * (row_idx - 1.5)
                right, top = left + cell, bottom + cell
                plt.fill([left, right, right, left],
                         [bottom, bottom, top, top], 'k')

    elif args.task == 'MontezumaRevenge-ram-v0':
        # TODO: Show the background of the Monte?
        background = imread('./montezuma.jpg')
        plt.imshow(background, zorder=0, extent=[0, 160, 0, 210])

        extractor = Monte()
        positions = [extractor.feature(state, 0) for state in traj]

        plt.xlim([0, 160])
        plt.ylim([0, 210])
        plt.plot([p[0] for p in positions], [p[1] for p in positions],
                 'r.', alpha=0.5)

    plt.savefig(filename)
    plt.close()

Example #2

0

Show file

    def setup_networks(self):
        """Create the f-function selected by ``self.f_func`` and a DQN agent.

        ``'nn'``, ``'nnf'``, ``'nns'`` and ``'nnc'`` all build a
        ``SpectrumNetwork``; they differ only in the extra keyword passed
        (a ``Monte`` feature, a ``Subset`` feature, or ``conv=True``).
        ``'rand'`` uses no f-function.  Any other value is printed and then
        fails an assertion.
        """
        kind = self.f_func
        if kind == 'rand':
            self.f_function = None
        elif kind in ('nn', 'nnf', 'nns', 'nnc'):
            # Collect the variant-specific keyword before the shared call.
            extra = {}
            if kind == 'nnf':
                extra['feature'] = Monte()
            elif kind == 'nns':
                extra['feature'] = Subset(state_dim=self.obs_dim,
                                          feature_indices=[0, 1])  # TODO: parameterize
            elif kind == 'nnc':
                # Convolutions
                extra['conv'] = True
            self.f_function = SpectrumNetwork(self.sess,
                                              obs_dim=self.obs_dim,
                                              n_units=self.n_units,
                                              name=self.name,
                                              **extra)
        else:
            print('f_func =', self.f_func)
            assert (False)

        if self.f_function is not None:
            self.f_function.initialize()

        self.agent = DQNAgent(self.sess,
                              obs_dim=self.obs_dim,
                              num_actions=self.num_actions,
                              gamma=0.99,
                              name=self.name)
        self.agent.reset()

Example #3

0

Show file

File: visualize_option.py Project: sguo28/covering-options

def plot_eigenfunction(op, args, xind=0, yind=1, filename='visualize_ef.pdf'):
    """Scatter-plot the value of ``op.f_function`` over 2-D positions.

    Positions are either taken from a restored experience buffer
    (``args.restoretraj``) or sampled uniformly inside the state bounds
    returned by ``get_mdp_params``.  Every point is coloured by its f-value,
    the threshold ``op.lower_th`` is drawn as a line on the colour bar, and
    a task-specific background (pinball obstacles, maze walls or the
    Montezuma screenshot) is added before the figure is saved.

    Args:
        op: option object exposing ``f_function`` and ``lower_th``.
        args: options object; reads ``task``, ``tasktype``, ``restoretraj``
            and ``ffunction`` and, when restoring, ``basedir``,
            ``noptions``, ``ffuncnunit`` and ``rseed``.
        xind: index into ``s.data`` plotted on the x axis (raw-state case).
        yind: index into ``s.data`` plotted on the y axis (raw-state case).
        filename: path handed to ``plt.savefig``.
    """
    mdp, state_dim, state_bound, num_actions, action_dim, action_bound = get_mdp_params(args)

    n_samples = 2000

    # NOTE: these are the arrays returned by get_mdp_params, edited in place.
    low_bound = state_bound[0]
    up_bound = state_bound[1]

    if args.task == 'AntMaze-v0' or args.task == 'PointMaze-v0':
        low_bound[xind] = 0.0
        low_bound[yind] = 0.0
        up_bound[xind] = 8.0 * 3.0
        up_bound[yind] = 8.0 * 3.0

    if args.tasktype == 'atari':
        low_bound[xind] = 0.0
        low_bound[yind] = 0.0
        up_bound[xind] = 160.0
        up_bound[yind] = 210.0

    # TODO: Derive the bounds from sampled trajectories when the state
    #       bounds are infinite.
    # TODO: Implement a script to plot the f-value of the states
    #       visited by the agent instead of sampling uniform randomly.

    if args.restoretraj:
        # Build the path once so the log shows exactly what is restored.
        traj_path = (args.basedir + '/vis/' + args.task + 'option'
                     + str(args.noptions - 1) + '_' + str(args.ffuncnunit)
                     + '_' + str(args.rseed) + '/' + 'traj')
        print('restoring buffer from ' + traj_path)
        bfr = ExperienceBuffer()
        bfr.restore(traj_path)
        print('bfr_size=', bfr.size())  # TODO: parameter?

        samples, _, _, _, _ = bfr.sample(n_samples)

        if args.task == 'MontezumaRevenge-ram-v0':
            feature = Monte()
        elif args.ffunction == 'nns':
            feature = Subset(state_dim, [0, 1])
        else:
            feature = None

        if feature is not None:
            # Evaluate the feature once per sample and split it afterwards.
            pairs = [feature.feature(s, 0) for s in samples]
            xs = [p[0] for p in pairs]
            ys = [p[1] for p in pairs]
        else:
            xs = [s.data[xind] for s in samples]
            ys = [s.data[yind] for s in samples]
    else:
        xs = [random.uniform(low_bound[xind], up_bound[xind]) for _ in range(n_samples)]
        ys = [random.uniform(low_bound[yind], up_bound[yind]) for _ in range(n_samples)]

    # Montezuma and the 'nns' f-function are evaluated directly on the
    # (x, y) feature pair; everything else goes through a full state.
    uses_features = (args.task == 'MontezumaRevenge-ram-v0'
                     or args.ffunction == 'nns')
    fs = []
    for x, y in zip(xs, ys):
        if uses_features:
            obs = np.reshape(np.array([x, y]), (1, 2))
            f_value = op.f_function.f_from_features(obs)[0][0]
        else:
            s = mdp.get_init_state()
            s.data[xind] = x
            s.data[yind] = y
            f_value = op.f_function(s)[0][0]
        fs.append(f_value)

    # TODO: What is the best colormap for all people (including color blinds?) but still appealing for majority?
    #       bwr looks useful, but may be misleading?.
    # plt.get_cmap is used because matplotlib.cm.get_cmap is not available
    # in recent matplotlib releases.
    cmap = plt.get_cmap('plasma')
    normalize = matplotlib.colors.Normalize(vmin=min(fs), vmax=max(fs))
    colors = [cmap(normalize(value)) for value in fs]

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.scatter(x=xs, y=ys, c=colors)

    cax, _ = matplotlib.colorbar.make_axes(ax)
    matplotlib.colorbar.ColorbarBase(cax, cmap=cmap, norm=normalize)

    # Mark the threshold on the colour bar.
    term_th = op.lower_th
    cax.plot([0, 1], [term_th] * 2, 'k')

    term = sum(1 for f in fs if f < term_th)
    nonterm = len(fs) - term
    print(term, 'terms', nonterm, 'nonterms')

    # TODO: Only for pinball domains. What to do for MuJoCo?
    if args.tasktype == 'pinball':
        # Obstacles
        for obstacle in mdp.domain.environment.obstacles:
            corners = obstacle.points
            ax.fill([p[0] for p in corners], [p[1] for p in corners], 'k')

    elif args.task == 'PointMaze-v0' or args.task == 'AntMaze-v0':
        # TODO: (x,y) coordinates start at 0, 0.
        #       How are the coordinates signed?
        maze = [[1, 1, 1, 1, 1],
                [1, 0, 0, 0, 1],
                [1, 1, 1, 0, 1],
                [1, 0, 0, 0, 1],
                [1, 1, 1, 1, 1]]
        scale = 8.0
        for y, row in enumerate(maze):
            for x, cell in enumerate(row):
                if cell == 1:
                    # We decrement x and y because the (0, 0)-coordinate is set at (1, 1) position in the maze.
                    xbase, ybase = scale * (x - 1), scale * (y - 1)
                    xlist = [xbase, xbase + scale, xbase + scale, xbase]
                    ylist = [ybase, ybase, ybase + scale, ybase + scale]
                    ax.fill(xlist, ylist, 'k')
    elif args.task == 'MontezumaRevenge-ram-v0':
        # TODO: Show the background of the Monte?
        img = imread('./montezuma.jpg')
        ax.imshow(img, zorder=0, extent=[0, 160, 0, 210])

    plt.savefig(filename)
    plt.close()

Example #4

0

Show file

def plot_trajectory(traj, mdp, args, filename='trajectory.pdf'):
    """Draw ``traj`` as a green line with start/end markers and save it.

    The start is marked with a blue dot and the end with a red dot.
    Pinball states are read through ``s.x``/``s.y``, maze states through
    ``s.data[0:2]`` and Montezuma states through ``Monte().feature(s, 0)``.
    For Montezuma everything up to the last revisit of the starting
    position is dropped before plotting.

    Args:
        traj: sequence of states.
        mdp: only used for pinball, to reach the obstacle polygons.
        args: options object providing ``tasktype`` and ``task``.
        filename: path handed to ``plt.savefig``.
    """

    def draw_path(path_x, path_y):
        # Line first, then the blue start marker and the red end marker.
        plt.plot(path_x, path_y, 'g')
        plt.plot(path_x[0], path_y[0], 'bo')
        plt.plot(path_x[-1], path_y[-1], 'ro')

    if args.tasktype == 'pinball':
        draw_path([state.x for state in traj], [state.y for state in traj])

        for obstacle in mdp.domain.environment.obstacles:
            corners = obstacle.points
            plt.fill([c[0] for c in corners], [c[1] for c in corners], 'k')

    elif args.task in ('PointMaze-v0', 'AntMaze-v0'):
        xs = [state.data[0] for state in traj]
        ys = [state.data[1] for state in traj]

        print('x =', min(xs), ' to ', max(xs))
        print('y =', min(ys), ' to ', max(ys))

        draw_path(xs, ys)

        # TODO: (x,y) coordinates start at 0, 0.
        #       How are the coordinates signed?
        walls = [
            [1, 1, 1, 1, 1],
            [1, 0, 0, 0, 1],
            [1, 1, 1, 0, 1],
            [1, 0, 0, 0, 1],
            [1, 1, 1, 1, 1],
        ]
        cell = 8.0
        for row_idx, row in enumerate(walls):
            for col_idx, occupied in enumerate(row):
                if occupied != 1:
                    continue
                # Shift by 1.5 cells so that maze cell (1, 1) is centred on
                # the (0, 0) coordinate.
                left = cell * (col_idx - 1.5)
                bottom = cell * (row_idx - 1.5)
                right, top = left + cell, bottom + cell
                plt.fill([left, right, right, left],
                         [bottom, bottom, top, top], 'k')

    elif args.task == 'MontezumaRevenge-ram-v0':
        extractor = Monte()
        xs = [extractor.feature(state, 0)[0] for state in traj]
        ys = [extractor.feature(state, 0)[1] for state in traj]

        # Duplicate detection: index of the last step whose integer
        # position equals the initial one (0 when there is none).
        dup = max(
            (i for i in range(1, len(xs))
             if int(xs[i]) == int(xs[0]) and int(ys[i]) == int(ys[0])),
            default=0,
        )
        print('dup=', dup)

        # Start right after that step, but always keep at least one point.
        first_kept = min(dup + 1, len(xs) - 1)
        xs = xs[first_kept:]
        ys = ys[min(dup + 1, len(ys) - 1):]

        draw_path(xs, ys)

        # TODO: Show the background of the Monte?
        background = imread('./montezuma.jpg')
        plt.imshow(background, zorder=0, extent=[0, 160, 0, 210])

    plt.savefig(filename)
    plt.close()

Example #5

0

Show file

File: OptionWrapper.py Project: sguo28/covering-options

    def setup_networks(self):
        """Create the f-function and the low-level agent for this option.

        ``self.f_func`` selects the f-function: ``'fourier'`` and
        ``'agent'`` build a ``SpectrumFourier``; ``'nn'``, ``'nnf'``,
        ``'nns'`` and ``'nnc'`` build a ``SpectrumNetwork``; ``'rand'`` uses
        none.  ``self.low_method`` selects the agent (``'linear'``,
        ``'ddpg'``, ``'dqn'`` or ``'rand'``).  An unrecognised value for
        either one is printed and then fails an assertion.
        """
        print('f_func=', self.f_func)
        mode = self.f_func

        if mode in ('fourier', 'agent'):
            if mode == 'fourier':
                features = Fourier(state_dim=self.obs_dim,
                                   bound=self.obs_bound,
                                   order=4)
            else:
                features = AgentPos(game='Freeway')
            self.f_function = SpectrumFourier(obs_dim=self.obs_dim,
                                              feature=features,
                                              name=self.name)
        elif mode in ('nn', 'nnf', 'nns', 'nnc'):
            # The network variants share one constructor call and differ
            # only in a single extra keyword.
            extra = {}
            if mode == 'nnf':
                extra['feature'] = Monte()
            elif mode == 'nns':
                extra['feature'] = Subset(state_dim=self.obs_dim,
                                          feature_indices=[0, 1])  # TODO: parameterize
            elif mode == 'nnc':
                # Convolutions
                extra['conv'] = True
            self.f_function = SpectrumNetwork(self.sess,
                                              obs_dim=self.obs_dim,
                                              n_units=self.n_units,
                                              name=self.name,
                                              **extra)
        elif mode == 'rand':
            self.f_function = None
        else:
            print('f_func =', self.f_func)
            assert (False)

        if self.f_function is not None:
            self.f_function.initialize()

        method = self.low_method
        if method == 'linear':
            q_features = Fourier(state_dim=self.obs_dim,
                                 bound=self.obs_bound,
                                 order=3)
            self.agent = LinearQAgent(actions=range(self.num_actions),
                                      feature=q_features,
                                      name=self.name)
        elif method == 'ddpg':
            # TODO: Using on-policy method is not good for options? is DDPG off-policy?
            self.agent = DDPGAgent(self.sess,
                                   obs_dim=self.obs_dim,
                                   action_dim=self.action_dim,
                                   action_bound=self.action_bound,
                                   name=self.name)
        elif method == 'dqn':
            self.agent = DQNAgent(self.sess,
                                  obs_dim=self.obs_dim,
                                  num_actions=self.num_actions,
                                  gamma=0.99,
                                  name=self.name)
        elif method == 'rand':
            # A missing action count selects the continuous random agent.
            if self.num_actions is None:
                self.agent = RandomContAgent(action_dim=self.action_dim,
                                             action_bound=self.action_bound,
                                             name=self.name)
            else:
                self.agent = RandomAgent(range(self.num_actions),
                                         name=self.name)
        else:
            print('low_method=', self.low_method)
            assert (False)

        self.agent.reset()

Example #6

0

Show file

def plot_op(op, args, mdp, state_bound, filename):
    """Scatter-plot where an option is active, coloured by its f-value.

    Positions come from the experience buffer (``args.restoretraj``) or are
    sampled uniformly inside task-dependent bounds.  Points on the active
    side of the threshold (``op.upper_th`` when ``args.reverse`` is set,
    otherwise ``op.lower_th``) are coloured with the 'Blues' colormap; all
    other points are drawn dark gray.  The background is delegated to
    ``plot_bg`` and the figure is saved to ``filename``.

    Args:
        op: option object exposing ``f_function``, ``lower_th`` and
            ``upper_th``.
        args: options object; reads ``restoretraj``, ``task``, ``tasktype``,
            ``ffunction`` and ``reverse``.
        mdp: task used for ``util.bounds``, ``get_init_state`` and
            ``plot_bg``.
        state_bound: ``(low, high)`` bounds, used when the task is neither
            a Point/Ant maze nor atari.
        filename: path handed to ``plt.savefig``.
    """
    print('visop')
    n_samples = 2000

    # TODO: Visualize the options according to the direction
    # direction = args.reverse

    if args.restoretraj:
        # NOTE(review): `bfr` (and `state_dim` below) are neither parameters
        # nor locals of this function; presumably they are module-level
        # names -- confirm, otherwise this branch raises NameError.
        samples, _, _, _, _ = bfr.sample(n_samples)

        if args.task == 'MontezumaRevenge-ram-v0':
            feature = Monte()
        elif args.ffunction == 'nns':
            feature = Subset(state_dim, [0, 1])
        else:
            feature = None

        if feature is not None:
            # Evaluate the feature once per sample and split it afterwards.
            pairs = [feature.feature(s, 0) for s in samples]
            xs = [p[0] for p in pairs]
            ys = [p[1] for p in pairs]
        else:
            xs = [s.data[0] for s in samples]
            ys = [s.data[1] for s in samples]
    else:
        # TODO: bounds should be implemented inside the tasks.
        if 'Ant' in args.task or 'Point' in args.task:
            up_bound_x, low_bound_x, up_bound_y, low_bound_y = util.bounds(mdp)
        elif args.tasktype == 'atari':
            low_bound_x = 0.0
            low_bound_y = 0.0
            up_bound_x = 160.0
            up_bound_y = 210.0
        else:
            low_bound_x = state_bound[0][0]
            low_bound_y = state_bound[0][1]
            up_bound_x = state_bound[1][0]
            up_bound_y = state_bound[1][1]
        xs = [
            random.uniform(low_bound_x, up_bound_x) for _ in range(n_samples)
        ]
        ys = [
            random.uniform(low_bound_y, up_bound_y) for _ in range(n_samples)
        ]

    # Montezuma and the 'nns' f-function are evaluated directly on the
    # (x, y) feature pair; everything else goes through a full state.
    uses_features = (args.task == 'MontezumaRevenge-ram-v0'
                     or args.ffunction == 'nns')
    fs = []
    for x, y in zip(xs, ys):
        if uses_features:
            obs = np.reshape(np.array([x, y]), (1, 2))
            f_value = op.f_function.f_from_features(obs)[0][0]
        else:
            s = mdp.get_init_state()
            s.data[0] = x
            s.data[1] = y
            f_value = op.f_function(s)[0][0]
        fs.append(f_value)

    # TODO: Find the best color mapping for visualization.
    # TODO: What is the best thing we can do for color blinds? Intensity of the plot?

    # plt.get_cmap is used because matplotlib.cm.get_cmap is not available
    # in recent matplotlib releases.
    cmap = plt.get_cmap('Blues')

    inactive_color = (0.15, 0.15, 0.15)  # dark gray
    if args.reverse:
        term_th = op.upper_th
        normalize = matplotlib.colors.Normalize(
            vmin=min(fs),
            vmax=term_th)  # TODO: Does this give us an inverse direction?
        colors = [
            cmap(normalize(value)) if value < term_th else inactive_color
            for value in fs
        ]
    else:
        term_th = op.lower_th
        normalize = matplotlib.colors.Normalize(vmin=term_th, vmax=max(fs))
        # The colormap is flipped here; the colour bar below is not.
        colors = [
            cmap(1.0 - normalize(value)) if value > term_th else inactive_color
            for value in fs
        ]

    fig, ax = plt.subplots(figsize=(8, 6))
    ax.scatter(x=xs, y=ys, c=colors)

    cax, _ = matplotlib.colorbar.make_axes(ax)
    matplotlib.colorbar.ColorbarBase(cax, cmap=cmap, norm=normalize)

    # NOTE(review): the count uses `f < term_th` in both directions; with
    # args.reverse the gray points are the ones at or above the threshold --
    # confirm which side should be reported as 'terms'.
    term = sum(1 for f in fs if f < term_th)
    nonterm = len(fs) - term
    print(term, 'terms', nonterm, 'nonterms')

    plot_bg(mdp, args, fig, ax)

    plt.savefig(filename, bbox_inches='tight', pad_inches=0)
    plt.close()

Example #7

0

Show file

def plot_traj(traj, mdp, args, filename='trajectory'):
    """Plot a trajectory over the task background and save it as a PDF.

    The (x, y) path is extracted per task type: ``s.x``/``s.y`` for
    pinball, flipped ``s.data`` coordinates for Point/Ant mazes (which
    also render every third state to a PNG), ``Monte`` features for
    Montezuma, and the first pixel matching a known agent colour for
    MsPacman/Freeway.  For ``tasktype == 'atari'`` only the first and last
    frames are saved and the function returns early.

    Args:
        traj: sequence of states.
        mdp: task; passed to ``plot_bg`` and used to render maze states.
        args: options object providing ``tasktype`` and ``task``.
        filename: output path prefix; suffixes and ``.pdf`` are appended.

    Raises:
        AssertionError: for an 'atariram' task with no known agent colour
            (after printing the distinct pixel colours of each frame).
    """
    fig, ax = plt.subplots(figsize=(8, 6))
    if args.tasktype == 'pinball':
        xs = [s.x for s in traj]
        ys = [s.y for s in traj]

    elif 'Point' in args.task or 'Ant' in args.task:
        xs = [maze_width(mdp) - s.data[1] for s in traj]
        ys = [maze_height(mdp) - s.data[0] for s in traj]

        print('x =', min(xs), ' to ', max(xs))
        print('y =', min(ys), ' to ', max(ys))

        import time
        # Render every third state (otherwise it gets unreadable).  Stepping
        # the index by 3 keeps it inside the trajectory even when its length
        # is a multiple of 3.
        for frame_no, idx in enumerate(range(0, len(xs), 3)):
            mdp.env.wrapped_env.set_xy([xs[idx], ys[idx]])
            mdp.env.wrapped_env.set_ori(traj[idx].data[2])
            mdp.env.step([0, 0])
            time.sleep(0.1)
            img = mdp.env.render(mode='rgb_array')

            # Zero-pad to two digits, e.g. '<task>_07.png'.
            imsave(args.task + '_' + str(frame_no).zfill(2) + '.png', img)

    elif args.task == 'MontezumaRevenge-ram-v0':
        feature = Monte()
        pairs = [feature.feature(s, 0) for s in traj]
        xs = [p[0] for p in pairs]
        ys = [p[1] for p in pairs]
        # Duplicate detection: last step whose integer position equals the
        # initial one; everything up to it is dropped.
        dup = 0
        for i in range(1, len(xs)):
            if int(xs[i]) == int(xs[0]) and int(ys[i]) == int(ys[0]):
                dup = i
        print('dup=', dup)

        xs = xs[min(dup + 1, len(xs) - 1):]
        ys = ys[min(dup + 1, len(ys) - 1):]
    elif args.tasktype == 'atari':
        # Only the first and last frames are saved for image observations.
        init_frame = np.reshape(traj[0].data, (105, 80, 3))
        plt.imshow(init_frame, vmin=0, vmax=255)
        plt.savefig(filename + '_init.pdf', bbox_inches='tight', pad_inches=0)
        goal_frame = np.reshape(traj[-1].data, (105, 80, 3))
        plt.imshow(goal_frame, vmin=0, vmax=255)
        plt.savefig(filename + '_goal.pdf', bbox_inches='tight', pad_inches=0)
        # Release the figure like every other exit path does.
        plt.close()
        return
    elif args.tasktype == 'atariram':
        if args.task == "MsPacman-ram-v0":
            target = (210, 164, 74)  # Color of the pacman.
        elif args.task == "Freeway-ram-v0":
            target = (252, 252, 84)  # Chicken
        else:
            # Unknown game: print the first position of every distinct
            # pixel colour so a target colour can be chosen, then fail.
            for s in traj:
                frame = np.asarray(s.data)
                shape = frame.shape
                colors = set()
                pos = []
                for x in range(shape[1]):
                    for y in range(shape[0]):
                        pixel = tuple(frame[y][x])
                        if pixel not in colors:
                            colors.add(pixel)
                            pos.append((x, y) + pixel)

                print('pos=', pos)
            raise AssertionError('no agent colour known for task '
                                 + str(args.task))

        xs = []
        ys = []

        for s in traj:
            # A separate name keeps `fig` bound to the Figure for plot_bg.
            frame = np.asarray(s.data)
            shape = frame.shape
            pos = None
            # Column-major scan; stop at the first pixel of the target colour.
            for x in range(shape[1]):
                for y in range(shape[0]):
                    if tuple(frame[y][x]) == target:
                        pos = (x, 210 - y)
                        break
                if pos is not None:
                    break
            if pos is not None:
                xs.append(pos[0])
                ys.append(pos[1])
    else:
        print('bg Not implemented')
        # No path can be extracted for this task, so there is nothing to
        # draw; release the figure instead of failing on undefined data.
        plt.close()
        return

    plot_bg(mdp, args, fig, ax)

    plt.plot(xs, ys, 'g')

    # Start/end markers need at least one point.
    if len(xs) > 0:
        plt.plot(xs[0], ys[0], 'bo')
        plt.plot(xs[-1], ys[-1], 'ro')

    plt.savefig(filename + '_' + str(len(traj)) + '.pdf',
                bbox_inches='tight',
                pad_inches=0)
    plt.close()

Example #8

0

Show file

def plot_vis(traj, args, mdp, filename):
    """Plot the positions visited in ``traj`` as dots over the background.

    Positions are extracted per task type (raw ``s.data`` for pinball and
    Point/Ant mazes, ``Monte`` features for Montezuma, the first pixel
    matching the agent colour for Freeway/MsPacman).  The figure is saved
    as both ``filename + '.pdf'`` and ``filename + '.png'``.

    Args:
        traj: iterable of states.
        args: options object providing ``tasktype`` and ``task``.
        mdp: task; passed to ``util.bounds`` and ``plot_bg``.
        filename: output path without extension.

    Raises:
        AssertionError: for ``tasktype == 'atari'`` and for any task type
            not handled here.
    """
    # Set when the plot should be clamped to the 160x210 screen.  The
    # limits are applied to `ax` once it exists; calling plt.xlim here would
    # act on an implicit, unrelated figure.
    screen_limits = False

    if args.tasktype == 'pinball':
        xs = [s.data[0] for s in traj]
        ys = [s.data[1] for s in traj]
    elif 'Point' in args.task or 'Ant' in args.task:
        xs = []
        ys = []

        up_bound_x, low_bound_x, up_bound_y, low_bound_y = util.bounds(mdp)
        for s in traj:
            # Drop states outside the bounds reported for the maze.
            if low_bound_x <= s.data[0] <= up_bound_x and \
               low_bound_y <= s.data[1] <= up_bound_y:
                xs.append(s.data[0])
                ys.append(s.data[1])
    elif args.task == 'MontezumaRevenge-ram-v0':
        feature = Monte()
        pairs = [feature.feature(s, 0) for s in traj]
        xs = [p[0] for p in pairs]
        ys = [p[1] for p in pairs]
        screen_limits = True
    elif args.tasktype == 'atari':
        print('not implemented yet')
        raise AssertionError('not implemented yet')
    elif args.tasktype == 'atariram':
        agent_colors = {
            'Freeway-ram-v0': (252, 252, 84),  # Chicken
            'MsPacman-ram-v0': (210, 164, 74),  # Color of the pacman.
        }
        # Games without a known agent colour yield no points, so only the
        # background is plotted for them.
        xs = []
        ys = []
        target = agent_colors.get(args.task)
        if target is not None:
            for s in traj:
                frame = np.asarray(s.data)
                shape = frame.shape
                pos = None
                # Column-major scan; stop at the first matching pixel.
                for x in range(shape[1]):
                    for y in range(shape[0]):
                        if tuple(frame[y][x]) == target:
                            pos = (x, 210 - y)
                            break
                    if pos is not None:
                        break
                if pos is not None:
                    xs.append(pos[0])
                    ys.append(pos[1])
        screen_limits = True
    else:
        raise AssertionError('unsupported task type: ' + str(args.tasktype))

    fig, ax = plt.subplots(figsize=(8, 6))

    plot_bg(mdp, args, fig, ax)

    ax.plot(xs, ys, 'b.', alpha=0.5)  # Red is better for visualizing in Monte

    if screen_limits:
        # Applied last so that neither the background nor the scatter can
        # change the final view.
        ax.set_xlim([0, 160])
        ax.set_ylim([0, 210])

    plt.savefig(filename + ".pdf", bbox_inches='tight', pad_inches=0)
    plt.savefig(filename + ".png", bbox_inches='tight', pad_inches=0)
    plt.close()