def _render(self, mode='human', close=False):
    """Write a colored text picture of the current state.

    The taxi cell is highlighted yellow while the passenger is waiting
    (passenger index below 4) and green once the passenger is aboard; a
    waiting passenger's location is drawn in bold blue and the destination
    in magenta.  The name of the last action, if any, follows the map.

    Args:
        mode: 'ansi' renders into a fresh StringIO; any other value
            writes to sys.stdout.
        close: when true, nothing is rendered and None is returned.

    Returns:
        The output stream when mode is not 'human', otherwise None.
    """
    if close:
        return

    stream = StringIO() if mode == 'ansi' else sys.stdout

    grid = [[cell.decode('utf-8') for cell in row]
            for row in self.desc.copy().tolist()]
    taxirow, taxicol, passidx, destidx = self.decode(self.s)

    # Grid position (r, c) lives at text-map row 1 + r, column 2 * c + 1.
    taxi_r = 1 + taxirow
    taxi_c = 2 * taxicol + 1

    if passidx >= 4:
        # passenger in taxi
        glyph = grid[taxi_r][taxi_c]
        if glyph == " ":
            # A blank cell is swapped for an underscore before coloring.
            glyph = "_"
        grid[taxi_r][taxi_c] = utils.colorize(glyph, 'green', highlight=True)
    else:
        grid[taxi_r][taxi_c] = utils.colorize(
            grid[taxi_r][taxi_c], 'yellow', highlight=True)
        pass_row, pass_col = self.locs[passidx]
        pass_r = 1 + pass_row
        pass_c = 2 * pass_col + 1
        grid[pass_r][pass_c] = utils.colorize(
            grid[pass_r][pass_c], 'blue', bold=True)

    dest_row, dest_col = self.locs[destidx]
    dest_r = 1 + dest_row
    dest_c = 2 * dest_col + 1
    grid[dest_r][dest_c] = utils.colorize(grid[dest_r][dest_c], 'magenta')

    picture = "\n".join(["".join(cells) for cells in grid])
    stream.write(picture + "\n")

    if self.lastaction is None:
        stream.write("\n")
    else:
        action_names = ["South", "North", "East", "West", "Pickup", "Dropoff"]
        stream.write(" ({})\n".format(action_names[self.lastaction]))

    # No need to return anything for human
    if mode == 'human':
        return None
    return stream
def _render_observation(self):
    """Return the observation tape as one labelled line of text.

    Positions -2 through input_width + 1 are rendered with
    _get_str_obs; the position equal to the read head is highlighted
    in green.  The returned string ends with a newline.
    """
    head = self.read_head_position
    cells = []
    for pos in range(-2, self.input_width + 2):
        symbol = self._get_str_obs(np.array([pos]))
        if pos == head:
            symbol = colorize(symbol, 'green', highlight=True)
        cells.append(symbol)
    return "Observation Tape : " + "".join(cells) + "\n"
def _render_observation(self):
    """Return the observation grid as labelled, multi-line text.

    Rows -1 through self.rows and columns -2 through input_width + 1
    are rendered with _get_str_obs.  The first row follows the label
    directly; every later row is padded by the label's width so the
    columns line up.  The cell under the read head is highlighted green.
    """
    head = self.read_head_position
    label = "Observation Grid : "
    indent = " " * len(label)

    pieces = []
    for row in range(-1, self.rows + 1):
        if row != -1:
            pieces.append(indent)
        for col in range(-2, self.input_width + 2):
            at_head = col == head[0] and row == head[1]
            cell = self._get_str_obs((col, row))
            if at_head:
                cell = colorize(cell, 'green', highlight=True)
            pieces.append(cell)
        pieces.append("\n")

    return label + "".join(pieces)
def _render(self, mode='human', close=False):
    """Write the map with the current cell highlighted in red.

    The name of the last action (or a blank line when there is none) is
    written first, followed by the map, one text row per line.

    Args:
        mode: 'ansi' renders into a fresh StringIO; any other value
            writes to sys.stdout.
        close: when true, nothing is rendered and None is returned.

    Returns:
        The stream that was written to (for every mode).
    """
    if close:
        return

    stream = StringIO() if mode == 'ansi' else sys.stdout

    # The flat state index is split into (row, col) using the column count.
    cur_row, cur_col = divmod(self.s, self.ncol)

    grid = [[cell.decode('utf-8') for cell in cells]
            for cells in self.desc.tolist()]
    grid[cur_row][cur_col] = utils.colorize(
        grid[cur_row][cur_col], "red", highlight=True)

    if self.lastaction is None:
        stream.write("\n")
    else:
        action_names = ["Left", "Down", "Right", "Up"]
        stream.write(" ({})\n".format(action_names[self.lastaction]))

    text_rows = [''.join(cells) for cells in grid]
    stream.write("\n".join(text_rows) + "\n")

    return stream
def _render(self, mode='human', close=False):
    """Write a textual status report of the current episode step.

    The report contains a header with the input length and step count, a
    separator, the observation (from _render_observation), the output
    tape up to the write head, the targets, and either the details of
    the last action with rewards or five blank lines when no action has
    been taken yet.

    Args:
        mode: 'ansi' renders into a fresh StringIO; any other value
            writes to sys.stdout.
        close: when true, nothing is rendered and None is returned.

    Returns:
        The stream that was written to (for every mode).
    """
    if close:
        # Nothing interesting to close
        return

    stream = StringIO() if mode == 'ansi' else sys.stdout

    header = "Total length of input instance: %d, step: %d\n" % (
        self.input_width, self.time)
    stream.write(header)

    write_pos = self.write_head_position
    action = self.last_action
    has_action = action is not None
    if has_action:
        inp_act, out_act, pred = action

    # Separator as wide as the header, not counting its newline.
    stream.write("=" * (len(header) - 1) + "\n")

    if has_action:
        pred_str = self.charmap[pred]

    observation_text = self._render_observation()

    output_cells = ["Output Tape : "]
    target_cells = ["Targets : "]
    last_written = write_pos - 1
    for pos in range(-2, len(self.target) + 2):
        target_cells.append(self._get_str_target(pos))
        if pos < last_written:
            output_cells.append(self._get_str_target(pos))
        elif pos == last_written:
            if has_action and out_act == 1:
                # The most recent prediction is shown green when it
                # matches the target at this position, red otherwise.
                if pred == self.target[pos]:
                    color = 'green'
                else:
                    color = 'red'
                output_cells.append(colorize(pred_str, color, highlight=True))
            else:
                output_cells.append(self._get_str_target(pos))

    stream.write(observation_text)
    stream.write("".join(output_cells) + "\n")
    stream.write("".join(target_cells) + "\n\n")

    if not has_action:
        stream.write("\n" * 5)
        return stream

    stream.write("Current reward : %.3f\n" % self.last_reward)
    stream.write("Cumulative reward : %.3f\n" % self.episode_total_reward)
    movement = self.MOVEMENTS[inp_act]
    stream.write("Action : Tuple(move over input: %s,\n" % movement)
    wrote_output = out_act == 1
    stream.write(" write to the output tape: %s,\n" % wrote_output)
    stream.write(" prediction: %s)\n" % pred_str)
    return stream