def _render_observation(self):
    """Return the observation tape (1-D) or grid (2-D) as printable text.

    The cell whose index equals ``self.x`` is highlighted in green.  In the
    2-D case the first row shares its line with the label and every
    following row is padded to the label's width.
    """
    head = self.x
    if self.inp_dim == 1:
        cells = []
        for col in range(-2, self.total_len + 2):
            cell = self._get_str_obs(np.array([col]))
            if col == head:
                cell = colorize(cell, 'green', highlight=True)
            cells.append(cell)
        return "Observation Tape : " + "".join(cells) + "\n"

    if self.inp_dim == 2:
        label = "Observation Grid : "
        padding = " " * len(label)
        lines = []
        for row in range(-1, 3):
            pieces = [label if row == -1 else padding]
            for col in range(-2, self.total_len + 2):
                cell = self._get_str_obs(np.array([col, row]))
                if col == head[0] and row == head[1]:
                    cell = colorize(cell, 'green', highlight=True)
                pieces.append(cell)
            lines.append("".join(pieces) + "\n")
        return "".join(lines)

    assert False
def _render(self, mode='human', close=False):
    """Draw the taxi map with taxi, passenger and destination highlighted.

    When ``passidx < 4`` the taxi cell is highlighted yellow and the cell at
    ``self.locs[passidx]`` is drawn bold blue; otherwise the passenger is in
    the taxi and the taxi cell is highlighted green (a blank cell is shown
    as an underscore).  The destination cell is magenta.  The name of the
    last action is written under the map.

    Returns the output stream for any mode other than 'human'.
    """
    if close:
        return None

    stream = sys.stdout if mode != 'ansi' else StringIO()
    grid = [[ch.decode('utf-8') for ch in row]
            for row in self.desc.copy().tolist()]
    taxi_r, taxi_c, pass_idx, dest_idx = self.decode(self.s)
    ty, tx = 1 + taxi_r, 2 * taxi_c + 1

    if pass_idx < 4:
        grid[ty][tx] = utils.colorize(grid[ty][tx], 'yellow', highlight=True)
        wait_r, wait_c = self.locs[pass_idx]
        py, px = 1 + wait_r, 2 * wait_c + 1
        grid[py][px] = utils.colorize(grid[py][px], 'blue', bold=True)
    else:
        # passenger in taxi
        glyph = grid[ty][tx]
        if glyph == " ":
            glyph = "_"
        grid[ty][tx] = utils.colorize(glyph, 'green', highlight=True)

    dest_r, dest_c = self.locs[dest_idx]
    dy, dx = 1 + dest_r, 2 * dest_c + 1
    grid[dy][dx] = utils.colorize(grid[dy][dx], 'magenta')

    stream.write("\n".join("".join(row) for row in grid) + "\n")
    if self.lastaction is None:
        stream.write("\n")
    else:
        action_names = ["South", "North", "East", "West", "Pickup", "Dropoff"]
        stream.write(" ({})\n".format(action_names[self.lastaction]))

    # No need to return anything for human
    if mode == 'human':
        return None
    return stream
# Render the taxi environment twice: as colored text on `outfile` and as
# sprites blitted onto a pygame window.
#
# Visible behavior of the statements below:
#   * A 72pt "comicsansms" font and three random offsets are created on every
#     call, before the `close` check.
#   * `pg.display.set_mode((550, 550))` is called on every non-closing call.
#   * The text map uses the same coloring scheme as the plain-text renderer
#     (yellow/blue when passidx < 4, green otherwise, magenta destination),
#     except that a blank taxi cell is shown as "Q".
#   * `self.player4`, `self.player` and `self.player3` are blitted at
#     positions derived from the random offsets, the taxi cell and the
#     destination cell; `self.player2` is blitted only when passidx < 4.
#   * Each call sleeps for 0.5 seconds.
#   * When `self.lastaction` is not None, its name is written to `outfile`
#     and `self.dir[self.lastaction]` is rendered with the font and blitted
#     at (45, 450), followed by `pg.display.flip()`.
#   * `outfile` is returned for any mode other than 'human'.
#
# NOTE(review): `print[1+taxirow]` and `print taxirow` are Python 2 print
# statements (debug output); this block is a syntax error on Python 3.
# NOTE(review): the function arrived collapsed onto one line, so the exact
# nesting of the final `if self.lastaction is not None` / `else` pair could
# not be confirmed -- verify against the original layout before refactoring.
def _render(self, mode='human', close=False): font = pg.font.SysFont("comicsansms", 72) buzzdude = random.randint(0,19) duzzdude = random.randint(0,29) duzzbude = random.randint(0,39) text_color = (210, 210, 210) #msg2 = font.render("Hello, World", True, (0, 128, 0)) if close: return outfile = StringIO() if mode == 'ansi' else sys.stdout out = self.desc.copy().tolist() out = [[c.decode('utf-8') for c in line] for line in out] taxirow, taxicol, passidx, destidx = self.decode(self.s) screen = pg.display.set_mode((550, 550)) def ul(x): return "Q" if x == " " else x if passidx < 4: out[1+taxirow][2*taxicol+1]= utils.colorize(out[1+taxirow][2*taxicol+1], 'yellow', highlight=True) print[1+taxirow] print taxirow pi, pj = self.locs[passidx] out[1+pi][2*pj+1]= utils.colorize(out[1+pi][2*pj+1], 'blue', bold=True) screen.blit(self.player2, (2*(29+pi),18*(pj+1))) #pg.display.flip() else: # passenger in taxi out[1+taxirow][2*taxicol+1]= utils.colorize(ul(out[1+taxirow][2*taxicol+1]), 'green', highlight=True) di, dj = self.locs[destidx] #screen.blit(msg2,(300,200)) #screen.blit(text,(300,200)) screen.blit(self.player4,(1+buzzdude,1+buzzdude)) screen.blit(self.player, (67*taxirow,67*taxicol+1)) screen.blit(self.player3, (duzzbude+5*(29+di),duzzdude+10*(dj+1))) #pg.display.flip() time.sleep(0.5) out[1+di][2*dj+1] = utils.colorize(out[1+di][2*dj+1], 'magenta') outfile.write("\n".join(["".join(row) for row in out])+"\n") if self.lastaction is not None: outfile.write(" ({})\n".format(["South", "North", "East", "West", "Pickup", "Dropoff"][self.lastaction])) tmp = self.lastaction #self.msg1 = outfile.write msg1= self.dir[tmp]#msg2 = msg1 msg2 = font.render(msg1,1,text_color) if self.lastaction is not None: screen.blit(msg2,(45,450)) pg.display.flip() else: outfile.write("\n") # No need to return anything for human if mode != 'human': return outfile
def render(self, mode='human'):
    """Render the lattice grid as colored text.

    Grid values 0, 1 and -1 are shown as ``*`` (gray), ``H`` (bold green)
    and ``P`` (cyan) respectively.  The grid is flipped vertically so the
    highest row index is printed first.  The last action, if any, is
    written on the line above the grid.

    Args:
        mode: 'ansi' writes to an in-memory buffer, anything else to stdout.

    Returns:
        The output stream for any mode other than 'human', otherwise None.
    """
    outfile = StringIO() if mode == 'ansi' else sys.stdout

    # Flip so highest y-value row is printed first
    symbols = np.flipud(self.grid).astype(str)

    # Convert everything to human-readable symbols
    symbols[symbols == '0'] = '*'
    symbols[symbols == '1'] = 'H'
    symbols[symbols == '-1'] = 'P'

    # Colored cells are stored in an object array: the fixed-width string
    # dtype produced by astype(str) can be too narrow for the ANSI escape
    # sequences and would silently truncate them.
    desc = symbols.astype(object)

    styles = (
        ('*', 'gray', {}),               # unfilled spaces
        ('H', 'green', {'bold': True}),  # hydrophobic molecules
        ('P', 'cyan', {}),               # polar molecules
    )
    for symbol, color, extra in styles:
        for coords in zip(*np.where(symbols == symbol)):
            desc[coords] = utils.colorize(desc[coords], color, **extra)

    # Provide prompt for last action
    if self.last_action is not None:
        outfile.write(" ({})\n".format(
            ["Left", "Down", "Up", "Right"][self.last_action]))
    else:
        outfile.write("\n")

    # Draw desc
    outfile.write("\n".join(''.join(line) for line in desc) + "\n")

    if mode != 'human':
        return outfile
def load_checkpoint(self):
    """Restore the latest checkpoint in ``self.checkpoint_dir`` into ``self.sess``.

    Returns True when a checkpoint path was found and restored through
    ``self.saver``, False when none was found.
    """
    print(colorize(" [*] Loading checkpoints...", "green"))
    latest = tf.train.latest_checkpoint(self.checkpoint_dir)
    print(self.checkpoint_dir)
    print("ckpt_path:", latest)

    if not latest:
        print(colorize(" [!] Load FAILED: %s" % self.checkpoint_dir, "red"))
        return False

    self.saver.restore(self.sess, latest)
    print(colorize(" [*] Load SUCCESS: %s" % latest, "green"))
    return True
def render_observation(self):
    """Return the input and hint strings on two aligned lines.

    The character at ``self.r_cursor`` is highlighted in magenta in both
    lines; the hint line is indented by the width of the ``Input : `` label.
    """
    input_text = self._get_str(self.input_data)
    hint_text = self._get_str(self.hint)
    cursor = self.r_cursor
    label = 'Input : '

    def mark(text):
        # Highlight the single character under the cursor.
        highlighted = colorize(text[cursor], 'magenta', highlight=True)
        return text[:cursor] + highlighted + text[cursor + 1:]

    top = mark(input_text)
    bottom = mark(hint_text)
    return label + top + '\n' + ' ' * len(label) + bottom + '\n'
def get_current_map_with_agents(self) -> np.array:
    """Build the ASCII map with taxis, passengers and destinations colored.

    Returns:
        The decoded map as a numpy array of (possibly ANSI-wrapped) strings.
    """
    # Work on a decoded copy of the map.
    grid = [[ch.decode('utf-8') for ch in row]
            for row in self.desc.copy().tolist()]
    taxis, _fuels, passengers_start_coordinates, destinations, passengers_locations = self.state
    palette = ['yellow', 'red', 'white', 'green', 'cyan', 'crimson', 'gray', 'magenta'] * 5
    colored = [False] * self.num_taxis

    for idx, location in enumerate(passengers_locations):
        if location > 2:
            # Passenger is on a taxi: color that taxi's cell.
            taxi_idx = location - 3
            taxi_row, taxi_col = taxis[taxi_idx]
            y, x = 1 + taxi_row, 2 * taxi_col + 1
            grid[y][x] = utils.colorize(
                grid[y][x], palette[taxi_idx], highlight=True, bold=True)
            colored[taxi_idx] = True
        else:
            # Passenger isn't in a taxi: color the passenger's start cell.
            pass_row, pass_col = passengers_start_coordinates[idx]
            y, x = 1 + pass_row, 2 * pass_col + 1
            grid[y][x] = utils.colorize(grid[y][x], 'blue', bold=True)

    for idx, taxi in enumerate(taxis):
        # Collided taxis are drawn gray instead of in their own color.
        tint = palette[idx] if self.collided[idx] == 0 else 'gray'
        taxi_row, taxi_col = taxi
        y, x = 1 + taxi_row, 2 * taxi_col + 1
        glyph = grid[y][x]
        if glyph == " ":
            # Blank cells are shown as an underscore.
            glyph = "_"
        grid[y][x] = utils.colorize(glyph, tint, highlight=True)

    for dest_row, dest_col in destinations:
        y, x = 1 + dest_row, 2 * dest_col + 1
        grid[y][x] = utils.colorize(grid[y][x], 'magenta')

    return np.array(grid)
def simulate_and_learn_policy(self):
    """Optimize the policy on data simulated with the learned model.

    Each of the ``self.policy_opt_num_iter`` iterations:
      1. simulates trajectories with ``self.simulate_model()`` (no gradients),
      2. logs them with ``self._log_simulated_trajectory()``,
      3. runs one policy optimization step via ``self.learn_policy()``.

    The simulation data set is cleared before the loop and again every
    ``self.sim_refresh_interval`` iterations when that interval is positive.
    """
    print(colorize("Optimizing Policy with Model Data", "yellow"))
    self.dynamical_model.eval()
    # Start from an empty simulation data set.
    self.sim_dataset.reset()

    with DisableGradient(self.dynamical_model):
        with gpytorch.settings.fast_pred_var():
            for iteration in tqdm(range(self.policy_opt_num_iter)):
                # Step 1: compute the state distribution.
                with torch.no_grad():
                    self.simulate_model()

                # Log the last simulations.
                self._log_simulated_trajectory()

                # Step 2: optimize the policy.
                self.learn_policy()

                if self.sim_refresh_interval > 0:
                    if (iteration + 1) % self.sim_refresh_interval == 0:
                        self.sim_dataset.reset()
def _render(self, mode='human', close=False):
    """Write the grid with the agent's cell highlighted and return the stream.

    The position is taken from ``self.s`` shifted right by
    ``self.num_goals`` bits.  Nothing is highlighted in the terminal state.
    The line above the grid shows the last action, `` (EXITED)`` in the
    terminal state, or is left blank.
    """
    if close:
        return None

    stream = sys.stdout if mode != 'ansi' else StringIO()
    cell_index = self.s >> self.num_goals
    row, col = divmod(cell_index, self.ncol)
    grid = [[ch.decode('utf-8') for ch in line] for line in self.desc.tolist()]

    at_terminal = self.s == self.TERMINAL_STATE
    if not at_terminal:
        grid[row][col] = utils.colorize(grid[row][col], "red", highlight=True)

    if self.lastaction is not None and self.lastaction != 8:
        action_names = ["Left", "Down", "Right", "Up"]
        stream.write(" ({})\n".format(action_names[self.lastaction]))
    elif self.s == self.TERMINAL_STATE:
        stream.write(" (EXITED)\n")
    else:
        stream.write("\n")

    stream.write("\n".join(''.join(line) for line in grid) + "\n")
    return stream
def render(self, mode='human'):
    """Draw the map and a textual summary of the taxi and its passengers.

    Every key of ``self.position_to_coordinates`` other than
    ``self.position_in_taxi`` is stamped on the map as its highlighted
    label.  The summary lists, for each passenger, where it is and where it
    is going, followed by whether the taxi is at full capacity.

    Args:
        mode: 'ansi' writes to an in-memory buffer, anything else to stdout.

    Returns:
        The rendered text when ``mode`` is 'ansi' (previously the buffer was
        built and then discarded), otherwise None.
    """
    outfile = StringIO() if mode == 'ansi' else sys.stdout

    out = [[c.decode('utf-8') for c in line]
           for line in self.desc.copy().tolist()]
    for position in self.position_to_coordinates:
        if position == self.position_in_taxi:
            continue
        row, col = self.position_to_coordinates[position]
        out[row + 1][2 * col + 1] = utils.colorize(
            str(position), 'yellow', highlight=True)
    outfile.write("\n".join(["".join(row) for row in out]) + "\n")

    state_objs = self.states[self.no_to_state[self.s]]
    taxi, passengers = state_objs[0], state_objs[1:]

    lines = []
    for passenger in passengers:
        destination = str(passenger.get_destination())
        if passenger.get_location() == self.position_in_taxi:
            # Report the real destination; the passenger id used to be
            # printed here by mistake.
            lines.append("Passenger " + str(passenger.get_passenger_id())
                         + " in taxi with " + destination + " as destination\n")
        else:
            # NOTE(review): this branch prints str(passenger), not the id --
            # confirm the passenger type defines a readable __str__.
            lines.append("Passenger " + str(passenger) + " at "
                         + str(passenger.get_location())
                         + " with " + destination + " as destination\n")

    if taxi.get_no_of_passengers() == taxi.get_max_capacity():
        lines.append("Taxi fully occupied at " + str(taxi.get_location()))
    else:
        lines.append("Taxi not fully occupied at " + str(taxi.get_location()))
    outfile.write("".join(lines))

    if mode == 'ansi':
        return outfile.getvalue()
def dense_nn(inputs, layers_sizes, name="mlp", reuse=False, output_fn=None,
             dropout_keep_prob=None, batch_norm=False, training=True):
    """Stack fully connected layers on ``inputs`` inside variable scope ``name``.

    Every layer but the last uses ReLU; the last layer is linear.  Dropout
    (only while training, never before the first layer) and batch
    normalization are optional.  ``output_fn``, when given, is applied to
    the final tensor inside the same variable scope.

    Returns:
        The output tensor of the stack.
    """
    print(colorize("Building mlb {} | sizes: {}".format(
        name, [inputs.shape[0]] + layers_sizes), "green"
    ))

    last_index = len(layers_sizes) - 1
    use_dropout = dropout_keep_prob is not None and training

    with tf.variable_scope(name, reuse=reuse):
        net = inputs
        for index, width in enumerate(layers_sizes):
            layer_name = name + '_l' + str(index)
            print("Layer:", layer_name, width)

            if index > 0 and use_dropout:
                net = tf.nn.dropout(net, dropout_keep_prob)

            net = tf.layers.dense(
                net,
                width,
                activation=tf.nn.relu if index < last_index else None,
                kernel_initializer=tf.contrib.layers.xavier_initializer(),
                name=layer_name,
                reuse=reuse,
            )

            if batch_norm:
                net = tf.layers.batch_normalization(net, training=training)

        if output_fn:
            net = output_fn(net)

    return net
def warn(msg, *args, category=None, stacklevel=1):
    """Emit a yellow ``WARN: ...`` warning unless the log level filters it out.

    ``msg`` is %-formatted with ``args``.  ``stacklevel`` is bumped by one so
    the warning is attributed to the caller of this helper.
    """
    if MIN_LEVEL > WARN:
        return
    text = "WARN: {}".format(msg % args)
    warnings.warn(
        colorize(text, "yellow"),
        category=category,
        stacklevel=stacklevel + 1,
    )
def warn(msg, *args, category=None, stacklevel=1):
    """Emit a yellow ``WARN: ...`` warning when the log level permits it.

    ``msg`` is %-formatted with ``args``; the warning is attributed to the
    caller of this function by adding one to ``stacklevel``.
    """
    if not MIN_LEVEL <= WARN:
        return
    formatted = msg % args
    banner = colorize("WARN: " + format(formatted), "yellow")
    warnings.warn(banner, category=category, stacklevel=stacklevel + 1)
def render_gridworld(  # pylint: disable=inconsistent-return-statements
        self, mode='human'):
    """Print the 3x4 gridworld with the current state highlighted in red.

    States from 5 upward are shifted by one so that the blank cell in the
    middle row is skipped.  The previous action, if any, is printed first.

    Returns:
        The rendered text in 'ansi' mode, None in 'human' mode.

    Raises:
        ValueError: if ``mode`` is neither 'human' nor 'ansi'.
    """
    if mode not in ('human', 'ansi'):
        raise ValueError('Only `human` and `ansi` modes are supported')

    # stream where to send the string representation of the env
    stream = io.StringIO() if mode != 'human' else sys.stdout

    if self.action_prev is not None:
        action_index = self.action_prev.item()
        print(f'action: {self.model.actions[action_index]}', file=stream)

    cell = self.state.item()
    if not self.state < 5:
        cell += 1
    row, col = divmod(cell, 4)

    layout = [
        ['.', '.', '.', '+'],
        ['.', ' ', '.', '-'],
        ['.', '.', '.', '.'],
    ]
    layout[row][col] = colorize(layout[row][col], 'red', highlight=True)
    print('\n'.join(''.join(line) for line in layout), file=stream)

    if mode == 'ansi':
        with contextlib.closing(stream):
            return stream.getvalue()
# Print a status report for the current step between two dashed rules:
# whether the position is inside a question (`self.pos_idx['q']`), the
# sentence number, the current word looked up in `self.data.QA.IVOCAB`, the
# running count of correct answers and the current QA index `self.i`.
#
# When `self.last_action` equals `self.current_qa[2]` a "Correct action!"
# line is printed with the answer word.
#
# NOTE(review): this method mutates state -- `self.num_correct` is
# incremented here when `pos_idx['q'] == 1` and `pos_idx['word'] > 1`, so
# rendering more than once per step may change the count.  The function
# arrived collapsed onto one line; whether that increment is nested under
# the "correct action" check could not be confirmed -- verify before
# refactoring.
# NOTE(review): `mode` is not referenced in the body, and `else: None` is a
# no-op.
def render(self, mode='human'): print('------------------------') print('State is in question: {0}'.format(bool(self.pos_idx['q']))) print('Sentence number:', colorize(str(self.pos_idx['sen']), 'yellow')) print('Current word state:', colorize(str(self.data.QA.IVOCAB[self.state]), 'cyan')) if self.last_action == self.current_qa[2]: print(colorize('Correct action! The answer is', 'green'), \ self.data.QA.IVOCAB[self.last_action]) if self.pos_idx['q'] == 1 and self.pos_idx['word'] > 1: self.num_correct += 1 else: None print('Correct answers:', str(self.num_correct)) print('Current QA:', colorize(str(self.i), 'magenta')) print('------------------------')
def dense_nn(inputs, layers_sizes, name="mlp", reuse=None,
             dropout_keep_prob=None, batch_norm=False, training=True):
    """Stack fully connected layers on ``inputs`` inside variable scope ``name``.

    Internal layers use ReLU, the final layer is linear.  Dropout is applied
    before every layer except the first, and only while training; batch
    normalization after each layer is optional.

    Returns:
        The output tensor of the last layer.
    """
    sizes_banner = "Building mlp {} | sizes: {}".format(
        name, [inputs.shape[0]] + layers_sizes)
    print(colorize(sizes_banner, "green"))

    final_index = len(layers_sizes) - 1
    net = inputs
    with tf.variable_scope(name):
        for index, width in enumerate(layers_sizes):
            layer_name = name + '_l' + str(index)
            print("Layer:", layer_name, width)

            # No dropout on the input layer.
            if index > 0 and dropout_keep_prob is not None and training:
                net = tf.nn.dropout(net, dropout_keep_prob)

            # ReLU only on internal layers.
            activation = tf.nn.relu if index < final_index else None
            net = tf.layers.dense(
                net,
                width,
                activation=activation,
                kernel_initializer=tf.contrib.layers.xavier_initializer(),
                reuse=reuse,
                name=layer_name)

            if batch_norm:
                net = tf.layers.batch_normalization(net, training=training)

    return net
def render(self, mode="human"):
    """Write the grid map with the agent's cell highlighted in red.

    The name of the last action (or a blank line) is written above the map.

    Returns:
        The rendered text for any mode other than "human", otherwise None.
    """
    stream = sys.stdout if mode != "ansi" else StringIO()

    row, col = divmod(self.state, self.ncol)
    cells = [[char.decode("utf-8") for char in line]
             for line in self.grid_map.tolist()]
    cells[row][col] = utils.colorize(cells[row][col], "red", highlight=True)

    if self.last_action is None:
        stream.write("\n")
    else:
        action_names = [
            "Left",
            "Down",
            "Right",
            "Up",
            "Boost_Left",
            "Boost_Down",
            "Boost_Right",
            "Boost_Up",
        ]
        last_action = action_names[self.last_action]
        stream.write(f" ({last_action})\n")

    stream.write("\n".join("".join(line) for line in cells) + "\n")

    if mode == "human":
        return None
    with closing(stream):
        return stream.getvalue()
def _render(self, mode='human', close=False):
    """Write the grid one cell at a time, marking the agent with a red ``x``.

    Cell (3, 11) is drawn as ``T``, cells flagged in ``self._cliff`` as
    ``C`` and all others as ``o``.  Leading whitespace is stripped in the
    first column; in the last column trailing whitespace is stripped and a
    newline appended.

    Args:
        mode: 'ansi' writes to an in-memory buffer, anything else to stdout.
        close: when true, do nothing.

    Returns:
        The output stream for any mode other than 'human' (the 'ansi'
        buffer used to be filled and then dropped), otherwise None.
    """
    if close:
        return None

    outfile = StringIO() if mode == 'ansi' else sys.stdout
    last_col = self.shape[1] - 1

    for s in range(self.nS):
        position = np.unravel_index(s, self.shape)
        if self.s == s:
            output = utils.colorize(" x ", "red", highlight=True)
        elif position == (3, 11):
            output = " T "
        elif self._cliff[position]:
            output = " C "
        else:
            output = " o "

        if position[1] == 0:
            output = output.lstrip()
        if position[1] == last_col:
            output = output.rstrip()
            output += "\n"

        outfile.write(output)
    outfile.write("\n")

    if mode != 'human':
        return outfile
def render(self, mode="human", plot=False):
    """Write the board with every cell equal to 1 drawn as a red ``*``.

    The position switched by the last action (or a blank line) is written
    above the board.  With ``plot=True`` the decoded grid is additionally
    shown with matplotlib.

    Returns:
        The rendered text for any mode other than "human", otherwise None.
    """
    stream = sys.stdout if mode != "ansi" else StringIO()
    board = [[ch.decode("utf-8") for ch in line] for line in self.desc.tolist()]

    grid = self.decode(self.s)
    for row, col in np.argwhere(grid == 1):
        board[row][col] = utils.colorize("*", "red", highlight=True, bold=True)

    if self.lastaction is None:
        stream.write("\n")
    else:
        target = self._action_to_pos(self.lastaction)
        stream.write(" Turn off ({},{})\n".format(*target))
    stream.write("\n".join("".join(line) for line in board) + "\n")

    if plot:
        stream.write("\n")
        plt.pcolormesh(grid, edgecolors="w", linewidth=2, cmap="seismic")
        plt.axis("off")  # remove axis
        axes = plt.gca()
        axes.invert_yaxis()  # pcolormesh inverts the y axis, so re-invert it
        plt.gca().set_aspect("equal")  # display the grid as a square
        plt.show()

    # No need to return anything for human
    if mode == "human":
        return None
    with closing(stream):
        return stream.getvalue()
def render_state(state_id, nrow, ncol, storm_maps, terminal_pos):
    """Print the grid for ``state_id``: agent, storm cells and exit.

    ``state_id`` encodes both a cell and a storm-map index:
    ``state_id // (nrow * ncol)`` selects the storm map, and the remainder
    is the row-major cell index.

    Args:
        state_id: integer state number.
        nrow: number of grid rows.
        ncol: number of grid columns.
        storm_maps: indexable collection of arrays; cells above 0.5 in the
            selected map are drawn as ``S``.
        terminal_pos: index of the exit cell, drawn as ``E``.

    Returns:
        None.  The grid is printed, followed by an empty line.
    """
    cells_per_map = nrow * ncol
    pos = ((state_id % cells_per_map) // ncol, state_id % ncol)
    storm = state_id // cells_per_map

    # Object dtype keeps the full ANSI-colored agent marker regardless of its
    # length (a fixed-width string dtype would truncate anything longer).
    air_map = np.full((nrow, ncol), '.', dtype=object)
    air_map[terminal_pos] = 'E'
    air_map[storm_maps[storm] > .5] = 'S'
    air_map[pos] = utils.colorize('A', 'red', highlight=True)

    # Join the cells instead of dumping the raw array buffer:
    # ndarray.tostring() is deprecated/removed in recent NumPy and emitted
    # the NUL padding of the fixed-width buffer along with the characters.
    for row in air_map:
        print(''.join(row))
    print('')
    return
def _render_walk(self):
    """Return the walk as a row of ``#`` with the current position highlighted.

    The highlight is green once ``self._reached_right_edge()`` is true and
    red otherwise.
    """
    track = ['#'] * self._walk_len
    if self._reached_right_edge():
        shade = 'green'
    else:
        shade = 'red'
    here = self._position
    track[here] = utils.colorize(track[here], color=shade, highlight=True)
    return "".join(track)
def dense_nn(inputs, layers_sizes, name="mlp", output_fn=None,
             dropout_keep_prob=None, batch_norm=False, training=True):
    """Build a Keras ``Sequential`` MLP with the given layer widths.

    Internal layers use ReLU, the final layer is linear.  Dropout layers are
    inserted before every layer except the first, and only when ``training``
    is true; batch normalization after each layer is optional.

    Args:
        inputs: only its ``shape[0]`` is read, for the log line.
        layers_sizes: list of layer widths.
        name: prefix for the layer names.
        output_fn: accepted for signature compatibility; not used here.
        dropout_keep_prob: probability of *keeping* a unit, or None to
            disable dropout.
        batch_norm: add a ``BatchNormalization`` layer after each dense layer.
        training: dropout layers are only added when this is true.

    Returns:
        The built ``tf.keras.Sequential`` model.
    """
    print(
        colorize(
            "Building mlp {} | sizes: {}".format(name, [inputs.shape[0]] + layers_sizes),
            "green"))

    model = tf.keras.Sequential()
    last_index = len(layers_sizes) - 1
    for i, layer in enumerate(layers_sizes):
        layer_name = name + '_l' + str(i)
        print("Layer:", layer_name, layer)

        if i > 0 and dropout_keep_prob is not None and training:
            # Keras Dropout takes the fraction of units to DROP, so the keep
            # probability has to be converted (passing it directly inverted
            # the intended regularization strength).
            model.add(tf.keras.layers.Dropout(1.0 - dropout_keep_prob))

        model.add(
            tf.keras.layers.Dense(
                layer,
                activation='relu' if i < last_index else None,
                name=layer_name))

        if batch_norm:
            model.add(tf.keras.layers.BatchNormalization())

    # NOTE(review): the input shape is hard-coded rather than derived from
    # `inputs` -- confirm this matches every caller.
    model.build(input_shape=(4, ))
    return model
def __init__(
    self,
    env: PassiveEnvironment,
    first_epoch_only: bool = False,
    wandb_prefix: str = None,
):
    """Wrap a passive environment so online performance can be recorded.

    If the wrapped environment is not already pretending to be active, a
    RuntimeWarning explains that rewards are withheld until actions are
    sent, and the flag is then switched on.
    """
    super().__init__(env)
    # Maps a step index to the metrics recorded at that step.
    self._metrics: Dict[int, ClassificationMetrics] = defaultdict(Metrics)
    self.first_epoch_only = first_epoch_only
    self.wandb_prefix = wandb_prefix
    # Number of steps taken so far.
    self._steps: int = 0

    passive_env = self.env.unwrapped
    assert isinstance(passive_env, PassiveEnvironment)
    if not passive_env.pretend_to_be_active:
        epoch_scope = "during the first epoch " if self.first_epoch_only else ""
        notice = (
            "Your online performance "
            + epoch_scope
            + "on this environment will be monitored! "
            "Since this env is Passive, i.e. a Supervised Learning "
            "DataLoader, the Rewards (y) will be withheld until "
            "actions are passed to the 'send' method. Make sure that "
            "your training loop can handle this small tweak."
        )
        warnings.warn(RuntimeWarning(colorize(notice, color="yellow")))
    passive_env.pretend_to_be_active = True
    self.__epochs = 0
def render(self, mode='human', fidelity=0):
    """Write the framed map for ``fidelity`` with the current cell in green.

    A blank current cell is shown as an underscore.  Below the frame, the
    last action and the last fidelity are written (a blank line each when
    unset).

    Returns:
        The rendered text for any mode other than 'human', otherwise None.
    """
    stream = sys.stdout if mode != 'ansi' else StringIO()
    raw_rows = np.asarray(MAP[fidelity], dtype='c').tolist()
    grid = [[ch.decode('utf-8') for ch in line] for line in raw_rows]

    if self.s is not None:
        row, col = self.decode(self.s)
        glyph = grid[row][col]
        if glyph == " ":
            glyph = "_"
        grid[row][col] = utils.colorize(glyph, 'green', highlight=True)

    framed_rows = ['|' + "".join(line) + '|' for line in grid]
    stream.write('____________________\n')
    stream.write("\n".join(framed_rows) + "\n")
    stream.write('‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾\n')

    if self.lastaction is None:
        stream.write("\n")
    else:
        action_names = ["South", "North", "East", "West"]
        stream.write(" ({})\n".format(action_names[self.lastaction]))

    if self.lastfidelity is None:
        stream.write("\n")
    else:
        stream.write(" (Fidelity: {})\n".format(self.lastfidelity))

    # No need to return anything for human
    if mode == 'human':
        return None
    with closing(stream):
        return stream.getvalue()
def _runtime_score(self) -> float:
    """Map the total runtime (``self._runtime``, in seconds) onto [0, 1].

    Runtimes at or below ``min_runtime_hours`` score 1.0, runtimes at or
    above ``max_runtime_hours`` score 0.0, and anything in between is
    interpolated linearly.  Both bounds are read from ``type(self)``.

    Returns:
        The normalized score.  When the runtime is None a RuntimeWarning is
        emitted and 0.0 is returned (as a float, like every other branch).
    """
    runtime_seconds = self._runtime
    if runtime_seconds is None:
        warnings.warn(
            RuntimeWarning(
                colorize(
                    "Runtime is None! Returning runtime score of 0.\n (Make sure the "
                    "Setting had its `monitor_training_performance` attr set to True!",
                    color="red",
                )))
        return 0.0

    runtime_hours = runtime_seconds / 3600

    # Bounds for this type of Results (and Setting).
    min_runtime_hours = type(self).min_runtime_hours
    max_runtime_hours = type(self).max_runtime_hours
    assert 0 <= min_runtime_hours < max_runtime_hours
    assert 0 < runtime_hours

    if runtime_hours <= min_runtime_hours:
        return 1.0
    if max_runtime_hours <= runtime_hours:
        return 0.0

    elapsed = runtime_hours - min_runtime_hours
    window = max_runtime_hours - min_runtime_hours
    return 1 - (elapsed / window)
def render(self, mode='human', close=False):
    """Write the map with the pointer cell highlighted in yellow.

    When a last action exists, the decoded pointer row, pointer column and
    player flag are written under the map, followed by the action's name;
    otherwise a blank line is written.

    Returns:
        The rendered text for any mode other than 'human', otherwise None.
    """
    stream = sys.stdout if mode != 'ansi' else StringIO()
    grid = [[ch.decode('utf-8') for ch in line]
            for line in self.desc.copy().tolist()]
    pointer_row, pointer_col, hasPlayer = self.decode(self.s)

    y, x = 1 + pointer_row, pointer_col + 1
    grid[y][x] = utils.colorize(grid[y][x], 'yellow', highlight=True)
    stream.write("\n".join("".join(line) for line in grid) + "\n")

    if self.lastaction is None:
        stream.write("\n")
    else:
        pieces = (
            (" ({}),", pointer_row),
            (" ({})", pointer_col),
            (" ({})\n", hasPlayer),
        )
        for template, value in pieces:
            stream.write(template.format(value))
        action_names = ["Down", "Up", "Right", "Left", "Player"]
        stream.write(" ({})\n".format(action_names[self.lastaction]))

    # No need to return anything for human
    if mode == 'human':
        return None
    with closing(stream):
        return stream.getvalue()
def render(self, mode='human'):
    """Write the grid with the agent's cell highlighted in red.

    In the absorbing state (only when ``EXPLICIT_ABSORBING`` is set) the
    grid saved by the previous non-absorbing render is replayed under an
    "In absorbing state" banner.  Otherwise the last action (or a blank
    line) and the freshly colored grid are written, and that grid is stored
    in ``self.last_desc``.

    Returns:
        The rendered text for any mode other than 'human', otherwise None.
    """
    stream = sys.stdout if mode != 'ansi' else StringIO()

    absorbed = EXPLICIT_ABSORBING and self.s == self.s_absorb
    if not absorbed:
        row, col = divmod(self.s, self.ncol)
        grid = [[ch.decode('utf-8') for ch in line]
                for line in self.desc.tolist()]
        grid[row][col] = utils.colorize(grid[row][col], "red", highlight=True)

        if self.lastaction is None:
            stream.write("\n")
        else:
            action_names = ["Left", "Down", "Right", "Up"]
            stream.write(" ({})\n".format(action_names[self.lastaction]))
        stream.write("\n".join(''.join(line) for line in grid) + "\n")
        # Remember the grid so the absorbing state can replay it.
        self.last_desc = grid
    else:
        stream.write("In absorbing state \n")
        stream.write("\n".join(''.join(line) for line in self.last_desc) + "\n")

    if mode == 'human':
        return None
    with closing(stream):
        return stream.getvalue()
def _render(self, mode='human', close=False):
    """Write the grid, cells separated by spaces, with the current state in red.

    Returns the output stream for any mode other than 'human'.
    """
    if close:
        # Nothing is displayed when closing.
        return None

    stream = sys.stdout if mode != 'ansi' else StringIO()
    grid = [[ch.decode('utf-8') for ch in line] for line in self.desc.tolist()]

    state_grid = np.arange(self.nS).reshape(self.shape)
    for (y, x), s in np.ndenumerate(state_grid):
        # Mark the current state in red.
        if self.s == s:
            grid[y][x] = utils.colorize(grid[y][x], "red", highlight=True)

    stream.write("\n".join(' '.join(line) for line in grid) + "\n")

    if mode == 'human':
        return None
    return stream
def warn(msg: str, *args) -> None:
    """
    Custom definition of :py:func:`gym.logger.warn` function.

    Emits a yellow ``WARN: ...`` warning attributed to the caller, unless
    the logger's minimum level is above WARN.
    """
    if logger.MIN_LEVEL > logger.WARN:
        return
    text = 'WARN: {}'.format(msg % args)
    warnings.warn(colorize(text, 'yellow'), stacklevel=2)
def _update_model_posterior(self, last_trajectory):
    """Update model posterior of GP-models with new data.

    Does nothing unless the dynamical model's base model is an
    ``ExactGPModel``.  The trajectory is stacked into one observation, the
    action is cut down to the model's action dimension when it is wider,
    the training-set transformations are applied, and the result is added
    to the GP before it is summarized.
    """
    gp_model = self.dynamical_model.base_model
    if not isinstance(gp_model, ExactGPModel):
        return

    observation = stack_list_of_tuples(last_trajectory)  # Parallelize.
    dim_action = self.dynamical_model.dim_action[0]
    if observation.action.shape[-1] > dim_action:
        observation.action = observation.action[..., :dim_action]

    for transform in self.train_set.transformations:
        observation = transform(observation)

    print(colorize("Add data to GP Model", "yellow"))
    self.dynamical_model.base_model.add_data(
        observation.state, observation.action, observation.next_state)

    print(colorize("Summarize GP Model", "yellow"))
    self.dynamical_model.base_model.summarize_gp()
def load_model(self):
    """Restore the checkpoint recorded in ``self.checkpoint_dir`` into ``self.sess``.

    Returns True when a checkpoint state with a model path exists and was
    restored through ``self.saver``, False otherwise.
    """
    print(colorize(" [*] Loading checkpoints...", "green"))
    state = tf.train.get_checkpoint_state(self.checkpoint_dir)
    print(self.checkpoint_dir, state)

    if not (state and state.model_checkpoint_path):
        print(colorize(" [!] Load FAILED: %s" % self.checkpoint_dir, "red"))
        return False

    ckpt_name = os.path.basename(state.model_checkpoint_path)
    print(ckpt_name)
    fname = os.path.join(self.checkpoint_dir, ckpt_name)
    print(fname)
    self.saver.restore(self.sess, fname)
    print(colorize(" [*] Load SUCCESS: %s" % fname, "green"))
    return True
def warn(msg: str, *args) -> None:
    """
    Custom definition of :py:func:`gym.logger.warn` function.

    The %-formatted message is prefixed with ``WARN: ``, colored yellow and
    raised as a warning pointing at the caller of this helper.
    """
    if not logger.MIN_LEVEL <= logger.WARN:
        return
    formatted = msg % args
    banner = colorize("WARN: " + format(formatted), "yellow")
    warnings.warn(banner, stacklevel=2)
# Build an LSTM network followed by a dense head.
#
# Steps visible in the body:
#   1. `inputs` is reshaped to [-1, step_size, state_size], where state_size
#      is the second dimension of the incoming tensor.
#   2. Optionally (`pre_lstm_dense_layer`), a single ReLU dense layer is
#      applied first via `dense_nn(..., name='pre_lstm')`.
#   3. `lstm_layers` LSTM cells of width `lstm_size` are stacked in a
#      MultiRNNCell; each cell is wrapped in a DropoutWrapper when training
#      and `dropout_keep_prob` is truthy.  Cells are created with
#      `reuse=not training`.
#   4. The LSTM outputs go through `dense_nn(..., name="outputs")` and are
#      reshaped to [-1, layers_sizes[-1]], which is returned.
# Shapes and intermediate objects are printed along the way.
#
# NOTE(review): the inputs are transposed to [1, 0, 2] before being passed to
# tf.nn.dynamic_rnn, but no `time_major` argument is given -- confirm the
# layout the RNN actually sees is the intended one.
# NOTE(review): the function arrived collapsed onto one line, so which
# statements sit inside the 'lstm_cells' variable scope (and therefore the
# resulting variable names) could not be confirmed -- check against existing
# checkpoints before restructuring.
def lstm_net(inputs, layers_sizes, name='lstm', step_size=16, lstm_layers=1, lstm_size=256, pre_lstm_dense_layer=None, dropout_keep_prob=None, training=True): """inputs = (batch_size * step_size, *observation_size) """ print(colorize("Building lstm net " + name, "green")) print("inputs.shape =", inputs.shape) state_size = inputs.shape.as_list()[1] inputs = tf.reshape(inputs, [-1, step_size, state_size]) print("reshaped inputs.shape =", inputs.shape) def _make_cell(): cell = tf.nn.rnn_cell.LSTMCell(lstm_size, state_is_tuple=True, reuse=not training) if training and dropout_keep_prob: cell = tf.contrib.rnn.DropoutWrapper( cell, output_keep_prob=dropout_keep_prob) return cell with tf.variable_scope(name): if pre_lstm_dense_layer: inputs = tf.nn.relu( dense_nn(inputs, [pre_lstm_dense_layer], name='pre_lstm')) with tf.variable_scope('lstm_cells'): # Before transpose, inputs.get_shape() = (batch_size, num_steps, lstm_size) # After transpose, inputs.get_shape() = (num_steps, batch_size, lstm_size) lstm_inputs = tf.transpose(inputs, [1, 0, 2]) cell = tf.contrib.rnn.MultiRNNCell( [_make_cell() for _ in range(lstm_layers)], state_is_tuple=True) lstm_outputs, lstm_states = tf.nn.dynamic_rnn(cell, lstm_inputs, dtype=tf.float32) # transpose back. lstm_outputs = tf.transpose(lstm_outputs, [1, 0, 2]) print("cell =", cell) print("lstm_states =", lstm_states) print("lstm_outputs.shape =", lstm_outputs.shape) outputs = dense_nn(lstm_outputs, layers_sizes, name="outputs") print("outputs.shape =", outputs.shape) outputs = tf.reshape(outputs, [-1, layers_sizes[-1]]) print("reshaped outputs.shape =", outputs.shape) return outputs
def render_observation(self, observation, outfile=sys.stdout):
    """Write ``observation`` to ``outfile`` inside a highlighted gray border.

    Each value is translated to a character through ``self.MapChars``; a
    border line as wide as the observation plus two cells is written above
    and below the rows.
    """
    rows = [[self.MapChars[value] for value in line]
            for line in observation.copy().tolist()]
    edge = utils.colorize(' ', 'gray', highlight=True)
    border = edge * (observation.shape[1] + 2) + "\n"

    framed = [edge + "".join(row) + edge for row in rows]
    outfile.write(border)
    outfile.write("\n".join(framed) + "\n")
    outfile.write(border)
def render_observation(self):
    """Return the observation tape as one line, read-head cell in green.

    The tape is drawn from two cells before the input to two cells past
    ``self.input_width``.
    """
    head = self.read_head_position
    cells = []
    for pos in range(-2, self.input_width + 2):
        cell = self._get_str_obs(np.array([pos]))
        if pos == head:
            cell = colorize(cell, 'green', highlight=True)
        cells.append(cell)
    return "Observation Tape : " + "".join(cells) + "\n"
def main():
    """Roll out a random agent twice and save the collected data.

    Command line arguments:
        envid: id of the gym environment to create.
        outfile: path passed to ``np.savez`` for the collected arrays.
        --gymdir: optional directory put at the front of ``sys.path`` before
            gym is imported.

    Each of the two rollouts is seeded with its index; every array in the
    rollout data is stored under the key ``"<index>-<name>"``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("envid")
    parser.add_argument("outfile")
    parser.add_argument("--gymdir")
    args = parser.parse_args()

    if args.gymdir:
        sys.path.insert(0, args.gymdir)
    # Imported only after the optional path override so it can take effect.
    import gym
    from gym import utils

    # print(...) with a single argument and range() behave the same on
    # Python 2 and 3; the print statement and xrange were Python 2 only.
    print(utils.colorize("gym directory: %s"%path.dirname(gym.__file__), "yellow"))

    env = gym.make(args.envid)
    agent = RandomAgent(env.action_space)
    max_steps = env.spec.tags['wrapper_config.TimeLimit.max_episode_steps']

    alldata = {}
    for i in range(2):
        np.random.seed(i)
        data = rollout(env, agent, max_steps)
        for (k, v) in data.items():
            alldata["%i-%s"%(i, k)] = v
    np.savez(args.outfile, **alldata)
def _render(self,mode='human', close=False):
    """Write ``self.matrix`` with cell (4, 4) highlighted in red.

    The grid is written first, then the name of the last action (or a blank
    line).  The output stream is always returned unless ``close`` is set.
    """
    if close:
        return None

    stream = sys.stdout if mode != 'ansi' else StringIO()
    row = col = 4
    grid = [[ch.decode('utf-8') for ch in line] for line in self.matrix.tolist()]
    grid[row][col] = utils.colorize(grid[row][col], "red", highlight=True)
    stream.write("\n".join(''.join(line) for line in grid)+"\n")

    if self.lastaction is None:
        stream.write("\n")
    else:
        action_names = ["Left", "Down", "Right", "Up"]
        stream.write(" ({})\n".format(action_names[self.lastaction]))
    return stream
def render_observation(self):
    """Return the observation grid as text, read-head cell in green.

    Rows run from -1 to ``self.rows`` and columns from -2 to
    ``self.input_width + 1``.  The first row shares its line with the
    label; the following rows are padded to the label's width.
    """
    head = self.read_head_position
    label = "Observation Grid : "
    padding = " " * len(label)

    lines = []
    for row in range(-1, self.rows+1):
        pieces = [label if row == -1 else padding]
        for col in range(-2, self.input_width + 2):
            cell = self._get_str_obs((col, row))
            if col == head[0] and row == head[1]:
                cell = colorize(cell, 'green', highlight=True)
            pieces.append(cell)
        lines.append("".join(pieces) + "\n")
    return "".join(lines)
def render(self, mode='human'):
    """Write the last action and the grid with the agent's cell in red.

    Returns the output stream for any mode other than 'human'.
    """
    stream = sys.stdout if mode != 'ansi' else StringIO()

    row, col = divmod(self.s, self.ncol)
    grid = [[ch.decode('utf-8') for ch in line] for line in self.desc.tolist()]
    grid[row][col] = utils.colorize(grid[row][col], "red", highlight=True)

    if self.lastaction is None:
        stream.write("\n")
    else:
        action_names = ["Left","Down","Right","Up"]
        stream.write(" ({})\n".format(action_names[self.lastaction]))
    stream.write("\n".join(''.join(line) for line in grid)+"\n")

    if mode == 'human':
        return None
    return stream
def _render(self, mode='human', close=False):
    """Write the grid with the agent's cell in red, then the last action.

    The action name comes from ``self.get_action_meanings()``.  The output
    stream is always returned unless ``close`` is set.
    """
    if close:
        return None

    stream = sys.stdout if mode != 'ansi' else StringIO.StringIO()
    row, col = divmod(self.s, self.ncol)
    grid = self.desc.tolist()
    grid[row][col] = utils.colorize(grid[row][col], "red", highlight=True)
    stream.write("\n".join("".join(line) for line in grid)+"\n")

    if self.lastaction is None:
        stream.write("\n")
    else:
        meaning = self.get_action_meanings()[self.lastaction]
        stream.write(" ({})\n".format(meaning))
    return stream
def _render(self, mode="human", close=False):
    """Write the grid with the agent's cell in red, then the last action.

    The output stream is always returned unless ``close`` is set.
    """
    if close:
        return None

    stream = sys.stdout if mode != "ansi" else StringIO()
    row, col = divmod(self.s, self.ncol)
    grid = [[ch.decode("utf-8") for ch in line] for line in self.desc.tolist()]
    grid[row][col] = utils.colorize(grid[row][col], "red", highlight=True)
    stream.write("\n".join("".join(line) for line in grid) + "\n")

    if self.lastaction is None:
        stream.write("\n")
    else:
        action_names = ["Left", "Down", "Right", "Up"]
        stream.write(" ({})\n".format(action_names[self.lastaction]))
    return stream
def _render(self, mode='human', close=False):
    """Write the observation, output tape, targets and last action.

    The output tape shows the targets already passed by the write head; the
    cell just written shows the prediction in green when it matches the
    target and red otherwise.  When no action has been taken yet, five
    blank lines replace the reward/action summary.

    Returns the output stream unless ``close`` is set.
    """
    if close:
        # Nothing interesting to close
        return None

    stream = sys.stdout if mode != 'ansi' else StringIO()
    header = "Total length of input instance: %d, step: %d\n" % (self.input_width, self.time)
    stream.write(header)

    write_pos = self.write_head_position
    action = self.last_action
    acted = action is not None
    if acted:
        inp_act, out_act, pred = action
    stream.write("=" * (len(header) - 1) + "\n")

    tape_line = "Output Tape : "
    targets_line = "Targets : "
    if acted:
        pred_str = self.charmap[pred]
    obs_text = self._render_observation()

    for pos in range(-2, len(self.target) + 2):
        targets_line += self._get_str_target(pos)
        if pos < write_pos - 1:
            tape_line += self._get_str_target(pos)
        elif pos == (write_pos - 1):
            if acted and out_act == 1:
                shade = 'green' if pred == self.target[pos] else 'red'
                tape_line += colorize(pred_str, shade, highlight=True)
            else:
                tape_line += self._get_str_target(pos)

    stream.write(obs_text)
    stream.write(tape_line + "\n")
    stream.write(targets_line + "\n\n")

    if not acted:
        stream.write("\n" * 5)
        return stream

    stream.write("Current reward : %.3f\n" % self.last_reward)
    stream.write("Cumulative reward : %.3f\n" % self.episode_total_reward)
    move = self.MOVEMENTS[inp_act]
    stream.write("Action : Tuple(move over input: %s,\n" % move)
    wrote = out_act == 1
    stream.write(" write to the output tape: %s,\n" % wrote)
    stream.write(" prediction: %s)\n" % pred_str)
    return stream
def _render(self, mode='human', close=False):
    """Write the observation, output tape, targets and last action.

    Positions from -2 up to ``int(self.total_reward)`` are drawn; positions
    missing from ``self.target`` add a blank to the output tape only.  The
    cell just written shows the prediction in green when it matches the
    target and red otherwise.  When no action has been taken yet, five
    blank lines replace the reward/action summary.

    Returns the output stream unless ``close`` is set.
    """
    if close:
        # Nothing interesting to close
        return None

    stream = sys.stdout if mode != 'ansi' else StringIO()
    header = "Total length of input instance: %d, step: %d\n" % (self.total_len, self.time)
    stream.write(header)

    write_pos = self.y
    action = self.last_action
    acted = action is not None
    if acted:
        inp_act, out_act, pred = action
    stream.write("=" * (len(header) - 1) + "\n")

    tape_line = "Output Tape : "
    targets_line = "Targets : "
    if acted:
        # Predictions are letters when the environment uses characters.
        pred_str = chr(pred + ord('A')) if self.chars else str(pred)
    obs_text = self._render_observation()

    max_len = int(self.total_reward) + 1
    for pos in range(-2, max_len):
        if pos not in self.target:
            tape_line += " "
            continue
        targets_line += self._get_str_target(pos)
        if pos < write_pos - 1:
            tape_line += self._get_str_target(pos)
        elif pos == (write_pos - 1):
            if acted and out_act == 1:
                shade = 'green' if pred == self.target[pos] else 'red'
                tape_line += colorize(pred_str, shade, highlight=True)
            else:
                tape_line += self._get_str_target(pos)

    stream.write(obs_text)
    stream.write(tape_line + "\n")
    stream.write(targets_line + "\n\n")

    if not acted:
        stream.write("\n" * 5)
        return stream

    stream.write("Current reward : %.3f\n" % self.reward)
    stream.write("Cumulative reward : %.3f\n" % self.sum_reward)

    move = ""
    for code, direction in ((0, "left"), (1, "right"), (2, "up"), (3, "down")):
        if inp_act == code:
            move = direction
            break
    stream.write("Action : Tuple(move over input: %s,\n" % move)

    out_act = "True" if out_act == 1 else "False"
    stream.write(" write to the output tape: %s,\n" % out_act)
    stream.write(" prediction: %s)\n" % pred_str)
    return stream
def warn(msg, *args):
    """Print a yellow ``WARN: ...`` line unless the log level filters it out.

    ``msg`` is %-formatted with ``args``.
    """
    if MIN_LEVEL > WARN:
        return
    text = 'WARN: {}'.format(msg % args)
    print(colorize(text, 'yellow'))
def error(msg, *args):
    """Print a red ``ERROR: ...`` line unless the log level filters it out.

    ``msg`` is %-formatted with ``args``.
    """
    if MIN_LEVEL > ERROR:
        return
    text = 'ERROR: {}'.format(msg % args)
    print(colorize(text, 'red'))