Example #1
0
 def _render_observation(self):
     x = self.x
     if self.inp_dim == 1:
         x_str =      "Observation Tape    : "
         for i in range(-2, self.total_len + 2):
             if i == x:
                 x_str += colorize(self._get_str_obs(np.array([i])), 'green', highlight=True)
             else:
                 x_str += self._get_str_obs(np.array([i]))
         x_str += "\n"
         return x_str
     elif self.inp_dim == 2:
         label =      "Observation Grid    : "
         x_str = ""
         for j in range(-1, 3):
             if j != -1:
                 x_str += " " * len(label)
             for i in range(-2, self.total_len + 2):
                 if i == x[0] and j == x[1]:
                     x_str += colorize(self._get_str_obs(np.array([i, j])), 'green', highlight=True)
                 else:
                     x_str += self._get_str_obs(np.array([i, j]))
             x_str += "\n"
         x_str = label + x_str
         return x_str
     else:
         assert False
Example #2
0
    def _render(self, mode='human', close=False):
        if close:
            return

        outfile = StringIO() if mode == 'ansi' else sys.stdout

        out = self.desc.copy().tolist()
        out = [[c.decode('utf-8') for c in line] for line in out]
        taxirow, taxicol, passidx, destidx = self.decode(self.s)
        def ul(x): return "_" if x == " " else x
        if passidx < 4:
            out[1+taxirow][2*taxicol+1] = utils.colorize(out[1+taxirow][2*taxicol+1], 'yellow', highlight=True)
            pi, pj = self.locs[passidx]
            out[1+pi][2*pj+1] = utils.colorize(out[1+pi][2*pj+1], 'blue', bold=True)
        else: # passenger in taxi
            out[1+taxirow][2*taxicol+1] = utils.colorize(ul(out[1+taxirow][2*taxicol+1]), 'green', highlight=True)

        di, dj = self.locs[destidx]
        out[1+di][2*dj+1] = utils.colorize(out[1+di][2*dj+1], 'magenta')
        outfile.write("\n".join(["".join(row) for row in out])+"\n")
        if self.lastaction is not None:
            outfile.write("  ({})\n".format(["South", "North", "East", "West", "Pickup", "Dropoff"][self.lastaction]))
        else: outfile.write("\n")

        # No need to return anything for human
        if mode != 'human':
            return outfile
Example #3
0
    def _render(self, mode='human', close=False):
        font = pg.font.SysFont("comicsansms", 72)
        buzzdude = random.randint(0,19)
        duzzdude = random.randint(0,29)
        duzzbude = random.randint(0,39)
        text_color = (210, 210, 210)
        #msg2 = font.render("Hello, World", True, (0, 128, 0))
        if close:
            return

        outfile = StringIO() if mode == 'ansi' else sys.stdout

        out = self.desc.copy().tolist()
        out = [[c.decode('utf-8') for c in line] for line in out]
        taxirow, taxicol, passidx, destidx = self.decode(self.s)
        screen = pg.display.set_mode((550, 550))
        def ul(x): return "Q" if x == " " else x
        if passidx < 4:
            out[1+taxirow][2*taxicol+1]= utils.colorize(out[1+taxirow][2*taxicol+1], 'yellow', highlight=True)
            print[1+taxirow]
            print taxirow
            
            
            pi, pj = self.locs[passidx]
            out[1+pi][2*pj+1]= utils.colorize(out[1+pi][2*pj+1], 'blue', bold=True)
            
            screen.blit(self.player2, (2*(29+pi),18*(pj+1)))
            #pg.display.flip()
        else: # passenger in taxi
            out[1+taxirow][2*taxicol+1]= utils.colorize(ul(out[1+taxirow][2*taxicol+1]), 'green', highlight=True)
        
        di, dj = self.locs[destidx]
        #screen.blit(msg2,(300,200))
        #screen.blit(text,(300,200))
        screen.blit(self.player4,(1+buzzdude,1+buzzdude))
        screen.blit(self.player, (67*taxirow,67*taxicol+1))
        screen.blit(self.player3, (duzzbude+5*(29+di),duzzdude+10*(dj+1)))
        #pg.display.flip()
        time.sleep(0.5)
        out[1+di][2*dj+1] = utils.colorize(out[1+di][2*dj+1], 'magenta')
        outfile.write("\n".join(["".join(row) for row in out])+"\n")
        if self.lastaction is not None:
            outfile.write("  ({})\n".format(["South", "North", "East", "West", "Pickup", "Dropoff"][self.lastaction]))
            tmp = self.lastaction
            #self.msg1 = outfile.write
            msg1= self.dir[tmp]#msg2 = msg1
            msg2 = font.render(msg1,1,text_color)
            if self.lastaction is not None:
                screen.blit(msg2,(45,450))
            pg.display.flip()
        else: outfile.write("\n")

        # No need to return anything for human
        if mode != 'human':
            return outfile
Example #4
0
    def render(self, mode='human'):
        """Render ``self.grid`` as coloured text.

        The grid is flipped vertically, its values ``0``/``1``/``-1`` are
        shown as ``*``/``H``/``P``, each symbol is coloured, and the result is
        written after a line naming the last action.

        Output goes to ``sys.stdout``, or to a new ``StringIO`` when ``mode``
        is ``'ansi'``.  For any mode other than ``'human'`` the stream object
        itself is returned; otherwise nothing is returned.
        """

        outfile = StringIO() if mode == 'ansi' else sys.stdout
        # Flip so highest y-value row is printed first
        desc = np.flipud(self.grid).astype(str)

        # Convert everything to human-readable symbols
        desc[desc == '0'] = '*'
        desc[desc == '1'] = 'H'
        desc[desc == '-1'] = 'P'

        # Obtain all x-y indices of elements
        x_free, y_free = np.where(desc == '*')
        x_h, y_h = np.where(desc == 'H')
        x_p, y_p = np.where(desc == 'P')

        # Decode if possible
        # NOTE(review): the result of tolist() is discarded, so this line
        # leaves `desc` unchanged.
        desc.tolist()
        try:
            desc = [[c.decode('utf-8') for c in line] for line in desc]
        except AttributeError:
            # Elements without a .decode method land here and `desc` keeps
            # its previous value.
            pass

        # All unfilled spaces are gray
        for unfilled_coords in zip(x_free, y_free):
            desc[unfilled_coords] = utils.colorize(desc[unfilled_coords],
                                                   "gray")

        # All hydrophobic molecules are bold-green
        for hmol_coords in zip(x_h, y_h):
            desc[hmol_coords] = utils.colorize(desc[hmol_coords],
                                               "green",
                                               bold=True)

        # All polar molecules are cyan
        for pmol_coords in zip(x_p, y_p):
            desc[pmol_coords] = utils.colorize(desc[pmol_coords], "cyan")

        # Provide prompt for last action
        if self.last_action is not None:
            outfile.write("  ({})\n".format(["Left", "Down", "Up",
                                             "Right"][self.last_action]))
        else:
            outfile.write("\n")

        # Draw desc
        outfile.write("\n".join(''.join(line) for line in desc) + "\n")

        # The stream itself (not its text) is handed back for non-human modes.
        if mode != 'human':
            return outfile
Example #5
0
    def load_checkpoint(self):
        """Restore the most recent checkpoint found in ``self.checkpoint_dir``.

        Returns:
            True when a checkpoint path was found and restored through
            ``self.saver``; False when no checkpoint path was found.
        """
        print(colorize(" [*] Loading checkpoints...", "green"))
        latest = tf.train.latest_checkpoint(self.checkpoint_dir)
        print(self.checkpoint_dir)
        print("ckpt_path:", latest)

        if not latest:
            print(colorize(" [!] Load FAILED: %s" % self.checkpoint_dir,
                           "red"))
            return False

        self.saver.restore(self.sess, latest)
        print(colorize(" [*] Load SUCCESS: %s" % latest, "green"))
        return True
    def render_observation(self):
        """Return two text lines: the input and, below it, the hint.

        In both lines the character at index ``self.r_cursor`` is highlighted
        in magenta.  The hint line is padded so it lines up under the input.
        """
        def mark(text, pos):
            # Highlight the single character of ``text`` at index ``pos``.
            return (text[:pos]
                    + colorize(text[pos], 'magenta', highlight=True)
                    + text[pos + 1:])

        input_data = self._get_str(self.input_data)
        hint = self._get_str(self.hint)
        cursor = self.r_cursor

        label = 'Input               : '
        input_line = mark(input_data, cursor)
        hint_line = mark(hint, cursor)
        return label + input_line + '\n' + ' ' * len(label) + hint_line + '\n'
    def get_current_map_with_agents(self) -> np.ndarray:
        """Return the current ASCII map with agents coloured in.

        Works on a decoded copy of ``self.desc`` and colours, in this order:
        passengers (or the taxi carrying them), every taxi, and every
        destination.  World cell (r, c) is stored at ``out[1 + r][2 * c + 1]``.

        Returns:
            A numpy array built from the list of coloured character rows.
        """
        # Copy map to work on
        out = [[c.decode('utf-8') for c in line]
               for line in self.desc.copy().tolist()]

        taxis, fuels, passengers_start_coordinates, destinations, passengers_locations = self.state

        colors = ['yellow', 'red', 'white', 'green', 'cyan', 'crimson', 'gray', 'magenta'] * 5

        def ul(x):
            """returns underline instead of spaces when called"""
            return "_" if x == " " else x

        for i, location in enumerate(passengers_locations):
            if location > 2:  # Passenger is on a taxi
                # Locations above 2 index into `taxis` after subtracting 3.
                taxi_index = location - 3
                taxi_row, taxi_col = taxis[taxi_index]

                # Coloring taxi's coordinate on the map
                out[1 + taxi_row][2 * taxi_col + 1] = utils.colorize(
                    out[1 + taxi_row][2 * taxi_col + 1], colors[taxi_index],
                    highlight=True, bold=True)
            else:  # Passenger isn't in a taxi
                # Coloring passenger's coordinates on the map
                pi, pj = passengers_start_coordinates[i]
                out[1 + pi][2 * pj + 1] = utils.colorize(out[1 + pi][2 * pj + 1], 'blue', bold=True)

        for i, (taxi_row, taxi_col) in enumerate(taxis):
            # Collided taxis are drawn gray instead of their own color.
            taxi_color = colors[i] if self.collided[i] == 0 else 'gray'
            out[1 + taxi_row][2 * taxi_col + 1] = utils.colorize(
                ul(out[1 + taxi_row][2 * taxi_col + 1]), taxi_color, highlight=True)

        for di, dj in destinations:
            out[1 + di][2 * dj + 1] = utils.colorize(out[1 + di][2 * dj + 1], 'magenta')

        return np.array(out)
Example #8
0
    def simulate_and_learn_policy(self):
        """Alternate model simulation and policy optimization.

        The simulation data set is cleared first.  Then, for
        ``self.policy_opt_num_iter`` iterations:
            1. ``self.simulate_model()`` is run without gradient tracking.
            2. The simulated trajectory is logged.
            3. ``self.learn_policy()`` optimizes the policy.
        When ``self.sim_refresh_interval`` is positive, the simulation data
        set is cleared again every that many iterations.
        """
        print(colorize("Optimizing Policy with Model Data", "yellow"))
        self.dynamical_model.eval()
        self.sim_dataset.reset()  # Start from an empty simulation data set.

        with DisableGradient(self.dynamical_model):
            with gpytorch.settings.fast_pred_var():
                for iteration in tqdm(range(self.policy_opt_num_iter)):
                    # Step 1: simulate trajectories with the model.
                    with torch.no_grad():
                        self.simulate_model()

                    # Keep a record of the latest simulations.
                    self._log_simulated_trajectory()

                    # Step 2: optimize the policy.
                    self.learn_policy()

                    refresh_every = self.sim_refresh_interval
                    if refresh_every > 0 and (iteration + 1) % refresh_every == 0:
                        self.sim_dataset.reset()
Example #9
0
    def _render(self, mode='human', close=False):
        if close:
            return
        outfile = StringIO() if mode == 'ansi' else sys.stdout

        pos_info = self.s >> self.num_goals
        row, col = pos_info // self.ncol, pos_info % self.ncol
        desc = self.desc.tolist()
        desc = [[c.decode('utf-8') for c in line] for line in desc]

        if self.s != self.TERMINAL_STATE:
            desc[row][col] = utils.colorize(desc[row][col],
                                            "red",
                                            highlight=True)

        if self.lastaction is not None and self.lastaction != 8:
            outfile.write("  ({})\n".format(["Left", "Down", "Right",
                                             "Up"][self.lastaction]))
        elif self.s == self.TERMINAL_STATE:
            outfile.write("  (EXITED)\n")
        else:
            outfile.write("\n")

        outfile.write("\n".join(''.join(line) for line in desc) + "\n")

        return outfile
    def render(self, mode='human'):
        """Write the map followed by a summary of passengers and the taxi.

        Every key of ``self.position_to_coordinates`` other than
        ``self.position_in_taxi`` is drawn on the map as a highlighted label.
        The summary has one line per passenger and a final line saying
        whether the taxi is full.

        Args:
            mode: ``'ansi'`` writes to an in-memory buffer, anything else to
                ``sys.stdout``.

        Returns:
            The rendered text for any mode other than ``'human'`` (previously
            the ``'ansi'`` buffer was discarded); ``None`` for ``'human'``.
        """
        outfile = StringIO() if mode == 'ansi' else sys.stdout

        out = self.desc.copy().tolist()
        out = [[c.decode('utf-8') for c in line] for line in out]

        for position in self.position_to_coordinates:
            if position != self.position_in_taxi:
                row, col = self.position_to_coordinates[position]
                out[row + 1][2 * col + 1] = utils.colorize(str(position), 'yellow', highlight=True)

        outfile.write("\n".join("".join(line) for line in out) + "\n")

        state_objs = self.states[self.no_to_state[self.s]]
        taxi, passengers = state_objs[0], state_objs[1:]

        summary = []
        for passenger in passengers:
            passenger_id = passenger.get_passenger_id()
            if passenger.get_location() == self.position_in_taxi:  # in taxi
                # Report the real destination; the id used to be printed here.
                summary.append("Passenger " + str(passenger_id) + " in taxi with "
                               + str(passenger.get_destination()) + " as destination\n")
            else:
                # NOTE(review): this prints str(passenger), not the id used in
                # the branch above - confirm which one is intended.
                summary.append("Passenger " + str(passenger) + " at " + str(passenger.get_location())
                               + " with " + str(passenger.get_destination()) + " as destination\n")

        if taxi.get_no_of_passengers() == taxi.get_max_capacity():
            summary.append("Taxi fully occupied at " + str(taxi.get_location()))
        else:
            summary.append("Taxi not fully occupied at " + str(taxi.get_location()))

        outfile.write("".join(summary))

        # Hand the text back when it was not printed to the terminal.
        if mode != 'human':
            return outfile.getvalue()
Example #11
0
def dense_nn(inputs, layers_sizes, name="mlp", reuse=False, output_fn=None,
            dropout_keep_prob=None, batch_norm=False, training=True):
    """Stack dense layers on ``inputs`` inside variable scope ``name``.

    Every layer except the last uses a ReLU activation.  Dropout with
    ``dropout_keep_prob`` is applied before each layer but the first when
    ``training`` is truthy, batch normalization is added after each layer
    when ``batch_norm`` is set, and ``output_fn`` (if given) is applied to
    the final output.  Returns the resulting tensor.
    """
    sizes_for_log = [inputs.shape[0]] + layers_sizes
    print(colorize("Building mlb {} | sizes: {}".format(name, sizes_for_log), "green"))

    final_layer = len(layers_sizes) - 1
    with tf.variable_scope(name, reuse=reuse):
        net = inputs
        for index, width in enumerate(layers_sizes):
            layer_name = name + '_l' + str(index)
            print("Layer:", layer_name, width)

            # The first layer never gets dropout on its input.
            if index > 0 and dropout_keep_prob is not None and training:
                net = tf.nn.dropout(net, dropout_keep_prob)

            activation = tf.nn.relu if index < final_layer else None
            net = tf.layers.dense(
                net,
                width,
                activation=activation,
                kernel_initializer=tf.contrib.layers.xavier_initializer(),
                name=layer_name,
                reuse=reuse,
            )

            if batch_norm:
                net = tf.layers.batch_normalization(net, training=training)

        if output_fn:
            net = output_fn(net)

    return net
Example #12
0
def warn(msg, *args, category=None, stacklevel=1):
    """Issue a yellow ``WARN: ...`` warning if the log level allows it.

    ``msg`` is %-formatted with ``args``.  ``stacklevel`` is bumped by one so
    the warning points at the caller of this function.
    """
    if not MIN_LEVEL <= WARN:
        return
    text = colorize("%s: %s" % ("WARN", msg % args), "yellow")
    warnings.warn(text, category=category, stacklevel=stacklevel + 1)
Example #13
0
def warn(msg, *args, category=None, stacklevel=1):
    """Emit ``msg % args`` as a yellow ``WARN:`` warning when enabled.

    Nothing happens unless ``MIN_LEVEL <= WARN``.  One is added to
    ``stacklevel`` so that this wrapper's own frame is skipped.
    """
    if MIN_LEVEL <= WARN:
        formatted = msg % args
        warnings.warn(colorize(f"WARN: {formatted}", "yellow"),
                      category=category,
                      stacklevel=stacklevel + 1)
def render_gridworld(  # pylint: disable=inconsistent-return-statements
        self, mode='human'):
    """Print the 3x4 grid world with the current state highlighted in red.

    Args:
        mode: ``'human'`` prints to ``sys.stdout``; ``'ansi'`` renders into a
            string buffer.

    Returns:
        The rendered text for ``'ansi'`` mode, ``None`` for ``'human'``.

    Raises:
        ValueError: for any other ``mode``.
    """
    if mode != 'human' and mode != 'ansi':
        raise ValueError('Only `human` and `ansi` modes are supported')

    # stream that receives the string representation of the env
    stream = io.StringIO() if mode != 'human' else sys.stdout

    if self.action_prev is not None:
        action_index = self.action_prev.item()
        print(f'action: {self.model.actions[action_index]}', file=stream)

    # Flat cell 5 (row 1, column 1 - the blank cell below) is skipped, so
    # states from 5 upwards are shifted by one.
    flat = self.state.item() if self.state < 5 else self.state.item() + 1
    i = flat // 4
    j = flat % 4

    rows = [['.', '.', '.', '+'], ['.', ' ', '.', '-'], ['.', '.', '.', '.']]
    rows[i][j] = colorize(rows[i][j], 'red', highlight=True)

    print('\n'.join(''.join(line) for line in rows), file=stream)

    if mode == 'ansi':
        with contextlib.closing(stream):
            return stream.getvalue()
Example #15
0
 def render(self, mode='human'):
     """Print a status report and count correct answers.

     When the last action equals ``self.current_qa[2]`` a success line is
     printed, and ``self.num_correct`` is incremented if
     ``self.pos_idx['q'] == 1`` and ``self.pos_idx['word'] > 1``.
     ``mode`` is accepted but not used.
     """
     divider = '------------------------'
     vocab = self.data.QA.IVOCAB

     print(divider)
     print('State is in question: {0}'.format(bool(self.pos_idx['q'])))
     print('Sentence number:', colorize(str(self.pos_idx['sen']), 'yellow'))
     print('Current word state:', colorize(str(vocab[self.state]), 'cyan'))

     if self.last_action == self.current_qa[2]:
         print(colorize('Correct action! The answer is', 'green'),
               vocab[self.last_action])
         # Note: rendering has the side effect of updating the counter.
         if self.pos_idx['q'] == 1 and self.pos_idx['word'] > 1:
             self.num_correct += 1

     print('Correct answers:', str(self.num_correct))
     print('Current QA:', colorize(str(self.i), 'magenta'))
     print(divider)
Example #16
0
def dense_nn(inputs,
             layers_sizes,
             name="mlp",
             reuse=None,
             dropout_keep_prob=None,
             batch_norm=False,
             training=True):
    """Build a multi-layer perceptron on ``inputs`` under scope ``name``.

    One dense layer is created per entry of ``layers_sizes``; all but the
    last use ReLU.  Dropout (when ``dropout_keep_prob`` is given and
    ``training`` is truthy) is applied before every layer except the first,
    and batch normalization follows each layer when ``batch_norm`` is set.
    Returns the output tensor of the last layer.
    """
    banner = "Building mlp {} | sizes: {}".format(
        name, [inputs.shape[0]] + layers_sizes)
    print(colorize(banner, "green"))

    hidden = inputs
    last = len(layers_sizes) - 1
    with tf.variable_scope(name):
        for idx, units in enumerate(layers_sizes):
            layer_name = name + '_l' + str(idx)
            print("Layer:", layer_name, units)

            # No dropout on the input layer.
            if idx > 0 and dropout_keep_prob is not None and training:
                hidden = tf.nn.dropout(hidden, dropout_keep_prob)

            # Only internal layers get a relu activation.
            hidden = tf.layers.dense(
                hidden,
                units,
                activation=tf.nn.relu if idx < last else None,
                kernel_initializer=tf.contrib.layers.xavier_initializer(),
                reuse=reuse,
                name=layer_name)

            if batch_norm:
                hidden = tf.layers.batch_normalization(hidden, training=training)

    return hidden
Example #17
0
    def render(self, mode="human"):
        """
        Write the grid map with the current state highlighted in red, after a
        header line naming the last action (blank if there is none).

        Output goes to ``sys.stdout``, or to a ``StringIO`` when ``mode`` is
        ``"ansi"``.  For any mode other than ``"human"`` the rendered text is
        returned and the stream is closed.
        """
        stream = StringIO() if mode == "ansi" else sys.stdout

        row = self.state // self.ncol
        col = self.state % self.ncol
        cells = [[ch.decode("utf-8") for ch in line]
                 for line in self.grid_map.tolist()]
        cells[row][col] = utils.colorize(cells[row][col], "red", highlight=True)

        if self.last_action is None:
            stream.write("\n")
        else:
            action_names = [
                "Left", "Down", "Right", "Up",
                "Boost_Left", "Boost_Down", "Boost_Right", "Boost_Up",
            ]
            last_action = action_names[self.last_action]
            stream.write(f"  ({last_action})\n")

        stream.write("\n".join("".join(line) for line in cells) + "\n")

        if mode != "human":
            with closing(stream):
                return stream.getvalue()
Example #18
0
    def _render(self, mode='human', close=False):
        if close:
            return

        outfile = StringIO() if mode == 'ansi' else sys.stdout

        for s in range(self.nS):
            position = np.unravel_index(s, self.shape)
            # print(self.s)
            if self.s == s:
                output = utils.colorize(" x ", "red", highlight=True)
            elif position == (3, 11):
                output = " T "
            elif self._cliff[position]:
                output = " C "
            else:
                output = " o "

            if position[1] == 0:
                output = output.lstrip()
            if position[1] == self.shape[1] - 1:
                output = output.rstrip()
                output += "\n"

            outfile.write(output)
        outfile.write("\n")
Example #19
0
    def render(self, mode="human", plot=False):
        """Write the board with every cell equal to 1 marked by a red ``*``.

        A header line reports the position of the last action, or is blank.
        With ``plot`` set, the decoded grid is also shown through matplotlib.
        For any mode other than ``"human"`` the rendered text is returned and
        the stream is closed.
        """
        stream = StringIO() if mode == "ansi" else sys.stdout

        board = [[ch.decode("utf-8") for ch in line]
                 for line in self.desc.tolist()]
        grid = self.decode(self.s)
        lit_cells = zip(*np.where(grid == 1))
        for row, col in lit_cells:
            board[row][col] = utils.colorize("*", "red", highlight=True, bold=True)

        if self.lastaction is None:
            stream.write("\n")
        else:
            position = self._action_to_pos(self.lastaction)
            stream.write(" Turn off ({},{})\n".format(*position))
        stream.write("\n".join("".join(line) for line in board) + "\n")

        if plot:
            stream.write("\n")
            plt.pcolormesh(grid, edgecolors="w", linewidth=2, cmap="seismic")
            axes_off = "off"
            plt.axis(axes_off)  # hide the axes
            plt.gca().invert_yaxis()  # pcolormesh inverts the y axis; undo it
            plt.gca().set_aspect("equal")  # draw the grid as a square
            plt.show()

        # Nothing is returned for human mode.
        if mode != "human":
            with closing(stream):
                return stream.getvalue()
Example #20
0
def render_state(state_id, nrow, ncol, storm_maps, terminal_pos):
    """Print an ASCII picture of one state of the grid.

    Cells are ``.`` by default, ``E`` at ``terminal_pos``, ``S`` where the
    selected storm map exceeds 0.5, and a highlighted ``A`` at the position
    decoded from ``state_id``.  Later marks overwrite earlier ones.

    Args:
        state_id: integer encoding both the position and the storm index.
        nrow: number of grid rows.
        ncol: number of grid columns.
        storm_maps: indexable collection of arrays, one per storm index.
        terminal_pos: index of the terminal cell in the grid.
    """
    def _decode(s):
        '''
            s: a number that represent the state.
            return: a tuple like (row, col), storm_index
        '''
        return ((s % (nrow * ncol)) // ncol, s % ncol), s // (ncol * nrow)

    def print_grid(air_map):
        # Join the cells of each row directly.  The previous
        # tostring().decode('utf-8') relied on a deprecated numpy method and
        # decoded the raw unicode buffer, which embeds NUL characters.
        for row in air_map:
            print(''.join(row))

    # Generate map.  NOTE(review): 'U10' is presumably wide enough to hold
    # the colorized marker - confirm against utils.colorize.
    air_map = np.full((nrow, ncol), '.', dtype='U10')
    air_map[terminal_pos] = 'E'

    pos, storm = _decode(state_id)
    storm_map_now = storm_maps[storm]
    air_map[storm_map_now > .5] = 'S'
    air_map[pos] = utils.colorize('A', 'red', highlight=True)
    print_grid(air_map)
    print('')
 def _render_walk(self):
     """Return the walk as a row of ``#`` with the current position coloured.

     The marker is green once ``self._reached_right_edge()`` is true and red
     otherwise.
     """
     track = ['#'] * self._walk_len
     if self._reached_right_edge():
         marker_color = 'green'
     else:
         marker_color = 'red'
     here = self._position
     track[here] = utils.colorize(track[here], color=marker_color, highlight=True)
     return "".join(track)
Example #22
0
def dense_nn(inputs,
             layers_sizes,
             name="mlp",
             output_fn=None,
             dropout_keep_prob=None,
             batch_norm=False,
             training=True):
    """Build a Keras ``Sequential`` multi-layer perceptron.

    Args:
        inputs: only ``inputs.shape[0]`` is read, for the log message.
        layers_sizes: list with the number of units of each dense layer; all
            layers but the last use a ReLU activation.
        name: prefix for the layer names.
        output_fn: accepted but not used by this implementation.
        dropout_keep_prob: probability of KEEPING a unit.  When given and
            ``training`` is truthy, a dropout layer precedes every dense
            layer except the first.
        batch_norm: add a batch-normalization layer after each dense layer.
        training: enables the dropout layers.

    Returns:
        The built ``tf.keras.Sequential`` model.
    """

    print(
        colorize(
            "Building mlp {} | sizes: {}".format(name, [inputs.shape[0]] +
                                                 layers_sizes), "green"))

    model = tf.keras.Sequential()

    for i, layer in enumerate(layers_sizes):
        print("Layer:", name + '_l' + str(i), layer)
        if i > 0 and dropout_keep_prob is not None and training:
            # Keras Dropout expects the fraction of units to DROP, so the
            # keep probability has to be converted.
            model.add(tf.keras.layers.Dropout(1.0 - dropout_keep_prob))

        model.add(
            tf.keras.layers.Dense(
                layer,
                activation='relu' if i < len(layers_sizes) - 1 else None,
                name=name + '_l' + str(i)))

        if batch_norm:
            model.add(tf.keras.layers.BatchNormalization())

    # NOTE(review): the input shape is hard-coded rather than derived from
    # `inputs` - confirm this is intended.
    model.build(input_shape=(4, ))
    return model
Example #23
0
 def __init__(
     self,
     env: PassiveEnvironment,
     first_epoch_only: bool = False,
     wandb_prefix: str = None,
 ):
     """Wrap a passive environment and prepare the metric bookkeeping.

     If the unwrapped env is not already pretending to be active, a
     ``RuntimeWarning`` explains that rewards will be withheld until actions
     are sent; the flag is then switched on in every case.
     """
     super().__init__(env)
     # Maps a step number to the metrics recorded at that step.
     self._metrics: Dict[int, ClassificationMetrics] = defaultdict(Metrics)
     self.first_epoch_only = first_epoch_only
     self.wandb_prefix = wandb_prefix
     # Number of steps counted so far.
     self._steps: int = 0
     assert isinstance(self.env.unwrapped, PassiveEnvironment)

     if not self.env.unwrapped.pretend_to_be_active:
         scope = "during the first epoch " if self.first_epoch_only else ""
         message = (
             "Your online performance " + scope +
             "on this environment will be monitored! "
             "Since this env is Passive, i.e. a Supervised Learning "
             "DataLoader, the Rewards (y) will be withheld until "
             "actions are passed to the 'send' method. Make sure that "
             "your training loop can handle this small tweak."
         )
         warnings.warn(RuntimeWarning(colorize(message, color="yellow")))

     self.env.unwrapped.pretend_to_be_active = True
     self.__epochs = 0
Example #24
0
    def render(self, mode='human', fidelity=0):
        """Write the framed map for ``fidelity`` with the current cell marked.

        When ``self.s`` is set, its decoded cell is highlighted in green (a
        blank cell is shown as ``_``).  Two footer lines report the last
        action and the last fidelity, each left blank when unset.  For any
        mode other than ``'human'`` the rendered text is returned and the
        stream is closed.
        """
        stream = StringIO() if mode == 'ansi' else sys.stdout

        rows = [[ch.decode('utf-8') for ch in line]
                for line in np.asarray(MAP[fidelity], dtype='c').tolist()]

        if self.s is not None:
            row, col = self.decode(self.s)
            current = rows[row][col]
            shown = "_" if current == " " else current
            rows[row][col] = utils.colorize(shown, 'green', highlight=True)

        stream.write('____________________\n')
        stream.write("\n".join('|' + "".join(line) + '|' for line in rows) + "\n")
        stream.write('‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾‾\n')

        if self.lastaction is None:
            stream.write("\n")
        else:
            action_names = ["South", "North", "East", "West"]
            stream.write("  ({})\n".format(action_names[self.lastaction]))

        if self.lastfidelity is None:
            stream.write("\n")
        else:
            stream.write("  (Fidelity: {})\n".format(self.lastfidelity))

        # Nothing is returned for human mode.
        if mode != 'human':
            with closing(stream):
                return stream.getvalue()
Example #25
0
    def _runtime_score(self) -> float:
        # TODO: function that takes the total runtime in seconds and returns a
        # normalized float score between 0 and 1.
        runtime_seconds = self._runtime
        if self._runtime is None:
            warnings.warn(
                RuntimeWarning(
                    colorize(
                        "Runtime is None! Returning runtime score of 0.\n (Make sure the "
                        "Setting had its `monitor_training_performance` attr set to True!",
                        color="red",
                    )))
            return 0
        runtime_hours = runtime_seconds / 3600

        # Get the maximum runtime for this type of Results (and Setting)
        min_runtime_hours = type(self).min_runtime_hours
        max_runtime_hours = type(self).max_runtime_hours

        assert 0 <= min_runtime_hours < max_runtime_hours
        assert 0 < runtime_hours
        if runtime_hours <= min_runtime_hours:
            return 1.0
        if max_runtime_hours <= runtime_hours:
            return 0.0
        return 1 - ((runtime_hours - min_runtime_hours) /
                    (max_runtime_hours - min_runtime_hours))
    def render(self, mode='human', close=False):
        """Write the map with the pointer cell highlighted in yellow.

        After the map, the pointer row, pointer column, ``hasPlayer`` flag and
        the name of the last action are written, or a blank line when no
        action has been taken.  ``close`` is accepted but not used.  For any
        mode other than ``'human'`` the rendered text is returned and the
        stream is closed.
        """
        stream = StringIO() if mode == 'ansi' else sys.stdout

        grid = [[ch.decode('utf-8') for ch in line]
                for line in self.desc.copy().tolist()]
        pointer_row, pointer_col, hasPlayer = self.decode(self.s)

        # World cell (r, c) is stored at grid[1 + r][c + 1].
        r, c = 1 + pointer_row, pointer_col + 1
        grid[r][c] = utils.colorize(grid[r][c], 'yellow', highlight=True)

        stream.write("\n".join("".join(line) for line in grid) + "\n")

        if self.lastaction is None:
            stream.write("\n")
        else:
            stream.write("  ({}),".format(pointer_row))
            stream.write("  ({})".format(pointer_col))
            stream.write("  ({})\n".format(hasPlayer))
            action_names = ["Down", "Up", "Right", "Left", "Player"]
            stream.write("  ({})\n".format(action_names[self.lastaction]))

        # Nothing is returned for human mode.
        if mode != 'human':
            with closing(stream):
                return stream.getvalue()
Example #27
0
    def render(self, mode='human'):
        """Write the grid, or the last drawn grid when in the absorbing state.

        Outside the absorbing state the current cell is highlighted in red, a
        header names the last action, and the drawn grid is remembered in
        ``self.last_desc``.  For any mode other than ``'human'`` the rendered
        text is returned and the stream is closed.
        """
        stream = StringIO() if mode == 'ansi' else sys.stdout

        if EXPLICIT_ABSORBING and self.s == self.s_absorb:
            # Re-show the grid that was drawn on the previous render.
            stream.write("In absorbing state \n")
            previous = "\n".join(''.join(line) for line in self.last_desc)
            stream.write(previous + "\n")
        else:
            row = self.s // self.ncol
            col = self.s % self.ncol
            grid = [[ch.decode('utf-8') for ch in line]
                    for line in self.desc.tolist()]
            grid[row][col] = utils.colorize(grid[row][col], "red", highlight=True)

            if self.lastaction is None:
                stream.write("\n")
            else:
                action_names = ["Left", "Down", "Right", "Up"]
                stream.write("  ({})\n".format(action_names[self.lastaction]))

            stream.write("\n".join(''.join(line) for line in grid) + "\n")
            self.last_desc = grid

        if mode != 'human':
            with closing(stream):
                return stream.getvalue()
    def _render(self, mode='human', close=False):
        if close:  # 初始化环境Environment的时候不显示
            return

        outfile = StringIO() if mode == 'ansi' else sys.stdout

        desc = self.desc.tolist()
        desc = [[c.decode('utf-8') for c in line] for line in desc]

        state_grid = np.arange(self.nS).reshape(self.shape)
        it = np.nditer(state_grid, flags=['multi_index'])

        while not it.finished:
            s = it.iterindex
            y, x = it.multi_index

            # 对于当前状态用红色标注
            if self.s == s:
                desc[y][x] = utils.colorize(desc[y][x], "red", highlight=True)

            it.iternext()

        outfile.write("\n".join(' '.join(line) for line in desc) + "\n")

        if mode != 'human':
            return outfile
Example #29
0
def warn(msg: str, *args) -> None:
    """
    Custom definition of :py:func:`gym.logger.warn` function.

    Emits ``msg % args`` as a yellow ``WARN:`` warning attributed to the
    caller, but only when ``logger.MIN_LEVEL <= logger.WARN``.
    """
    if not logger.MIN_LEVEL <= logger.WARN:
        return

    text = '%s: %s' % ('WARN', msg % args)
    warnings.warn(colorize(text, 'yellow'), stacklevel=2)
    def _update_model_posterior(self, last_trajectory):
        """Feed the last trajectory to the GP model, if the model is an exact GP.

        Nothing happens unless the dynamical model's base model is an
        ``ExactGPModel``.  Otherwise the trajectory is stacked, its actions
        are cut down to the model's action dimension, the training-set
        transformations are applied, and the data is added to the GP, which
        is then summarized.
        """
        if not isinstance(self.dynamical_model.base_model, ExactGPModel):
            return

        observation = stack_list_of_tuples(last_trajectory)  # Parallelize.

        # Drop any action components beyond the model's action dimension.
        dim_action = self.dynamical_model.dim_action[0]
        if observation.action.shape[-1] > dim_action:
            observation.action = observation.action[..., :dim_action]

        for transform in self.train_set.transformations:
            observation = transform(observation)

        print(colorize("Add data to GP Model", "yellow"))
        self.dynamical_model.base_model.add_data(
            observation.state, observation.action, observation.next_state)

        print(colorize("Summarize GP Model", "yellow"))
        self.dynamical_model.base_model.summarize_gp()
    def load_model(self):
        """Restore the checkpoint recorded for ``self.checkpoint_dir``.

        Returns:
            True when a checkpoint state with a model path exists and was
            restored through ``self.saver``; False otherwise.
        """
        print(colorize(" [*] Loading checkpoints...", "green"))

        state = tf.train.get_checkpoint_state(self.checkpoint_dir)
        print(self.checkpoint_dir, state)

        if not (state and state.model_checkpoint_path):
            print(colorize(" [!] Load FAILED: %s" % self.checkpoint_dir,
                           "red"))
            return False

        # Rebuild the path from the directory and the checkpoint's file name.
        ckpt_name = os.path.basename(state.model_checkpoint_path)
        print(ckpt_name)
        fname = os.path.join(self.checkpoint_dir, ckpt_name)
        print(fname)
        self.saver.restore(self.sess, fname)
        print(colorize(" [*] Load SUCCESS: %s" % fname, "green"))
        return True
Example #32
0
def warn(msg: str, *args) -> None:
    """
    Custom definition of :py:func:`gym.logger.warn` function.

    Emits ``msg % args`` as a yellow, ``WARN``-prefixed message through
    :py:func:`warnings.warn` when ``logger.MIN_LEVEL`` permits warnings.
    """
    warnings_enabled = logger.MIN_LEVEL <= logger.WARN
    if warnings_enabled:
        body = msg % args
        colored = colorize("%s: %s" % ("WARN", body), "yellow")
        # stacklevel=2 makes the warning point at the caller of ``warn``.
        warnings.warn(colored, stacklevel=2)
Example #33
0
def lstm_net(inputs,
             layers_sizes,
             name='lstm',
             step_size=16,
             lstm_layers=1,
             lstm_size=256,
             pre_lstm_dense_layer=None,
             dropout_keep_prob=None,
             training=True):
    """Build a stacked LSTM followed by dense output layers.

    inputs = (batch_size * step_size, *observation_size)

    Args:
        inputs: 2-D tensor; it is reshaped to ``(-1, step_size, state_size)``
            where ``state_size`` is its second dimension.
        layers_sizes: layer sizes handed to ``dense_nn`` for the output head;
            the last entry is the width of the returned tensor.
        name: variable scope under which the network is built.
        step_size: number of consecutive rows of ``inputs`` grouped into one
            sequence.
        lstm_layers: number of LSTM cells stacked in the ``MultiRNNCell``.
        lstm_size: number of units of every LSTM cell.
        pre_lstm_dense_layer: if truthy, size of a dense + ReLU layer applied
            to the sequences before the LSTM.
        dropout_keep_prob: if truthy and ``training`` is set, output keep
            probability of a ``DropoutWrapper`` placed around each cell.
        training: enables dropout; cells are created with
            ``reuse=not training``.

    Returns:
        The output tensor reshaped to ``(-1, layers_sizes[-1])``.
    """
    print(colorize("Building lstm net " + name, "green"))
    print("inputs.shape =", inputs.shape)

    state_size = inputs.shape.as_list()[1]
    inputs = tf.reshape(inputs, [-1, step_size, state_size])
    print("reshaped inputs.shape =", inputs.shape)

    def _make_cell():
        # One LSTM cell, optionally wrapped with output dropout for training.
        cell = tf.nn.rnn_cell.LSTMCell(lstm_size,
                                       state_is_tuple=True,
                                       reuse=not training)
        if training and dropout_keep_prob:
            cell = tf.contrib.rnn.DropoutWrapper(
                cell, output_keep_prob=dropout_keep_prob)
        return cell

    with tf.variable_scope(name):

        if pre_lstm_dense_layer:
            inputs = tf.nn.relu(
                dense_nn(inputs, [pre_lstm_dense_layer], name='pre_lstm'))

        with tf.variable_scope('lstm_cells'):
            # Before transpose, inputs.get_shape() = (batch_size, num_steps, lstm_size)
            # After transpose, inputs.get_shape() = (num_steps, batch_size, lstm_size)
            lstm_inputs = tf.transpose(inputs, [1, 0, 2])

            cell = tf.contrib.rnn.MultiRNNCell(
                [_make_cell() for _ in range(lstm_layers)],
                state_is_tuple=True)
            # The inputs were made time-major above, so dynamic_rnn must be
            # told; its default (time_major=False) would treat axis 0 as the
            # batch and unroll the recurrence over the wrong dimension.
            lstm_outputs, lstm_states = tf.nn.dynamic_rnn(cell,
                                                          lstm_inputs,
                                                          dtype=tf.float32,
                                                          time_major=True)

            # transpose back.
            lstm_outputs = tf.transpose(lstm_outputs, [1, 0, 2])

            print("cell =", cell)
            print("lstm_states =", lstm_states)
            print("lstm_outputs.shape =", lstm_outputs.shape)

        outputs = dense_nn(lstm_outputs, layers_sizes, name="outputs")
        print("outputs.shape =", outputs.shape)

        outputs = tf.reshape(outputs, [-1, layers_sizes[-1]])
        print("reshaped outputs.shape =", outputs.shape)
        return outputs
Example #34
0
    def render_observation(self, observation, outfile=None):
        """Write ``observation`` as a character grid framed by a gray border.

        Args:
            observation: 2-D array-like exposing ``copy()``, ``tolist()`` and
                ``shape``; every entry is looked up in ``self.MapChars``.
            outfile: writable text stream.  When omitted, the ``sys.stdout``
                in effect at call time is used; resolving it per call rather
                than at definition time keeps stdout redirection working.
        """
        if outfile is None:
            outfile = sys.stdout

        rows = [[self.MapChars[i] for i in line]
                for line in observation.copy().tolist()]

        b = utils.colorize(' ', 'gray', highlight=True)
        # Top/bottom border: one block per column plus the two corners.
        bline = "{}{}{}\n".format(b, b * observation.shape[1], b)
        outfile.write(bline)
        outfile.write("\n".join(b + "".join(row) + b for row in rows) + "\n")
        outfile.write(bline)
Example #35
0
 def render_observation(self):
     """Return the observation tape as one labelled line.

     Positions from -2 to ``self.input_width + 1`` are rendered; the cell
     at ``self.read_head_position`` is highlighted in green.
     """
     head = self.read_head_position
     cells = []
     for pos in range(-2, self.input_width + 2):
         symbol = self._get_str_obs(np.array([pos]))
         if pos == head:
             symbol = colorize(symbol, 'green', highlight=True)
         cells.append(symbol)
     return "Observation Tape    : " + "".join(cells) + "\n"
Example #36
0
def main():
    """Roll out a random agent on a gym environment and save the results.

    Command line: ``envid outfile [--gymdir DIR]``.  Two rollouts are run
    with NumPy seeds 0 and 1; every array returned by ``rollout`` is stored
    in ``outfile`` (``np.savez``) under the key ``"<seed>-<name>"``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("envid")
    parser.add_argument("outfile")
    parser.add_argument("--gymdir")

    args = parser.parse_args()
    if args.gymdir:
        # Must run before the import so the given checkout takes precedence.
        sys.path.insert(0, args.gymdir)
    import gym
    from gym import utils
    # print(...) with one argument and range() behave the same on Python 2
    # and 3; the previous print statement / xrange were Python 2 only.
    print(utils.colorize("gym directory: %s" % path.dirname(gym.__file__), "yellow"))
    env = gym.make(args.envid)
    agent = RandomAgent(env.action_space)
    max_steps = env.spec.tags['wrapper_config.TimeLimit.max_episode_steps']
    alldata = {}
    for i in range(2):
        np.random.seed(i)
        data = rollout(env, agent, max_steps)
        for (k, v) in data.items():
            alldata["%i-%s" % (i, k)] = v
    np.savez(args.outfile, **alldata)
Example #37
0
	def _render(self,mode='human', close=False):
		"""Write ``self.matrix`` as text with cell (4, 4) highlighted in red.

		Output goes to a fresh ``StringIO`` for mode 'ansi' and to
		``sys.stdout`` otherwise; the stream is returned.  Returns None
		right away when ``close`` is set.
		"""
		if close:
			return
		outfile = sys.stdout if mode != 'ansi' else StringIO()
		row = 4
		col = 4
		grid = [[cell.decode('utf-8') for cell in line] for line in self.matrix.tolist()]
		grid[row][col] = utils.colorize(grid[row][col], "red", highlight=True)
		outfile.write("\n".join(''.join(line) for line in grid)+"\n")
		if self.lastaction is None:
			outfile.write("\n")
		else:
			action_names = ["Left", "Down", "Right", "Up"]
			outfile.write("  ({})\n".format(action_names[self.lastaction]))
		return outfile
Example #38
0
 def render_observation(self):
     """Return the observation grid as labelled, newline-terminated rows.

     Rows -1 to ``self.rows`` and columns -2 to ``self.input_width + 1``
     are rendered.  The first row carries the label, later rows are padded
     to the label's width, and the cell at ``self.read_head_position`` is
     highlighted in green.
     """
     head = self.read_head_position
     label = "Observation Grid    : "
     padding = " " * len(label)
     lines = []
     for j in range(-1, self.rows+1):
         cells = []
         for i in range(-2, self.input_width + 2):
             text = self._get_str_obs((i, j))
             if i == head[0] and j == head[1]:
                 text = colorize(text, 'green', highlight=True)
             cells.append(text)
         # The label itself is prepended once, after all rows are built.
         prefix = "" if j == -1 else padding
         lines.append(prefix + "".join(cells) + "\n")
     return label + "".join(lines)
Example #39
0
    def render(self, mode='human'):
        """Write the last action and the grid, current cell highlighted in red.

        The current cell is derived from ``self.s`` and ``self.ncol``.
        Output goes to a fresh ``StringIO`` for mode 'ansi' and to
        ``sys.stdout`` otherwise.

        Returns:
            The output stream for any mode other than 'human', else None.
        """
        outfile = StringIO() if mode == 'ansi' else sys.stdout

        row, col = divmod(self.s, self.ncol)
        grid = [[c.decode('utf-8') for c in line] for line in self.desc.tolist()]
        grid[row][col] = utils.colorize(grid[row][col], "red", highlight=True)
        if self.lastaction is None:
            outfile.write("\n")
        else:
            names = ["Left","Down","Right","Up"]
            outfile.write("  ({})\n".format(names[self.lastaction]))
        outfile.write("\n".join(''.join(line) for line in grid)+"\n")

        if mode == 'human':
            return None
        return outfile
  def _render(self, mode='human', close=False):
    """Write the grid with the current cell in red, then the last action.

    The current cell is derived from ``self.s`` and ``self.ncol``.  Output
    goes to a new ``StringIO.StringIO`` for mode 'ansi' and to
    ``sys.stdout`` otherwise; the stream is returned.  Returns None right
    away when ``close`` is set.
    """
    if close:
      return

    if mode == 'ansi':
      outfile = StringIO.StringIO()
    else:
      outfile = sys.stdout

    row, col = divmod(self.s, self.ncol)
    grid = self.desc.tolist()
    grid[row][col] = utils.colorize(grid[row][col], "red", highlight=True)

    outfile.write("\n".join("".join(cells) for cells in grid)+"\n")
    if self.lastaction is None:
      outfile.write("\n")
    else:
      meaning = self.get_action_meanings()[self.lastaction]
      outfile.write("  ({})\n".format(meaning))

    return outfile
Example #41
0
    def _render(self, mode="human", close=False):
        """Write the grid with the current cell in red, then the last action.

        The current cell is derived from ``self.s`` and ``self.ncol``.
        Output goes to a fresh ``StringIO`` for mode "ansi" and to
        ``sys.stdout`` otherwise; the stream is returned.  Returns None
        right away when ``close`` is set.
        """
        if close:
            return

        if mode == "ansi":
            outfile = StringIO()
        else:
            outfile = sys.stdout

        row = self.s // self.ncol
        col = self.s % self.ncol
        grid = []
        for line in self.desc.tolist():
            grid.append([c.decode("utf-8") for c in line])
        grid[row][col] = utils.colorize(grid[row][col], "red", highlight=True)
        text = "\n".join("".join(line) for line in grid)
        outfile.write(text + "\n")
        action = self.lastaction
        if action is None:
            outfile.write("\n")
        else:
            outfile.write("  ({})\n".format(("Left", "Down", "Right", "Up")[action]))

        return outfile
Example #42
0
    def _render(self, mode='human', close=False):
        """Write a textual view of the tapes, targets and last action.

        Output goes to a fresh ``StringIO`` for mode 'ansi' and to
        ``sys.stdout`` otherwise; the stream is returned.  Returns None
        right away when ``close`` is set.
        """
        if close:
            # Nothing interesting to close
            return

        outfile = StringIO() if mode == 'ansi' else sys.stdout
        header = "Total length of input instance: %d, step: %d\n" % (self.input_width, self.time)
        outfile.write(header)
        _, write_pos, action = self.read_head_position, self.write_head_position, self.last_action
        acted = action is not None
        if acted:
            inp_act, out_act, pred = action
        # Underline the header (its trailing newline is not counted).
        outfile.write("=" * (len(header) - 1) + "\n")
        if acted:
            pred_str = self.charmap[pred]
        obs_str = self._render_observation()

        output_cells = ["Output Tape         : "]
        target_cells = ["Targets             : "]
        last_written = write_pos - 1
        for i in range(-2, len(self.target) + 2):
            target_cells.append(self._get_str_target(i))
            if i < last_written:
                output_cells.append(self._get_str_target(i))
            elif i == last_written:
                if acted and out_act == 1:
                    # Green when the prediction matches the target, else red.
                    color = 'green' if pred == self.target[i] else 'red'
                    output_cells.append(colorize(pred_str, color, highlight=True))
                else:
                    output_cells.append(self._get_str_target(i))
        outfile.write(obs_str)
        outfile.write("".join(output_cells) + "\n")
        outfile.write("".join(target_cells) + "\n\n")

        if not acted:
            outfile.write("\n" * 5)
            return outfile

        outfile.write("Current reward      :   %.3f\n" % self.last_reward)
        outfile.write("Cumulative reward   :   %.3f\n" % self.episode_total_reward)
        move = self.MOVEMENTS[inp_act]
        outfile.write("Action              :   Tuple(move over input: %s,\n" % move)
        wrote = out_act == 1
        outfile.write("                              write to the output tape: %s,\n" % wrote)
        outfile.write("                              prediction: %s)\n" % pred_str)
        return outfile
Example #43
0
    def _render(self, mode='human', close=False):
        """Write a textual view of the tapes, targets and last action.

        Output goes to a fresh ``StringIO`` for mode 'ansi' and to
        ``sys.stdout`` otherwise; the stream is returned.  Returns None
        right away when ``close`` is set.
        """
        if close:
            # Nothing interesting to close
            return

        outfile = StringIO() if mode == 'ansi' else sys.stdout
        header = "Total length of input instance: %d, step: %d\n" % (self.total_len, self.time)
        outfile.write(header)
        _, y, action = self.x, self.y, self.last_action
        acted = action is not None
        if acted:
            inp_act, out_act, pred = action
        # Underline the header (its trailing newline is not counted).
        outfile.write("=" * (len(header) - 1) + "\n")
        if acted:
            # Letters when self.chars is set, plain numbers otherwise.
            pred_str = chr(pred + ord('A')) if self.chars else str(pred)
        obs_str = self._render_observation()

        output_cells = ["Output Tape         : "]
        target_cells = ["Targets             :   "]
        max_len = int(self.total_reward) + 1
        for i in range(-2, max_len):
            if i not in self.target:
                output_cells.append(" ")
                continue
            target_cells.append(self._get_str_target(i))
            if i < y - 1:
                output_cells.append(self._get_str_target(i))
            elif i == (y - 1):
                if acted and out_act == 1:
                    # Green when the prediction matches the target, else red.
                    color = 'green' if pred == self.target[i] else 'red'
                    output_cells.append(colorize(pred_str, color, highlight=True))
                else:
                    output_cells.append(self._get_str_target(i))
        outfile.write(obs_str)
        outfile.write("".join(output_cells) + "\n")
        outfile.write("".join(target_cells) + "\n\n")

        if not acted:
            outfile.write("\n" * 5)
            return outfile

        outfile.write("Current reward      :   %.3f\n" % self.reward)
        outfile.write("Cumulative reward   :   %.3f\n" % self.sum_reward)
        # Action codes 0..3 map to these names; any other code prints "".
        move = ""
        for code, direction in enumerate(("left", "right", "up", "down")):
            if inp_act == code:
                move = direction
                break
        outfile.write("Action              :   Tuple(move over input: %s,\n" % move)
        wrote = "True" if out_act == 1 else "False"
        outfile.write("                              write to the output tape: %s,\n" % wrote)
        outfile.write("                              prediction: %s)\n" % pred_str)
        return outfile
Example #44
0
File: logger.py Project: joschu/gym
def warn(msg, *args):
    """Print ``msg % args`` as a yellow ``WARN:`` line if MIN_LEVEL allows it."""
    if not MIN_LEVEL <= WARN:
        return
    body = msg % args
    print(colorize('%s: %s' % ('WARN', body), 'yellow'))
Example #45
0
File: logger.py Project: joschu/gym
def error(msg, *args):
    """Print ``msg % args`` as a red ``ERROR:`` line if MIN_LEVEL allows it."""
    if not MIN_LEVEL <= ERROR:
        return
    body = msg % args
    print(colorize('%s: %s' % ('ERROR', body), 'red'))