Exemple #1
0
 def encodes(self, o: TSTensor):
     """Render every sample of `o` as an RGB image drawn with ``imshow``.

     A 2-D input is treated as a single sample (a leading axis is added).
     Each sample is colour-mapped with fixed limits ``vmin=-1`` / ``vmax=1``
     using ``self.cmap``, rasterised through an Agg canvas, scaled to
     ``[0, 1]`` and returned channel-first.

     Returns:
         A `TSImage` holding one image per sample, concatenated along the
         first axis and moved back to the device `o` came from.
     """
     device = o.device
     # Matplotlib needs host memory, so CUDA tensors are copied to the CPU.
     if o.data.device.type == 'cuda': o = o.cpu()
     if o.ndim == 2: o = o[None]
     nvars, seq_len = o.shape[-2:]
     aspect = seq_len / nvars
     # Fall back to the sequence length when no explicit size is configured.
     # The original computed this value but then passed `self.size` to
     # `get_plot_fig`, silently discarding the fallback.
     # NOTE(review): assumes `ifnone(a, b)` returns `b` when `a` is None - confirm.
     size = ifnone(self.size, seq_len)
     fig = get_plot_fig(size, dpi=self.dpi)
     ax = fig.get_axes()[0]
     ax.set_xlim(0, seq_len - 1)
     canvas = FigureCanvasAgg(fig)
     output = []
     im = None
     for oi in o:
         if im is None:
             # First sample: create the image artist once ...
             im = ax.imshow(oi,
                            aspect=aspect,
                            vmin=-1,
                            vmax=1,
                            cmap=self.cmap,
                            **self.kwargs)
         else:
             # ... later samples only swap the pixel data of that artist.
             im.set_data(oi)
         canvas.draw()
         # Drop the alpha channel of the RGBA buffer.
         buf = np.asarray(canvas.buffer_rgba())[..., :3]
         canvas.flush_events()
         # HWC uint8 -> 1 x C x H x W float in [0, 1].
         output.append(tensor(buf / 255).permute(2, 0, 1)[None])
     return TSImage(torch.cat(output)).to(device=device)
Exemple #2
0
class GridWorldEnv(DiscreteEnv):
    """
    Square grid world with randomly placed obstacles.

    A cell ``(x, y)`` has state id ``x + y * size``. One additional state
    (id ``size ** 2``) is absorbing: it is entered from the last cell
    ``(size - 1, size - 1)`` with reward 1 and never left again.

    Actions: 0 -> x - 1, 1 -> x + 1, 2 -> y - 1, 3 -> y + 1.
    A move that would leave the grid, or that starts from or ends in an
    obstacle cell, leaves the state unchanged.
    """
    def __init__(self, size=20, discount=0.99, seed=0):
        """
        Build a ``size x size`` grid whose obstacles are drawn with p=0.2.

        Note that this seeds NumPy's *global* random generator with `seed`.
        """
        self.__name__ = self.__class__.__name__ + str(seed)
        self._state = 0
        self._states = None
        self._fig = None
        self.discount = discount
        self.max_path_length = 2 * size
        np.random.seed(seed)
        self._grid = np.random.binomial(1, 0.2, size=(size, size))
        # Keep the 2x2 corners at both ends of the diagonal free of obstacles.
        self._grid[0, 0], self._grid[1, 0], self._grid[0, 1], self._grid[1, 1] = 0, 0, 0, 0
        self._grid[-1, -1], self._grid[-2, -1], self._grid[-1, -2], self._grid[-2, -2] = 0, 0, 0, 0
        # Free cells are white, obstacles black, the last cell is gold.
        self._rgb_grid = np.zeros((size, size, 3), dtype=np.uint8)
        self._rgb_grid[:, :, :] = np.expand_dims(((1-self._grid) * 255).astype(np.uint8), axis=-1)
        self._rgb_grid[size-1, size-1, :] = 255, 215, 0
        self._size = size
        self.dt = .02
        self.obs_dims = 2
        self._scale = 4
        self.vectorized = True
        # NOTE(review): presumably allocates self._transitions / self._rewards
        # and invokes _build_transitions / _build_rewards - confirm in DiscreteEnv.
        DiscreteEnv.__init__(self, size * size + 1, 4)

    def step(self, action):
        """
        Sample the next state for `action` from the transition table.

        Returns ``(next_state, reward, done, env_info)``. `done` is evaluated
        on the state *before* the transition, i.e. it is True once the
        environment already sits in the absorbing state.
        """
        probs = self._transitions[self._state, action]
        next_state = np.argmax(np.random.multinomial(1, probs))
        reward = self._rewards[self._state, action, next_state]
        done = self._state == self._size ** 2
        env_info = dict()
        self._state = next_state
        return next_state, reward, done, env_info

    def reset(self):
        """Leave vectorized mode and return a uniformly drawn obstacle-free cell id."""
        self._states = None
        state = np.random.randint(0, self._size * self._size)
        # Rejection sampling: redraw while the cell is an obstacle.
        while self._grid[state % self._size, state//self._size]:
            state = np.random.randint(0, self._size * self._size)
        self._state = state
        return self._state

    def vec_reset(self, num_states):
        """Draw `num_states` obstacle-free start cells and return their ids."""
        states = np.random.randint(0, self._size * self._size, size=(num_states,))
        collisions = self._grid[states % self._size, states // self._size]
        num_collisions = np.sum(collisions)
        # Only the entries that landed on an obstacle are redrawn.
        while num_collisions:
            states[collisions.astype(bool)] = np.random.randint(0, self._size * self._size, size=(num_collisions,))
            collisions = self._grid[states % self._size, states // self._size]
            num_collisions = np.sum(collisions)
        self._states = states
        return self._states

    def vec_step(self, actions):
        """
        Vectorized counterpart of `step`; requires a prior `vec_reset`.

        Takes the arg-max of each transition row instead of sampling.
        """
        assert self._states is not None
        assert len(self._states) == len(actions)
        probs = self._transitions[self._states, actions]
        next_states = np.argmax(probs, axis=-1)
        rewards = self._rewards[self._states, actions, next_states]
        dones = self._states == self._size ** 2
        env_info = dict()
        self._states = next_states
        return next_states, rewards, dones, env_info

    def _build_transitions(self):
        """Fill ``self._transitions`` with the deterministic grid dynamics."""
        size = self._size
        # (dx, dy) per action id, see the class docstring.
        moves = ((-1, 0), (1, 0), (0, -1), (0, 1))
        for x in range(size):
            for y in range(size):
                id_s = x + y * size
                for act, (dx, dy) in enumerate(moves):
                    next_x, next_y = x + dx, y + dy

                    # Moves leaving the grid keep the agent in place.
                    if (next_x < 0) or (next_x >= size):
                        next_x = x
                    if (next_y < 0) or (next_y >= size):
                        next_y = y
                    # Obstacles can be neither entered nor left.
                    if self._grid[next_x, next_y] or self._grid[x, y]:
                        next_x, next_y = x, y

                    id_next_s = next_x + next_y * size
                    self._transitions[id_s, act, id_next_s] = 1.
        # The last cell (index -2) always moves into the absorbing state
        # (index -1), which loops onto itself. The original ran these three
        # assignments once per `x` iteration; doing it once afterwards yields
        # the same table.
        self._transitions[-2, :, :] = 0.
        self._transitions[-2, :, -1] = 1.
        self._transitions[-1, :, -1] = 1.

    def _build_rewards(self):
        """Reward 1 for the transition from the last cell into the absorbing state."""
        self._rewards[-2, :, -1] = 1.

    def render(self, mode='human', iteration=None):
        """
        Draw the grid and the agent position(s) in red.

        Returns the rendered frame as an ``(H, W, 3)`` uint8 array when
        ``mode == 'rgb_array'``; otherwise returns None.
        """
        if self._fig is None:
            self._fig = plt.figure()
            self._ax = self._fig.add_subplot(111)
            # The original called a bare `upsample`, which is not defined in
            # this class' scope; use the method defined below.
            data = self.upsample(self._rgb_grid, self._scale)
            self._render = self._ax.imshow(data, animated=True)
            self._ax.tick_params(
                axis='both',
                bottom=False,
                top=False,
                left=False,
                right=False,
                labelbottom=False,
                labelleft=False)  # hide all ticks and tick labels
            self._ax.set_aspect('equal')
            self._canvas = FigureCanvas(self._fig)

        data = self._rgb_grid.copy()
        if self._states is None:
            # Single-environment mode.
            x, y = self._state % self._size, self._state//self._size
            if self._state != self._size ** 2:
                data[x, y, :] = [255, 0, 0]

        else:
            # Vectorized mode: mark every agent that is not in the absorbing state.
            x, y = self._states % self._size, self._states//self._size
            x = x[self._states != self._size ** 2]
            y = y[self._states != self._size ** 2]
            data[x, y, :] = [255, 0, 0]

        data = self.upsample(data, self._scale)
        self._render.set_data(data)
        if iteration is not None:
            self._ax.set_title('Iteration %d' % iteration)
        self._canvas.draw()
        self._canvas.flush_events()
        time.sleep(self.dt)

        if mode == 'rgb_array':
            width, height = self._fig.get_size_inches() * self._fig.get_dpi()
            # np.fromstring is deprecated for binary input; frombuffer is the
            # replacement. copy() keeps the returned frame writable.
            # NOTE(review): tostring_rgb is deprecated in recent Matplotlib.
            image = np.frombuffer(self._canvas.tostring_rgb(), dtype='uint8').reshape(int(height), int(width), 3).copy()
            return image

    def upsample(self, image, scale):
        """Repeat every pixel of `image` `scale` times along both leading axes."""
        # Honour the `scale` argument (the original silently used self._scale).
        up_image = np.repeat(image, scale, axis=0)
        up_image = np.repeat(up_image, scale, axis=1)
        return up_image

    def close(self):
        """Close this environment's figure and forget it."""
        # Close our own figure rather than whichever one pyplot considers
        # current; with no figure this falls back to the original behaviour.
        plt.close(self._fig)
        self._fig = None
Exemple #3
0
class DoubleIntegratorEnv(Env):
    """
    Double integrator with Euler integration.

    state: [pos, vel]
    action: [acc]

    Dynamics: ``pos += vel * dt`` and ``vel += acc * dt``. The reward is
    ``-0.5 * (pos**2 + vel**2 + acc**2)``; an episode ends when the next
    state leaves the observation box, in which case the reward is divided
    by ``1 - discount``.
    """
    def __init__(self, discount=0.99):
        self._state = np.zeros((2, ))
        # Vectorized-mode bookkeeping. Initialised here so that vec_reset()
        # and vec_step() fail on their asserts instead of an AttributeError.
        self._states = None
        self._num_envs = None
        self.dt = 0.05
        self.max_path_length = 200
        self._fig = None
        self.discount = discount
        self.vectorized = True
        self.action_space = spaces.Box(low=np.array((-3, )),
                                       high=np.array((3, )),
                                       dtype=np.float64)
        self.observation_space = spaces.Box(low=np.array((-4, -4)),
                                            high=np.array((4, 4)),
                                            dtype=np.float64)

    def step(self, action):
        """
        Advance the single environment by one time step.

        Returns ``(next_state, reward, done, env_info)``; `reward` is a
        scalar, matching the per-environment rewards of `vec_step`.
        """
        velocity, accel = self._state[1], action[0]
        next_state = self._state + np.array([velocity, accel]) * self.dt
        # Use the scalar action component: `action**2` would turn the reward
        # into an array shaped like `action`.
        reward = -0.5 * (self._state[0]**2 + self._state[1]**2 + accel**2)
        done = (next_state < self.observation_space.low).any() or (
            next_state > self.observation_space.high).any()
        env_info = dict()
        self._state = next_state
        if done:
            reward /= (1 - self.discount)
        return next_state.copy(), reward, done, env_info

    def reset(self):
        """Leave vectorized mode and restart from the fixed state ``[1, 1]``."""
        self._states = None
        self._state = np.ones((2, ))
        return self._state.copy()

    def set_state(self, state):
        """Overwrite the single-environment state (stored by reference)."""
        self._state = state

    def vec_step(self, actions):
        """
        Vectorized counterpart of `step`.

        `actions` is indexed as ``actions[:, 0]`` and the states as
        ``states[:, 0]`` / ``states[:, 1]``, i.e. one row per environment.
        """
        assert self._states is not None
        next_states = self._states + np.stack(
            [self._states[:, 1], actions[:, 0]], axis=-1) * self.dt
        rewards = -0.5 * (self._states[:, 0]**2 + self._states[:, 1]**2 +
                          actions[:, 0]**2)
        # An environment is done when any state component leaves the box.
        # (The original cast with `np.bool`, which NumPy >= 1.24 removed.)
        low, high = self.observation_space.low, self.observation_space.high
        dones = ((next_states < low) | (next_states > high)).any(axis=-1)
        env_infos = dict()
        self._states = next_states
        rewards[dones] /= (1 - self.discount)
        return next_states, rewards, dones, env_infos

    def vec_set_state(self, states):
        """Overwrite the vectorized states (stored by reference)."""
        self._states = states

    def vec_reset(self, num_envs=None):
        """
        Draw `num_envs` start states uniformly from ``[-2, 2)``.

        When `num_envs` is None the count of the previous call is reused.
        """
        if num_envs is None:
            assert self._num_envs is not None
            num_envs = self._num_envs
        else:
            self._num_envs = num_envs
        self._states = np.random.uniform(low=-2, high=2, size=(num_envs, 2))
        return self._states

    def render(self, mode='human', iteration=None):
        """
        Draw the current position as a red dot and the origin as a yellow star.

        Returns the frame as an ``(H, W, 3)`` uint8 array when
        ``mode == 'rgb_array'``; otherwise returns None.
        """
        if self._fig is None:
            self._fig = plt.figure()
            self._ax = self._fig.add_subplot(111)
            self._agent_render, = self._ax.plot(self._state[0], 0, 'ro')
            self._goal_render, = self._ax.plot(0, 'y*')
            self._ax.set_xlim(-4.5, 4.5)
            self._ax.set_ylim(-.5, .5)
            self._ax.set_aspect('equal')
            self._canvas = FigureCanvas(self._fig)

        # Line2D.set_data expects sequences; recent Matplotlib rejects scalars.
        self._agent_render.set_data([self._state[0]], [0])
        if iteration is not None:
            self._ax.set_title('Iteration %d' % iteration)
        self._canvas.draw()
        self._canvas.flush_events()
        if mode == 'rgb_array':
            width, height = self._fig.get_size_inches() * self._fig.get_dpi()
            # np.fromstring is deprecated for binary input; frombuffer is the
            # replacement. copy() keeps the returned frame writable.
            # NOTE(review): tostring_rgb is deprecated in recent Matplotlib.
            image = np.frombuffer(self._canvas.tostring_rgb(),
                                  dtype='uint8').reshape(
                                      int(height), int(width), 3).copy()
            return image

    def close(self):
        """Close this environment's figure and forget it."""
        # Close our own figure rather than whichever one pyplot considers
        # current; with no figure this falls back to the original behaviour.
        plt.close(self._fig)
        self._fig = None
Exemple #4
0
class ASRSEnv(object):
    """
    Description:
    There is a storage warehouse with M = W * H bins. M (or W * H) types of products will be stored in this warehouse. Each period, there will be an array of orders coming in.
    A robot can exchange the positions of two bins. The goal is to find the optimal storage plan to best fulfil the orders.

    Observation:
        Current Storage Map    np.array()   (M,) any 1d array
                row number indicates the bin number, the value indicates the good number being stored in the bin.
                Good number starts from 1.
        Current Period Order   np.array()   (M,)
    State:
        Current Storage Map    np.array()   (M,)
        Time to receive next order  np.array()   (M,)

    Action:
        Num of Action: M Choose 2 + 1
        e.g  (a , b) switch bin a with bin b (a < b)
             or do nothing (None)

    Reward:
        No reward is computed by this class: `step` / `vec_step` return the
        new storage map(s), the orders drawn from the order process and the
        exchange cost (distance between the two swapped bins).

    Starting State:
        A random permutation.

    Parameter type:
        storage_shape: tuple with 1 to 3 int
        order_process: object providing num_products, dynamic_order,
            dist_param, age, set_seed(), reset() and get_orders()
        origin_coord: optional coordinates of the origin(s); one row per origin
        seed: int used to seed the map generator and the order process
    """

    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 30
    }

    def __init__(self,
                 storage_shape,
                 order_process,
                 origin_coord=None,
                 seed=42):
        self.order_process = order_process
        self.map_random = np.random.RandomState()
        self.set_seed(seed)
        assert len(storage_shape) <= 3, "storage_shape length should be <= 3"
        self.storage_shape = storage_shape
        self.obs_dim = 1
        self.num_products = np.array(storage_shape).prod()
        self.num_maps = math.factorial(self.num_products)
        # All unordered bin pairs plus the "do nothing" action.
        self.num_actions = int(self.num_products *
                               (self.num_products - 1) / 2 + 1)
        # NOTE(review): for a 2-d list of origins this compares the number of
        # origins (not their dimensionality) with len(storage_shape).
        assert (origin_coord is None) or (len(storage_shape) == len(
            origin_coord)), "origin_coord does not have correct dimensions"
        # `is not None` instead of truthiness so NumPy arrays are accepted.
        if origin_coord is not None:
            origins = np.array(origin_coord)
            self.origin_coords = origins if origins.ndim == 2 else origins[
                np.newaxis]
        else:
            # Default origins: every index along the first axis, with all
            # remaining coordinates 0, i.e. (:, 0, 0).
            self.origin_coords = np.zeros(
                (len(storage_shape), storage_shape[0])).astype(int)
            self.origin_coords[0] = np.arange(storage_shape[0])
            self.origin_coords = self.origin_coords.T
        self.dist_origin_to_exit = 1  # Distance from origin to exit

        assert order_process.num_products == self.num_products, "Number of products in order process need to match number of storage bins"

        self.dynamic_order = order_process.dynamic_order
        self.reset()

        self._fig = None
        self._render = None
        # pyplot.get_cmap is available in old and new Matplotlib alike,
        # whereas matplotlib.cm.get_cmap has been removed in recent releases.
        self.cmap = plt.get_cmap('coolwarm')
        self.dt = .2
        self._scale = 16
        self.vectorized = True
        self.__name__ = 'ASRSEnv'

    def set_seed(self, seed=None):
        """
        Re-seed the map generator, optionally switching to a new seed.

        With `seed` given, it is remembered and the order process is seeded
        with ``seed + 1000``. With ``seed=None`` the remembered seed is kept;
        a seed must have been set before in that case.
        """
        # `is not None` so that 0 is accepted as a valid seed.
        if seed is not None:
            self.seed_num = seed
            self.order_process.set_seed(self.seed_num + 1000)
        self.map_random.seed(self.seed_num + 100)

    def reset(self):
        """Leave vectorized mode and return a freshly drawn storage map."""
        self.set_seed()
        self._storage_maps = None
        self._num_envs = None
        self.order_process.reset()
        self.storage_map = self.map_random.permutation(self.num_products) + 1
        return self.storage_map.copy()

    def vec_reset(self, num_envs=None):
        """
        Draw one storage map per environment.

        When `num_envs` is None the count of the previous call is reused.
        """
        self.set_seed()
        self.order_process.reset()
        if num_envs is None:
            assert self._num_envs is not None
            num_envs = self._num_envs
        else:
            self._num_envs = num_envs
        self._storage_maps = np.vstack([
            self.map_random.permutation(self.num_products)
            for _ in range(num_envs)
        ]) + 1
        return self._storage_maps.copy()

    def get_bin_coordinate(self, bin_id):
        '''
        Given a bin number, this gives the location of the bin. This can be useful to calculate the distance between a bin and the exit.
        Bins are numbered in row-major order; 1-d storage returns `bin_id` itself.
        '''
        ndim = len(self.storage_shape)
        if ndim == 3:
            _, b, c = self.storage_shape
            return bin_id // (b * c), bin_id % (b * c) // c, bin_id % c
        if ndim == 2:
            _, b = self.storage_shape
            return bin_id // b, bin_id % b
        if ndim == 1:
            return bin_id

    def get_distance_to_exit(self, bin_id=None):
        """
        Distance from the given bin(s) to the closest origin, plus
        ``dist_origin_to_exit``. With ``bin_id=None`` all bins are evaluated.
        """
        if bin_id is None:
            bin_id = np.arange(self.num_products)
        coords = self.get_bin_coordinate(bin_id)

        # np.vstack turns an origin into a column so it broadcasts against
        # the per-axis coordinate arrays.
        dist_to_each_exit = [
            self.get_distance_between_coord(coords, np.vstack(origin)) +
            self.dist_origin_to_exit for origin in self.origin_coords
        ]
        return np.array(dist_to_each_exit).min(axis=0)

    def get_distance_between_coord(self, coord1, coord2):
        """Sum of absolute coordinate differences, taken over the first axis."""
        return np.abs(np.array(coord1) - np.array(coord2)).sum(axis=0)

    def get_order_sequence(self, num_period=1):
        """
        Draw `num_period` consecutive orders for a single environment.

        Returns the orders and the order process' `dist_param` after each draw.
        """
        # Can only generate order sequences for 1 environment
        order_sequence = np.zeros((num_period, self.num_products))
        p_sequence = np.zeros((num_period, self.num_products))
        for t in range(num_period):
            order_sequence[t] = self.order_process.get_orders(num_envs=1)
            p_sequence[t] = self.order_process.dist_param
        return order_sequence, p_sequence

    def _is_valid_action(self, action):
        """True for None or a pair (x, y) with 0 <= x < y < num_products."""
        return action is None or (action[0] < action[1]
                                  and action[1] < self.num_products and
                                  action[0] > -1)

    def step(self, action=None, rollout=True):
        '''
        Action should be a tuple (x, y), which indicates that good in bin number x and bin number y should switch.
        None leaves the map unchanged. Orders are only drawn when `rollout` is True.
        Returns (copy of the storage map, order or None, exchange cost).
        '''
        assert self._is_valid_action(action), f"Invalid action {action}!"

        order = self.order_process.get_orders() if rollout else None
        exchange_cost = 0
        if action is not None:
            first, second = action[0], action[1]
            # The map is modified in place.
            self.storage_map[first], self.storage_map[second] = (
                self.storage_map[second], self.storage_map[first])
            exchange_cost += self.get_distance_between_coord(
                self.get_bin_coordinate(first),
                self.get_bin_coordinate(second))
        return self.storage_map.copy(), order, exchange_cost

    def vec_step(self, actions, rollout=True):
        """
        Vectorized counterpart of `step`.

        `actions` is a list with one entry per environment, each either a
        2-tuple or None.
        """
        assert all(self._is_valid_action(action) for action in actions)
        assert self._storage_maps is not None
        # "Do nothing" becomes the self-swap (0, 0), which costs 0.
        actions = np.array(
            [action if action is not None else (0, 0) for action in actions])
        self._storage_maps = self.vec_next_storage(self._storage_maps, actions)
        if rollout:
            orders = self.order_process.get_orders(num_envs=self._num_envs)
        else:
            orders = None
        exchange_costs = self.get_distance_between_coord(
            self.get_bin_coordinate(actions[:, 0]),
            self.get_bin_coordinate(actions[:, 1]))
        return self._storage_maps.copy(), orders, exchange_costs

    def vec_next_storage(self, storage_maps, actions):
        """Return a copy of `storage_maps` with row i swapped at ``actions[i]``."""
        next_storage_maps = storage_maps.copy()
        rows = np.arange(next_storage_maps.shape[0])
        first, second = actions[:, 0], actions[:, 1]
        # Fancy indexing on the right-hand side copies, so the swap is safe.
        next_storage_maps[rows, first], next_storage_maps[rows, second] = \
            next_storage_maps[rows, second], next_storage_maps[rows, first]
        return next_storage_maps

    def set_state(self, storage_map):
        """Replace the single-environment storage map with a copy of `storage_map`."""
        self.storage_map = storage_map.copy()

    def vec_set_state(self, storage_maps):
        """Replace the vectorized storage maps with a copy of `storage_maps`."""
        self._num_envs = len(storage_maps)
        self._storage_maps = storage_maps.copy()

    def render(self, mode='human', iteration=None):
        """
        Draw the (first) storage map, coloured by each product's `dist_param`.

        Returns the frame as an ``(H, W, 3)`` uint8 array when
        ``mode == 'rgb_array'``; otherwise returns None.
        """
        assert len(self.storage_shape
                   ) == 2, "Storage map need to be 2-d in order to render"
        if self._fig is None:
            self._fig = plt.figure()
            self._ax = self._fig.add_subplot(111)
            self._ax.tick_params(
                axis='both',
                bottom=False,
                top=False,
                left=False,
                right=False,
                labelbottom=False,
                labelleft=False)  # hide all ticks and tick labels
            self._ax.set_aspect('equal')
            self._canvas = FigureCanvas(self._fig)
        if self._storage_maps is not None:
            current_map = self._storage_maps[0].reshape(self.storage_shape)
        else:
            current_map = self.storage_map.reshape(self.storage_shape)
        data = self.cmap(self.order_process.dist_param[current_map - 1])
        data = self.upsample(data, self._scale)
        for ix, iy in np.ndindex(self.storage_shape):
            number = current_map[ix, iy]
            # `box` is a view into `data`, so drawing on it edits the frame.
            box = data[ix * self._scale:(ix + 1) * self._scale,
                       iy * self._scale:(iy + 1) * self._scale, :3]
            self.add_numbers_on_plot(number, box)
        for origin_num in range(self.origin_coords.shape[0]):
            self.mark_exit_on_plot(data, origin_num)
        # Drop images left over from earlier calls; the original stacked one
        # more image artist on the axes with every render.
        for stale in list(self._ax.images):
            stale.remove()
        self._render = self._ax.imshow(data, animated=True)
        if iteration is not None:
            self._ax.set_title('Iteration %d, time %d' %
                               (iteration, self.order_process.age))
        self._canvas.draw()
        self._canvas.flush_events()
        time.sleep(self.dt)

        if mode == 'rgb_array':
            width, height = self._fig.get_size_inches() * self._fig.get_dpi()
            # np.fromstring is deprecated for binary input; frombuffer is the
            # replacement. copy() keeps the returned frame writable.
            # NOTE(review): tostring_rgb is deprecated in recent Matplotlib.
            image = np.frombuffer(self._canvas.tostring_rgb(),
                                  dtype='uint8').reshape(
                                      int(height), int(width), 3).copy()
            return image
        if mode == 'human':
            s, (width, height) = self._canvas.print_to_buffer()
            plt.imshow(
                np.frombuffer(s,
                              dtype='uint8').reshape(int(height), int(width),
                                                     4))

    def upsample(self, image, scale):
        """Repeat every pixel of `image` `scale` times along both leading axes."""
        up_image = np.repeat(image, scale, axis=0)
        up_image = np.repeat(up_image, scale, axis=1)
        return up_image

    def add_numbers_on_plot(self, number, box):
        """Stamp `number` in black onto `box` (modified in place)."""
        number = str(number)
        imageRGB = Image.new('RGB', (self._scale, self._scale))
        draw = ImageDraw.Draw(imageRGB)
        # Measure with the font that is actually used for drawing. The
        # original measured with a macOS-only Arial file but drew with the
        # default font, and failed outright where that file was missing.
        font = ImageFont.load_default()
        if hasattr(draw, 'textsize'):
            w, h = draw.textsize(number, font=font)
        else:
            # Pillow >= 10 removed textsize in favour of textbbox.
            left, top, right, bottom = draw.textbbox((0, 0), number, font=font)
            w, h = right - left, bottom - top
        draw.text(((self._scale - w) / 2, (self._scale - h) / 2),
                  number,
                  font=font)
        # Only fully white text pixels are copied over, as black.
        box[np.array(imageRGB) == 255] = 0

    def mark_exit_on_plot(self, plot, origin_num):
        """Draw a one-pixel gold border around the cell of origin `origin_num`."""
        scale = self._scale
        row = self.origin_coords[origin_num][0]
        col = self.origin_coords[origin_num][1]
        # View into `plot`, so the assignment below edits it in place.
        box = plot[row * scale:(row + 1) * scale,
                   col * scale:(col + 1) * scale, :3]
        border = np.ones((scale, scale, 3), dtype=bool)
        border[1:-1, 1:-1, :] = False
        color = np.array([255, 215, 0]) / 255
        rgb_patch = np.ones((scale, scale, 3), dtype=np.uint8) * color
        box[border] = rgb_patch[border]

    def close(self):
        """Close this environment's figure and forget it."""
        # Close our own figure rather than whichever one pyplot considers
        # current; with no figure this falls back to the original behaviour.
        plt.close(self._fig)
        self._fig = None