Exemple #1
0
 def __set_yp_yn_from_action(self, action):
     """Validate ``action`` and record how it differs from the current allocation.

     ``action`` is the requested allocation for the current step. It is
     compared element-wise with ``self.__ds[self.__t]``: the positive part
     of the difference is stored in ``self.__yn[self.__t]`` and the
     magnitude of the negative part in ``self.__yp[self.__t]`` (both as
     lists).

     Passing ``None`` emits a warning and keeps the current allocation, so
     both recorded lists contain only zeros.

     Raises:
         error.InvalidAction: if the shape of ``action`` differs from
             ``self.action_space.shape``, if its rounded sum is not
             ``self.nbikes``, if any entry is below ``-1e-6``, or if any
             entry exceeds the matching entry of ``self.capacities`` by
             more than ``1e-6``.
     """
     current_alloc = np.array(self.__ds[self.__t])
     if action is None:
         warnings.warn(
             "no action was provided. taking default action of not changing allocation"
         )
         # Reuse the current allocation so the differences below are zero.
         # Without this, ``None`` reached the subtraction and failed there.
         action = current_alloc
     else:
         action = np.array(action)
         if list(action.shape) != list(self.action_space.shape):
             raise error.InvalidAction(
                 'action shape must be as per env.action_space.shape. Provided action was {0}'
                 .format(action))
         total = np.sum(action)
         if np.round(total) != self.nbikes:
             # Report the same total that was checked.
             raise error.InvalidAction(
                 'Dimensions of action must sum upto env.metadata["nbikes"]. Provided action was {0} with sum {1}'
                 .format(action, total))
         # The 1e-6 slack tolerates floating point noise in the action.
         if np.any(action < -1e-6):
             raise error.InvalidAction(
                 'Each dimension of action must be positive. Provided action was {0}'
                 .format(action))
         if np.any(action > self.capacities + 1e-6):
             # The message promises the provided action, so show the action
             # itself rather than ``capacities - action``.
             raise error.InvalidAction(
                 'Individual dimensions of action must be less than respective dimentions of env.metadata["capacities"]. Provided action was {0}'
                 .format(action))
     alloc_diff = action - current_alloc
     yn = alloc_diff * (alloc_diff > 0)  # increases over the current allocation
     yp = -alloc_diff * (alloc_diff < 0)  # decreases, stored as positive amounts
     self.__yp[self.__t] = list(yp)
     self.__yn[self.__t] = list(yn)
Exemple #2
0
    def step(self, action):
        '''
        Apply one discrete action on the state graph.

        Action indices:
        0: move ahead
        1: move back
        2: rotate right
        3: rotate left
        4: look down
        5: look up

        ``self.graph[state][action]`` is read as the successor state id; the
        value -1 marks a move that is not possible, which is reported as a
        collision and leaves ``self.current_state_id`` unchanged.

        Returns:
            ``(self.history_states, reward, done)`` where ``reward`` and
            ``done`` come from ``self.transition_reward(collided)`` and the
            history is refreshed by ``self.update_states()``.

        Raises:
            error.InvalidAction: if ``action`` is not in
                ``[0, self.action_space)``.
        '''
        # Reject negative values explicitly: they would otherwise be used as
        # Python negative indices and silently select a column from the end.
        if not 0 <= action < self.action_space:
            raise error.InvalidAction('Action must be an integer between '
                                      '0 and {}!'.format(self.action_space -
                                                         1))
        successor = self.graph[self.current_state_id][action]
        collided = bool(successor == -1)
        if collided:
            self.terminal = False
        else:
            self.current_state_id = int(successor)
            self.terminal = self.current_state_id in self.target_ids

        reward, done = self.transition_reward(collided)

        self.update_states()

        return self.history_states, reward, done
 def perform_action(self, act, param, agent):
     """Forward a named action to ``agent``.

     ``'FORWARD'`` calls ``agent.move_forward(param[0])`` and ``'TURN'``
     calls ``agent.turn(param[0])``. Only the first entry of ``param`` is
     used.

     Raises:
         error.InvalidAction: for any other action name.
     """
     if act == 'FORWARD':
         agent.move_forward(param[0])
         return
     if act == 'TURN':
         agent.turn(param[0])
         return
     raise error.InvalidAction("Action not recognized: ", act)
Exemple #4
0
    def step(self,
             action: Tuple[int, int],
             player: Optional[int] = None) -> Tuple[Any, float, bool, Dict]:
        """Play one move for the side whose turn it is.

        Args:
            action: Board index of the cell to claim. It is converted to a
                tuple and must address a cell whose current value is 0.
            player: Optional marker of the side requesting the move. When
                given (truthy), it must equal ``self.curr_turn``; this guards
                against the same side moving twice by mistake.

        Returns:
            observation, reward, done, info. ``reward`` is the truthy result
            of ``check_win`` converted to float (1 or -1 per the original
            note), otherwise 0.0. ``info`` is always an empty dict.

        Raises:
            error.InvalidAction: if the cell is already taken or the move
                comes from the wrong player.
            error.ResetNeeded: if the game has already finished.
        """
        # Validate the request before touching the board.
        action = tuple(action)
        if self.board[action] != 0:
            raise error.InvalidAction(f"action {action} is not a vaid choice")
        if self.done:
            raise error.ResetNeeded("Call reset as game is over")
        if player and player != self.curr_turn:
            raise error.InvalidAction(
                f"Player {self.curr_turn}'s turn. Move request from {player}")

        logger.debug("Selected action: %s on turn %d", action,
                     self.turns_played + 1)

        # The turn marker doubles as the value written to the board.
        self.board[action] = self.curr_turn

        winner = check_win(self.board)
        if winner:
            # Somebody completed a line: the game ends with a signed reward.
            self.done = True
            score = float(winner)
        elif self.turns_played == 9:
            # No winner and no turns left, so the game is a draw.
            self.done = True
            score = 0.0
        else:
            # Game continues: hand the move to the other side.
            self.curr_turn = next(self.turn_iterator)
            score = 0.0
        return self._get_obs(), score, self.done, {}
    def _take_action(self, action):
        """Count an action as a long or short position.

        ``action`` is looked up in ``BaseEnv.action_space.lookup``. If the
        mapped value is ``LONG``, ``self.long`` is incremented; if it is
        ``SHORT``, ``self.short`` is incremented; any other mapped value
        changes nothing.

        Raises:
            error.InvalidAction: if ``action`` is not a key of the lookup.
        """
        lookup = BaseEnv.action_space.lookup
        if action not in lookup.keys():
            raise error.InvalidAction()

        kind = lookup[action]
        if kind is LONG:
            self.long = self.long + 1
        elif kind is SHORT:
            self.short = self.short + 1
    def step(self, action):
        """Place ``action`` on the underlying board and report the outcome.

        Returns:
            ``(self.rendered_board, reward, finished, info)``. ``reward`` is
            1 when ``self.board.place`` reports the game as finished and 0
            otherwise. ``info`` holds the raw board as nested lists under
            ``'board'`` and the player who made this move under
            ``'last_player'``.

        Raises:
            error.InvalidAction: if the board raises ``Board.InvalidMove``;
                the message is the ``repr`` of that exception.
        """
        try:
            # Read the mover before placing; NOTE(review): presumably
            # ``place`` advances the turn, confirm against Board.
            mover = self.board.get_current_player()
            game_over = self.board.place(action)
            self._update_board_render()
        except Board.InvalidMove as exc:
            raise error.InvalidAction(repr(exc))

        reward = 1 if game_over else 0
        info = {
            'board': self.board.board.tolist(),
            'last_player': mover,
        }
        return self.rendered_board, reward, game_over, info
Exemple #7
0
    def step(self, action):
        """Place one stone on the board and report the resulting status.

        Args:
            action: a ``(row, col, stone)`` triple, where ``stone`` is an
                index into ``self.STONES``.

        Returns:
            ``(board_copy, reward, done, {})``. ``board_copy`` is a deep copy
            of ``self.board``; ``reward`` and ``done`` come from
            ``self._check_status()``.

        Raises:
            error.ResetNeeded: if the episode is already finished.
            error.InvalidAction: if the cell is not empty, the stone index is
                outside ``[0, self.STONE_TYPE_COUNT)``, or the selected stone
                equals the one placed on the previous move.
        """
        if self.done:
            raise error.ResetNeeded("")

        r, c, stone = action
        if self.board[r][c] != self.EMPTY:
            raise error.InvalidAction(
                "Stone '{}' already exists in row: {}, col: {}".format(
                    self.board[r][c], r, c))

        # Negative indices are rejected too; they would otherwise wrap
        # around when indexing ``self.STONES``.
        if not 0 <= stone < self.STONE_TYPE_COUNT:
            raise error.InvalidAction("Unknown stone type '{}'".format(stone))

        # ``last_stone`` stores the stone value, not its index, so compare
        # the value selected by this index.
        placed = self.STONES[stone]
        if placed == self.last_stone:
            raise error.InvalidAction("Need to change stone.")

        self.board[r][c] = placed
        self.last_stone = placed
        self.remaining_place -= 1

        reward, self.done = self._check_status()

        return copy.deepcopy(self.board), reward, self.done, {}
 def _perform_action(self, act, parameters, agent):
     """Run the agent method that corresponds to ``act``.

     Most actions use only ``parameters[0]`` (``KICK`` also uses
     ``parameters[1]``); ``TO_BALL`` uses no parameters and ``DRIBBLE``
     receives the whole ``parameters`` object. Ball-related actions are
     given ``self.ball``.

     Raises:
         error.InvalidAction: if ``act`` matches none of the known actions.
     """
     if act == KICK:
         agent.kick_ball(self.ball, parameters[0], parameters[1])
         return
     if act == DASH:
         agent.dash(parameters[0])
         return
     if act == TURN:
         agent.turn(parameters[0])
         return
     if act == TO_BALL:
         agent.to_ball(self.ball)
         return
     if act == SHOOT_GOAL:
         agent.shoot_goal(self.ball, parameters[0])
         return
     if act == TURN_BALL:
         agent.turn_ball(self.ball, parameters[0])
         return
     if act == DRIBBLE:
         # Unlike the other actions, the full parameter object is passed on.
         agent.dribble(self.ball, parameters)
         return
     if act == KICK_TO:
         agent.kick_to(self.ball, parameters[0])
         return
     raise error.InvalidAction("Action not recognised: ", act)
Exemple #9
0
    def step(self, action, verbose=True, return_event=False):
        """Send one navigation action to the controller and build the transition.

        Args:
            action: index into ``self.action_names``; must be accepted by
                ``self.action_space.contains``.
            verbose: accepted for interface compatibility; not used here.
            return_event: when true, ``info`` is ``[event, bear]`` instead of
                just ``bear``.

        Returns:
            ``(state_image, reward, done, info)``. ``state_image`` and
            ``bear`` come from ``self.preprocess`` applied to the new frame,
            the agent position and rotation, and the target position.
            ``reward`` and ``done`` come from
            ``self.task.transition_reward(self.event)``.

        Raises:
            error.InvalidAction: if ``action`` is not in the action space.
            NotImplementedError: if the action name is not a Rotate*, Move*,
                Look* or ``'Stop'`` action.
        """
        if not self.action_space.contains(action):
            # The largest valid index is n - 1, so report that bound.
            raise error.InvalidAction('Action must be an integer between '
                                      '0 and {}!'.format(self.action_space.n -
                                                         1))
        action_str = self.action_names[action]

        # All supported actions are forwarded to the controller unchanged.
        supported = (action_str.startswith(('Rotate', 'Move', 'Look'))
                     or action_str == 'Stop')
        if not supported:
            raise NotImplementedError(
                'action_str: {} is not implemented'.format(action_str))
        self.event = self.controller.step(dict(action=action_str))

        target_obj = self.event.get_object(self.task.target_id)
        cur_pos = self.event.metadata['agent']['position']
        cur_ori = self.event.metadata['agent']['rotation']
        tgt_pos = target_obj['position']
        state_image, bear = self.preprocess(self.event.frame, cur_pos, cur_ori,
                                            tgt_pos)

        reward, done = self.task.transition_reward(self.event)

        info = [self.event, bear] if return_event else bear

        return state_image, reward, done, info
Exemple #10
0
    def step(self, action: list):
        """Advance the simulation by one turn using each agent's order.

        Args:
            action: one order quantity per agent; its length (as measured by
                ``get_init_len``) must equal ``self.n_agents`` and no entry
                may be negative.

        Returns:
            ``(state, rewards, done, {})`` where ``state`` comes from
            ``self._get_observations()`` and ``rewards`` from
            ``self._get_rewards()``.

        Raises:
            error.ResetNeeded: if the episode has already finished.
            error.InvalidAction: if the action has the wrong length or
                contains a negative entry.
        """
        # sanity checks
        if self.done:
            raise error.ResetNeeded(
                "Environment is finished, please run env.reset() before taking actions"
            )
        if get_init_len(action) != self.n_agents:
            raise error.InvalidAction(
                f"Length of action array must be same as n_agents({self.n_agents})"
            )
        if any(np.array(action) < 0):
            raise error.InvalidAction(
                f"You can't order negative amount. You agents actions are: {action}"
            )

        # concatenate previous states, self.prev_states in an queue of previous states
        self.prev_states.popleft()
        self.prev_states.append(self._get_observations())
        # make incoming step
        demand = self._get_demand()
        orders_inc = [order.popleft() for order in self.orders]
        # NOTE(review): this value is overwritten further down before this
        # method reads it again; kept in case ``_get_demand`` depends on it.
        self.next_incoming_orders = [
            demand
        ] + orders_inc[:-1]  # what's the demand for each agent
        ship_inc = [shipment.popleft() for shipment in self.inbound_shipments]
        # calculate inbound shipments respecting orders and stock levels
        for i in range(self.n_agents -
                       1):  # manufacturer is assumed to have no constraints
            max_possible_shipment = (max(0, self.stocks[i + 1]) +
                                     ship_inc[i + 1]
                                     )  # stock + incoming shipment
            order = orders_inc[i] + max(
                0,
                -self.stocks[i + 1])  # incoming order + stockout (backorder)
            shipment = min(order, max_possible_shipment)
            self.inbound_shipments[i].append(shipment)
        self.inbound_shipments[-1].append(orders_inc[-1])
        # update stocks
        self.stocks = [(stock + inc)
                       for stock, inc in zip(self.stocks, ship_inc)]
        for i in range(1, self.n_agents):
            self.stocks[i] -= orders_inc[i - 1]
        self.stocks[0] -= demand  # for the retailer
        # update orders
        for i in range(self.n_agents):
            self.orders[i].append(action[i])
        self.next_incoming_orders = [self._get_demand()
                                     ] + [x[0] for x in self.orders[:-1]]

        # calculate costs
        # ``np.float`` was removed in NumPy 1.24; the builtin ``float`` is the
        # same dtype and works on every NumPy version.
        self.holding_cost = np.zeros(self.n_agents, dtype=float)
        self.stockout_cost = np.zeros(self.n_agents, dtype=float)
        for i in range(self.n_agents):
            if self.stocks[i] >= 0:
                self.holding_cost[i] = (max(0, self.stocks[i]) *
                                        self.score_weight[0][i]
                                        )  # only applicable when stocks > 0
            else:
                self.stockout_cost[i] = (-min(0, self.stocks[i]) *
                                         self.score_weight[1][i]
                                         )  # only applicable when stocks < 0
        self.cum_holding_cost += self.holding_cost
        self.cum_stockout_cost += self.stockout_cost
        # calculate reward
        rewards = self._get_rewards()

        # check if done
        if self.turn == self.n_turns - 1:
            print(
                f"\nTotal cost is: EUR {sum(self.cum_holding_cost + self.cum_stockout_cost)}"
            )
            self.done = True
        else:
            self.turn += 1
        state = self._get_observations()
        # todo flatten observation dict
        return state, rewards, self.done, {}
Exemple #11
0
    def step(self, action, verbose=True, return_event=False):
        """Execute one action (interaction, rotation, movement or look).

        Interaction actions (names ending in ``'Object'``) act on the nearest
        visible object that qualifies:

        * ``Put*``: nearest receptacle whose type is in
          ``self.objects['receptacles']``; only attempted when the inventory
          is not empty.
        * ``Pickup*``: nearest pickupable whose type is in
          ``self.objects['pickupables']``; only executed when the inventory
          is empty.
        * ``Open*`` / ``Close*``: nearest closed / open openable whose type
          is in ``self.objects['openables']``.

        If no object qualifies, no controller call is made for that
        interaction. The entries named in
        ``self.metadata_last_object_attributes`` are reset to ``None`` on the
        current event before the action, and the matching ``lastObject*``
        entry is set on the new event after a successful interaction.

        Args:
            action: index into ``self.action_names``; must be accepted by
                ``self.action_space.contains``.
            verbose: when true, print which object was interacted with.
            return_event: when true, ``info`` is the controller event instead
                of an empty dict.

        Returns:
            ``(state_image, reward, done, info)``.

        Raises:
            error.InvalidAction: if ``action`` is not in the action space or
                names an unknown ``*Object`` interaction.
            NotImplementedError: if the action name matches no known group.
        """
        if not self.action_space.contains(action):
            # The largest valid index is n - 1, so report that bound.
            raise error.InvalidAction('Action must be an integer between '
                                      '0 and {}!'.format(self.action_space.n -
                                                         1))
        action_str = self.action_names[action]
        visible_objects = [
            obj for obj in self.event.metadata['objects'] if obj['visible']
        ]
        for attribute in self.metadata_last_object_attributes:
            self.event.metadata[attribute] = None

        def closest_visible(flag, type_group, want_open=None):
            """Return the nearest visible object passing the filters, or None."""
            best, best_distance = None, float('inf')
            for obj in visible_objects:
                if not (obj[flag] and obj['distance'] < best_distance):
                    continue
                if want_open is not None and bool(obj['isOpen']) != want_open:
                    continue
                if obj['objectType'] not in self.objects[type_group]:
                    continue
                # Tighten the bound so that later, farther candidates cannot
                # replace a nearer one.
                best, best_distance = obj, obj['distance']
            return best

        # if/else statements below for dealing with up to 13 actions
        if action_str.endswith('Object'):  # All interactions end with 'Object'
            interaction_obj = None
            inventory = self.event.metadata['inventoryObjects']
            inventory_before = inventory[0]['objectType'] if inventory else []
            if action_str.startswith('Put'):
                if inventory:
                    receptacle = closest_visible('receptacle', 'receptacles')
                    if receptacle:
                        interaction_obj = receptacle
                        object_to_put = inventory[0]
                        self.event = self.controller.step(
                            dict(action=action_str,
                                 objectId=object_to_put['objectId'],
                                 receptacleObjectId=interaction_obj['objectId']
                                 ))
                        self.event.metadata['lastObjectPut'] = object_to_put
                        self.event.metadata[
                            'lastObjectPutReceptacle'] = interaction_obj
            elif action_str.startswith('Pickup'):
                # Previously the distance bound was never updated here, so the
                # last matching object was picked instead of the closest one.
                pickupable = closest_visible('pickupable', 'pickupables')
                if pickupable and not inventory:
                    interaction_obj = pickupable
                    self.event = self.controller.step(
                        dict(action=action_str,
                             objectId=interaction_obj['objectId']))
                    self.event.metadata['lastObjectPickedUp'] = interaction_obj
            elif action_str.startswith('Open'):
                openable = closest_visible('openable', 'openables',
                                           want_open=False)
                if openable:
                    interaction_obj = openable
                    self.event = self.controller.step(
                        dict(action=action_str,
                             objectId=interaction_obj['objectId']))
                    self.event.metadata['lastObjectOpened'] = interaction_obj
            elif action_str.startswith('Close'):
                closable = closest_visible('openable', 'openables',
                                           want_open=True)
                if closable:
                    interaction_obj = closable
                    self.event = self.controller.step(
                        dict(action=action_str,
                             objectId=interaction_obj['objectId']))
                    self.event.metadata['lastObjectClosed'] = interaction_obj
            else:
                raise error.InvalidAction(
                    'Invalid interaction {}'.format(action_str))
            # print what object was interacted with and state of inventory
            if interaction_obj and verbose:
                inventory_after = self.event.metadata['inventoryObjects'][0]['objectType'] \
                    if self.event.metadata['inventoryObjects'] else []
                if action_str in ['PutObject', 'PickupObject']:
                    inventory_changed_str = 'Inventory before/after: {}/{}.'.format(
                        inventory_before, inventory_after)
                else:
                    inventory_changed_str = ''
                print('{}: {}. {}'.format(action_str,
                                          interaction_obj['objectType'],
                                          inventory_changed_str))
        elif action_str.startswith('Rotate'):
            if self.continuous_movement:
                # Track the absolute heading and send it with a plain Rotate.
                if action_str.endswith('Left'):
                    self.absolute_rotation -= self.rotation_amount
                elif action_str.endswith('Right'):
                    self.absolute_rotation += self.rotation_amount
                self.event = self.controller.step(
                    dict(action='Rotate', rotation=self.absolute_rotation))
            else:
                # Do normal RotateLeft/Right command in discrete mode (i.e. 3D GridWorld)
                self.event = self.controller.step(dict(action=action_str))
        elif action_str.startswith('Move') or action_str.startswith('Look'):
            # Move and Look actions
            self.event = self.controller.step(dict(action=action_str))
        else:
            raise NotImplementedError(
                'action_str: {} is not implemented'.format(action_str))

        self.task.step_num += 1
        state_image = self.preprocess(self.event.frame)
        reward, done = self.task.transition_reward(self.event)
        info = self.event if return_event else {}

        return state_image, reward, done, info
Exemple #12
0
 def get_action_name(action):
     """Return the entry stored for ``action`` in ``ActionSpace.lookup``.

     Raises:
         error.InvalidAction: if ``action`` is not a key of the lookup.
     """
     names = ActionSpace.lookup
     if action not in names.keys():
         raise error.InvalidAction()
     return names[action]