def __set_yp_yn_from_action(self, action):
    """Validate ``action`` and record the allocation change it implies.

    When ``action`` is ``None`` a warning is issued and nothing is stored for
    the current step. Otherwise the action is converted to a NumPy array,
    validated, and compared with the current allocation
    ``self.__ds[self.__t]``: the positive part of the difference is stored in
    ``self.__yn[self.__t]`` and the magnitude of the negative part in
    ``self.__yp[self.__t]`` (both as lists).

    Args:
        action: Array-like target allocation, or ``None`` to keep the
            allocation unchanged.

    Raises:
        error.InvalidAction: If the shape differs from
            ``self.action_space.shape``, the rounded sum differs from
            ``self.nbikes``, any entry is below ``-1e-6``, or any entry
            exceeds ``self.capacities`` by more than ``1e-6``.
    """
    if action is None:
        warnings.warn(
            "no action was provided. taking default action of not changing allocation"
        )
        return

    action = np.array(action)
    if list(action.shape) != list(self.action_space.shape):
        raise error.InvalidAction(
            'action shape must be as per env.action_space.shape. Provided action was {0}'
            .format(action))

    # Report the same total that is being checked (the builtin ``sum`` would
    # only add along the first axis for multi-dimensional input).
    total = np.sum(action)
    if np.round(total) != self.nbikes:
        raise error.InvalidAction(
            'Dimensions of action must sum upto env.metadata["nbikes"]. Provided action was {0} with sum {1}'
            .format(action, total))

    # Small tolerances absorb floating point noise in the provided action.
    if np.any(action < -1e-6):
        raise error.InvalidAction(
            'Each dimension of action must be positive. Provided action was {0}'
            .format(action))
    if np.any(action > self.capacities + 1e-6):
        # Show the offending action itself, as the message promises.
        raise error.InvalidAction(
            'Individual dimensions of action must be less than respective dimentions of env.metadata["capacities"]. Provided action was {0}'
            .format(action))

    alloc_diff = action - np.array(self.__ds[self.__t])
    yn = alloc_diff * (alloc_diff > 0)   # positive part of the change
    yp = -alloc_diff * (alloc_diff < 0)  # magnitude of the negative part
    self.__yp[self.__t] = list(yp)
    self.__yn[self.__t] = list(yn)
def step(self, action):
    """Apply a discrete navigation action and return the resulting transition.

    Action indices: 0: move ahead, 1: move back, 2: rotate right,
    3: rotate left, 4: look down, 5: look up.

    ``self.graph[state][action]`` holds the id of the next state, or ``-1``
    when the move is blocked. A blocked move leaves the current state
    unchanged and is reported to ``self.transition_reward`` as a collision.

    Args:
        action: Integer action index in ``[0, self.action_space)``.

    Returns:
        Tuple ``(self.history_states, reward, done)``.

    Raises:
        error.InvalidAction: If ``action`` is outside the valid range.
    """
    # Reject negative indices too: they would otherwise silently wrap around
    # and select a column from the end of the graph row.
    if not 0 <= action < self.action_space:
        raise error.InvalidAction('Action must be an integer between '
                                  '0 and {}!'.format(self.action_space - 1))

    next_state = self.graph[self.current_state_id][action]
    collided = bool(next_state == -1)
    if collided:
        self.terminal = False
    else:
        self.current_state_id = int(next_state)
        self.terminal = self.current_state_id in self.target_ids

    reward, done = self.transition_reward(collided)
    self.update_states()
    return self.history_states, reward, done
def perform_action(self, act, param, agent):
    """Dispatch a named action to ``agent``.

    Args:
        act: Action name; ``'FORWARD'`` and ``'TURN'`` are handled.
        param: Sequence whose first element is passed to the agent method.
        agent: Object providing ``move_forward`` and ``turn``.

    Raises:
        error.InvalidAction: If ``act`` is neither ``'FORWARD'`` nor ``'TURN'``.
    """
    if act == 'FORWARD':
        agent.move_forward(param[0])
        return
    if act == 'TURN':
        agent.turn(param[0])
        return
    raise error.InvalidAction("Action not recognized: ", act)
def step(self, action: Tuple[int, int], player: Optional[int] = None) -> Tuple[Any, float, bool, Dict]:
    """Mark a board location for the current player and advance the game.

    Args:
        action: Board location to mark. It is converted to a tuple and used
            to index ``self.board``; the location must currently hold ``0``.
        player: Optional id of the player requesting the move. When given
            (truthy) it must equal ``self.curr_turn``, which guards against
            the same player moving twice in a row by mistake.

    Returns:
        observation, reward, done, info. The reward is the non-zero result
        of ``check_win`` (as a float) when the move wins the game, and
        ``0.0`` otherwise.

    Raises:
        error.InvalidAction: If the location is already occupied, or the
            move was requested by a player other than ``self.curr_turn``.
        error.ResetNeeded: If the game has already finished.
    """
    action = tuple(action)

    # Validate the move before touching any state.
    if self.board[action] != 0:
        raise error.InvalidAction(f"action {action} is not a vaid choice")
    if self.done:
        raise error.ResetNeeded("Call reset as game is over")
    if player:
        if player != self.curr_turn:
            raise error.InvalidAction(
                f"Player {self.curr_turn}'s turn. Move request from {player}")

    logger.debug("Selected action: %s on turn %d", action,
                 self.turns_played + 1)

    # The turn indicator doubles as the marker stored on the board.
    self.board[action] = self.curr_turn

    winner = check_win(self.board)
    if winner:
        self.done = True
        return self._get_obs(), float(winner), self.done, {}

    if self.turns_played == 9:
        # No winner and no turns left: the game ends without a reward.
        self.done = True
    else:
        # Game continues: hand the turn to the next player.
        self.curr_turn = next(self.turn_iterator)
    return self._get_obs(), 0.0, self.done, {}
def _take_action(self, action):
    """Count a long or short action on this instance.

    Looks ``action`` up in ``BaseEnv.action_space.lookup`` and increments
    ``self.long`` or ``self.short`` when the looked-up value is ``LONG`` or
    ``SHORT`` respectively; any other known action changes nothing.

    Raises:
        error.InvalidAction: If ``action`` is not a key of the lookup table.
    """
    lookup = BaseEnv.action_space.lookup
    if action not in lookup.keys():
        raise error.InvalidAction()

    chosen = lookup[action]
    if chosen is LONG:
        self.long = self.long + 1
    elif chosen is SHORT:
        self.short = self.short + 1
def step(self, action):
    """Place ``action`` on the board and report the outcome.

    Args:
        action: Move forwarded unchanged to ``self.board.place``.

    Returns:
        Tuple ``(self.rendered_board, reward, finished, info)`` where
        ``reward`` is ``1`` when ``place`` reports a truthy result and ``0``
        otherwise, and ``info`` carries the board as nested lists together
        with the player that was current before the move.

    Raises:
        error.InvalidAction: If the board raises ``Board.InvalidMove``.
    """
    try:
        mover = self.board.get_current_player()
        game_over = self.board.place(action)
        self._update_board_render()
    except Board.InvalidMove as exc:
        raise error.InvalidAction(repr(exc))

    frame = self.rendered_board
    if game_over:
        reward = 1
    else:
        reward = 0
    info = {
        'board': self.board.board.tolist(),
        'last_player': mover,
    }
    return frame, reward, game_over, info
def step(self, action):
    """Place a stone on the board and return the resulting transition.

    Args:
        action: Triple ``(r, c, stone)`` with the target row, column and the
            index of the stone type in ``self.STONES``.

    Returns:
        Tuple ``(board_copy, reward, done, info)`` where ``board_copy`` is a
        deep copy of ``self.board``, ``reward`` and ``done`` come from
        ``self._check_status()`` and ``info`` is an empty dict.

    Raises:
        error.ResetNeeded: If the episode is already finished.
        error.InvalidAction: If the cell is occupied, the stone index is out
            of range, or the stone equals the one placed last.
    """
    if self.done:
        raise error.ResetNeeded("")

    r, c, stone = action
    if self.board[r][c] != self.EMPTY:
        raise error.InvalidAction(
            "Stone '{}' already exists in row: {}, col: {}".format(
                self.board[r][c], r, c))
    # Negative indices would silently wrap around in ``self.STONES``.
    if not 0 <= stone < self.STONE_TYPE_COUNT:
        raise error.InvalidAction("Unknown stone type '{}'".format(stone))

    # ``last_stone`` stores the stone value (``STONES[index]``), so the
    # repeat check has to compare values, not the raw index.
    piece = self.STONES[stone]
    if piece == self.last_stone:
        raise error.InvalidAction("Need to change stone.")

    self.board[r][c] = piece
    self.last_stone = piece
    self.remaining_place -= 1
    reward, self.done = self._check_status()
    return copy.deepcopy(self.board), reward, self.done, {}
def _perform_action(self, act, parameters, agent):
    """Apply the selected action to the given agent.

    Args:
        act: Action identifier, compared against the module's action
            constants (``KICK``, ``DASH``, ``TURN``, ...).
        parameters: Action parameters; each action reads the leading
            element(s) it needs, and ``DRIBBLE`` receives the whole sequence.
        agent: Agent object on which the matching method is invoked.

    Raises:
        error.InvalidAction: If ``act`` matches none of the known actions.
    """
    ball = self.ball  # every ball-related action operates on the shared ball

    if act == KICK:
        agent.kick_ball(ball, parameters[0], parameters[1])
        return
    if act == DASH:
        agent.dash(parameters[0])
        return
    if act == TURN:
        agent.turn(parameters[0])
        return
    if act == TO_BALL:
        agent.to_ball(ball)
        return
    if act == SHOOT_GOAL:
        agent.shoot_goal(ball, parameters[0])
        return
    if act == TURN_BALL:
        agent.turn_ball(ball, parameters[0])
        return
    if act == DRIBBLE:
        agent.dribble(ball, parameters)
        return
    if act == KICK_TO:
        agent.kick_to(ball, parameters[0])
        return
    raise error.InvalidAction("Action not recognised: ", act)
def step(self, action, verbose=True, return_event=False):
    """Send a navigation action to the controller and return the transition.

    Args:
        action: Index into ``self.action_names``; must be contained in
            ``self.action_space``.
        verbose: Accepted for interface compatibility; not used by this
            implementation.
        return_event: When true, ``info`` is ``[event, bear]``; otherwise
            ``info`` is just ``bear`` (the second value returned by
            ``self.preprocess``).

    Returns:
        Tuple ``(state_image, reward, done, info)``.

    Raises:
        error.InvalidAction: If ``action`` is not in the action space.
        NotImplementedError: If the action name is not a Rotate/Move/Look
            action and is not ``'Stop'``.
    """
    if not self.action_space.contains(action):
        # Valid indices run from 0 to n - 1.
        raise error.InvalidAction('Action must be an integer between '
                                  '0 and {}!'.format(self.action_space.n - 1))

    action_str = self.action_names[action]
    # All supported actions are forwarded to the controller unchanged.
    if action_str.startswith(('Rotate', 'Move', 'Look')) or action_str == 'Stop':
        self.event = self.controller.step(dict(action=action_str))
    else:
        raise NotImplementedError(
            'action_str: {} is not implemented'.format(action_str))

    target_obj = self.event.get_object(self.task.target_id)
    agent_meta = self.event.metadata['agent']
    cur_pos = agent_meta['position']
    cur_ori = agent_meta['rotation']
    tgt_pos = target_obj['position']
    state_image, bear = self.preprocess(self.event.frame, cur_pos, cur_ori,
                                        tgt_pos)
    reward, done = self.task.transition_reward(self.event)

    info = [self.event, bear] if return_event else bear
    return state_image, reward, done, info
def step(self, action: list):
    """Advance the environment by one turn using the agents' orders.

    The method pops the incoming orders and shipments of every agent,
    computes the shipments that can be sent given stock levels, updates
    stocks, queues the new orders from ``action``, accumulates holding and
    stock-out costs and finally advances the turn counter (or marks the
    episode as done on the last turn).

    Args:
        action: One order amount per agent; its length must equal
            ``self.n_agents`` and no entry may be negative.

    Returns:
        Tuple ``(state, rewards, done, info)`` where ``state`` comes from
        ``self._get_observations()``, ``rewards`` from
        ``self._get_rewards()`` and ``info`` is an empty dict.

    Raises:
        error.ResetNeeded: If the episode is already finished.
        error.InvalidAction: If ``action`` has the wrong length or contains
            a negative amount.
    """
    # sanity checks
    if self.done:
        raise error.ResetNeeded(
            "Environment is finished, please run env.reset() before taking actions"
        )
    if get_init_len(action) != self.n_agents:
        raise error.InvalidAction(
            f"Length of action array must be same as n_agents({self.n_agents})"
        )
    if any(np.array(action) < 0):
        raise error.InvalidAction(
            f"You can't order negative amount. You agents actions are: {action}"
        )

    # self.prev_states is a queue of previous states: drop the oldest one and
    # append the current observation
    self.prev_states.popleft()
    self.prev_states.append(self._get_observations())

    # make incoming step
    demand = self._get_demand()
    orders_inc = [order.popleft() for order in self.orders]
    # what's the demand for each agent
    self.next_incoming_orders = [demand] + orders_inc[:-1]
    ship_inc = [shipment.popleft() for shipment in self.inbound_shipments]

    # calculate inbound shipments respecting orders and stock levels;
    # the last agent (manufacturer) is assumed to have no constraints
    for i in range(self.n_agents - 1):
        # stock + incoming shipment
        max_possible_shipment = max(0, self.stocks[i + 1]) + ship_inc[i + 1]
        # incoming order + stockout (backorder)
        order = orders_inc[i] + max(0, -self.stocks[i + 1])
        shipment = min(order, max_possible_shipment)
        self.inbound_shipments[i].append(shipment)
    self.inbound_shipments[-1].append(orders_inc[-1])

    # update stocks
    self.stocks = [(stock + inc) for stock, inc in zip(self.stocks, ship_inc)]
    for i in range(1, self.n_agents):
        self.stocks[i] -= orders_inc[i - 1]
    self.stocks[0] -= demand  # for the retailer

    # update orders
    for i in range(self.n_agents):
        self.orders[i].append(action[i])
    self.next_incoming_orders = [self._get_demand()] + [
        x[0] for x in self.orders[:-1]
    ]

    # calculate costs; the builtin ``float`` replaces the ``np.float`` alias,
    # which no longer exists in current NumPy releases
    self.holding_cost = np.zeros(self.n_agents, dtype=float)
    self.stockout_cost = np.zeros(self.n_agents, dtype=float)
    for i in range(self.n_agents):
        if self.stocks[i] >= 0:
            # only applicable when stocks > 0
            self.holding_cost[i] = (max(0, self.stocks[i]) *
                                    self.score_weight[0][i])
        else:
            # only applicable when stocks < 0
            self.stockout_cost[i] = (-min(0, self.stocks[i]) *
                                     self.score_weight[1][i])
    self.cum_holding_cost += self.holding_cost
    self.cum_stockout_cost += self.stockout_cost

    # calculate reward
    rewards = self._get_rewards()

    # check if done
    if self.turn == self.n_turns - 1:
        print(
            f"\nTotal cost is: EUR {sum(self.cum_holding_cost + self.cum_stockout_cost)}"
        )
        self.done = True
    else:
        self.turn += 1

    state = self._get_observations()
    # todo flatten observation dict
    return state, rewards, self.done, {}
def step(self, action, verbose=True, return_event=False):
    """Execute an interaction or movement action through the controller.

    Interaction actions (names ending in ``'Object'``) operate on the
    closest suitable visible object: ``Put*`` places the first inventory
    object into the closest receptacle, ``Pickup*`` picks up the closest
    pickupable object when the inventory is empty, ``Open*``/``Close*``
    toggle the closest closed/open openable object. ``Rotate*``, ``Move*``
    and ``Look*`` actions are forwarded to the controller.

    Args:
        action: Index into ``self.action_names``; must be contained in
            ``self.action_space``.
        verbose: When true, print a summary of the object interacted with.
        return_event: When true, the controller event is returned as
            ``info``; otherwise ``info`` is an empty dict.

    Returns:
        Tuple ``(state_image, reward, done, info)``.

    Raises:
        error.InvalidAction: If ``action`` is not in the action space or the
            interaction name is not Put/Pickup/Open/Close.
        NotImplementedError: If the action name matches no supported kind.
    """
    if not self.action_space.contains(action):
        # Valid indices run from 0 to n - 1.
        raise error.InvalidAction('Action must be an integer between '
                                  '0 and {}!'.format(self.action_space.n - 1))
    action_str = self.action_names[action]
    visible_objects = [
        obj for obj in self.event.metadata['objects'] if obj['visible']
    ]
    # Reset the "last object ..." markers before the new action is applied.
    for attribute in self.metadata_last_object_attributes:
        self.event.metadata[attribute] = None

    def closest_visible(flag, category, extra=None):
        """Return the nearest visible object with ``obj[flag]`` set whose
        type is listed in ``self.objects[category]`` (and which passes
        ``extra`` when given), or ``None`` if there is no such object."""
        best, best_distance = None, float('inf')
        for obj in visible_objects:
            if (obj[flag] and obj['distance'] < best_distance
                    and (extra is None or extra(obj))
                    and obj['objectType'] in self.objects[category]):
                # Track the distance so later, farther objects cannot
                # replace a closer match.
                best, best_distance = obj, obj['distance']
        return best

    # if/else statements below for dealing with up to 13 actions
    if action_str.endswith('Object'):  # All interactions end with 'Object'
        interaction_obj = None
        inventory = self.event.metadata['inventoryObjects']
        inventory_before = inventory[0]['objectType'] if inventory else []

        if action_str.startswith('Put'):
            # Only look for a receptacle when there is something to put.
            receptacle = None
            if inventory:
                receptacle = closest_visible('receptacle', 'receptacles')
            if receptacle:
                interaction_obj = receptacle
                object_to_put = inventory[0]
                self.event = self.controller.step(
                    dict(action=action_str,
                         objectId=object_to_put['objectId'],
                         receptacleObjectId=interaction_obj['objectId']))
                self.event.metadata['lastObjectPut'] = object_to_put
                self.event.metadata[
                    'lastObjectPutReceptacle'] = interaction_obj
        elif action_str.startswith('Pickup'):
            pickupable = closest_visible('pickupable', 'pickupables')
            if pickupable and not inventory:
                interaction_obj = pickupable
                self.event = self.controller.step(
                    dict(action=action_str,
                         objectId=interaction_obj['objectId']))
                self.event.metadata['lastObjectPickedUp'] = interaction_obj
        elif action_str.startswith('Open'):
            openable = closest_visible('openable', 'openables',
                                       lambda obj: not obj['isOpen'])
            if openable:
                interaction_obj = openable
                self.event = self.controller.step(
                    dict(action=action_str,
                         objectId=interaction_obj['objectId']))
                self.event.metadata['lastObjectOpened'] = interaction_obj
        elif action_str.startswith('Close'):
            openable = closest_visible('openable', 'openables',
                                       lambda obj: obj['isOpen'])
            if openable:
                interaction_obj = openable
                self.event = self.controller.step(
                    dict(action=action_str,
                         objectId=interaction_obj['objectId']))
                self.event.metadata['lastObjectClosed'] = interaction_obj
        else:
            raise error.InvalidAction(
                'Invalid interaction {}'.format(action_str))

        # print what object was interacted with and state of inventory
        if interaction_obj and verbose:
            inventory_after = self.event.metadata['inventoryObjects'][0]['objectType'] \
                if self.event.metadata['inventoryObjects'] else []
            if action_str in ['PutObject', 'PickupObject']:
                inventory_changed_str = 'Inventory before/after: {}/{}.'.format(
                    inventory_before, inventory_after)
            else:
                inventory_changed_str = ''
            print('{}: {}. {}'.format(action_str,
                                      interaction_obj['objectType'],
                                      inventory_changed_str))
    elif action_str.startswith('Rotate'):
        if self.continuous_movement:
            # Continuous mode tracks an absolute rotation and sends it along
            # with a generic 'Rotate' command.
            if action_str.endswith('Left'):
                self.absolute_rotation -= self.rotation_amount
            elif action_str.endswith('Right'):
                self.absolute_rotation += self.rotation_amount
            self.event = self.controller.step(
                dict(action='Rotate', rotation=self.absolute_rotation))
        else:
            # Do normal RotateLeft/Right command in discrete mode (i.e. 3D GridWorld)
            self.event = self.controller.step(dict(action=action_str))
    elif action_str.startswith('Move') or action_str.startswith('Look'):
        # Move and Look actions
        self.event = self.controller.step(dict(action=action_str))
    else:
        raise NotImplementedError(
            'action_str: {} is not implemented'.format(action_str))

    self.task.step_num += 1
    state_image = self.preprocess(self.event.frame)
    reward, done = self.task.transition_reward(self.event)
    info = self.event if return_event else {}
    return state_image, reward, done, info
def get_action_name(action):
    """Return the entry of ``ActionSpace.lookup`` registered for ``action``.

    Raises:
        error.InvalidAction: If ``action`` is not a key of the lookup table.
    """
    names = ActionSpace.lookup
    if action not in names.keys():
        raise error.InvalidAction()
    return names[action]