Python ResetNeededの例

プログラミング言語: Python

名前空間/パッケージ名: gym.error

メソッド/関数: ResetNeeded

hotexamples.comのコード掲載数: 6

Python ResetNeeded - 6件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのgym.error.ResetNeededの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

ファイル: stats_recorder.py プロジェクト: srivignessh/cartpole-ai

    def before_step(self, action):
        """Verify that the monitored environment is allowed to take a step.

        Args:
            action: The action about to be taken. It is not inspected here.

        Raises:
            AssertionError: If ``self.closed`` is truthy.
            error.ResetNeeded: If ``self.done`` is truthy, or if
                ``self.steps`` is ``None``.
        """
        assert not self.closed

        # A finished episode is reported before a never-reset environment.
        if self.done:
            template = "Trying to step environment which is currently done. While the monitor is active for {}, you cannot step beyond the end of an episode. Call 'env.reset()' to start the next episode."
            raise error.ResetNeeded(template.format(self.env_id))

        if self.steps is None:
            template = "Trying to step an environment before reset. While the monitor is active for {}, you must call 'env.reset()' before taking an initial step."
            raise error.ResetNeeded(template.format(self.env_id))

コード例 #2

ファイルを表示

ファイル: trace_recorder.py プロジェクト: passiweinberger/gym

    def before_step(self, action):
        """Record an action that is about to be applied to the environment.

        Args:
            action: The action to append to ``self.actions``.

        Raises:
            AssertionError: If ``self.closed`` is truthy.
            error.ResetNeeded: If ``self.done`` is truthy; the action is not
                recorded in that case.
        """
        assert not self.closed

        if self.done:
            message = "Trying to step environment which is currently done. While the monitor is active, you cannot step beyond the end of an episode. Call 'env.reset()' to start the next episode."
            raise error.ResetNeeded(message)

        # Only reached while the episode is still running.
        self.actions.append(action)

コード例 #3

ファイルを表示

    def step(self,
             action: Tuple[int, int],
             player: Optional[int] = None) -> Tuple[Any, float, bool, Dict]:
        """Place the current player's mark on the board and advance the game.

        Args:
            action: Board location to mark. It is converted to a tuple and
                used to index ``self.board``.
            player: Optional identifier of the player making the move. When
                given, it must equal ``self.curr_turn``; this provides a way
                of checking that moves are not submitted out of order.

        Returns:
            observation, reward, done, info. The reward is the non-zero
            result of ``check_win`` converted to float when there is a
            winner, otherwise 0.0. ``info`` is always an empty dict.

        Raises:
            error.InvalidAction: If the chosen location is not empty, or if
                ``player`` is given and does not match ``self.curr_turn``.
            error.ResetNeeded: If the game is already over.
        """
        # check the action is valid and the game isn't over
        action = tuple(action)
        if self.board[action] != 0:
            raise error.InvalidAction(f"action {action} is not a vaid choice")
        if self.done:
            raise error.ResetNeeded("Call reset as game is over")
        # Compare against None explicitly: 0 marks an empty cell and is never a
        # valid player, so a falsy ``player`` must not skip the turn check.
        if player is not None and player != self.curr_turn:
            raise error.InvalidAction(
                f"Player {self.curr_turn}'s turn. Move request from {player}")

        logger.debug("Selected action: %s on turn %d", action,
                     self.turns_played + 1)

        # set the location on the board to the current player. Since curr_turn
        # and current player use the same indicator, we just use that
        self.board[action] = self.curr_turn

        # check if the game is over. Reward is player that won (1 or -1)
        reward = check_win(self.board)
        if reward:
            self.done = True
            return self._get_obs(), float(reward), self.done, {}

        # check if the game is over (i.e. no more turns). Since we don't have a win
        # it must be a draw
        # NOTE(review): turns_played is not modified in this method; presumably
        # it is derived from the board or updated elsewhere - confirm.
        if self.turns_played == 9:
            self.done = True
            return self._get_obs(), 0.0, self.done, {}

        # otherwise game is still going. Advance turn and return state + no reward
        self.curr_turn = next(self.turn_iterator)
        return self._get_obs(), 0.0, self.done, {}

コード例 #4

ファイルを表示

ファイル: unrealizedPnL.py プロジェクト: Tornadoofsoul/gym-cryptotrading

    def step(self, action):
        """Advance the environment by one timestep.

        Args:
            action: Action to apply. It is passed to ``_take_action`` and also
                used as a key into ``BaseEnv.action_space.lookup`` for the
                debug log line.

        Returns:
            A 4-tuple ``(state, reward, done, remaining)``. While the episode
            is running ``done`` is ``False`` and ``remaining`` is
            ``float(self.horizon - self.timesteps)``; on the step that reaches
            the horizon ``done`` is ``True`` and ``remaining`` is ``0.0``.

        Raises:
            error.ResetNeeded: If ``self.episode_number`` is falsy or
                ``self.timesteps`` has already reached ``self.horizon``.
        """
        # Compare by value, not identity: ``is`` on integers only happens to
        # work for the small ints CPython caches, so a larger horizon would
        # never be detected and the episode would never terminate.
        if not self.episode_number or self.timesteps == self.horizon:
            raise error.ResetNeeded()

        # ``_get_new_state`` is called before the action is applied.
        state = self._get_new_state()
        self._take_action(action)
        reward = self._get_reward()

        message = "Timestep {}:==: Action: {} ; Reward: {}".format(
            self.timesteps, BaseEnv.action_space.lookup[action], reward)
        self.logger.debug(message)

        self.timesteps = self.timesteps + 1
        if self.timesteps != self.horizon:
            # ``current`` only advances while the episode is still running.
            self.current = self.current + 1
            return state, reward, False, float(self.horizon - self.timesteps)

        return state, reward, True, 0.0

コード例 #5

ファイルを表示

    def step(self, action):
        if self.done:
            raise error.ResetNeeded("")

        r, c, stone = action
        if self.board[r][c] != self.EMPTY:
            raise error.InvalidAction(
                "Stone '{}' already exists in row: {}, col: {}".format(
                    self.board[r][c], r, c))

        if stone >= self.STONE_TYPE_COUNT:
            raise error.InvalidAction("Unknown stone type '{}'".format(stone))

        if stone == self.last_stone:
            raise error.InvalidAction("Need to change stone.")

        self.board[r][c] = self.STONES[stone]
        self.last_stone = self.STONES[stone]
        self.remaining_place -= 1

        reward, self.done = self._check_status()

        return copy.deepcopy(self.board), reward, self.done, {}

コード例 #6

ファイルを表示

    def step(self, action: list):
        """Advance the simulation by one turn using the agents' orders.

        Args:
            action: The order placed by each agent, indexed by agent.
                ``get_init_len(action)`` must equal ``self.n_agents`` and no
                entry may be negative.

        Returns:
            ``(state, rewards, done, info)`` where ``state`` comes from
            ``self._get_observations()``, ``rewards`` from
            ``self._get_rewards()``, ``done`` is ``self.done`` and ``info`` is
            an empty dict.

        Raises:
            error.ResetNeeded: If the environment is already done.
            error.InvalidAction: If ``get_init_len(action)`` differs from
                ``self.n_agents`` or any action value is negative.
        """
        # sanity checks
        if self.done:
            raise error.ResetNeeded(
                "Environment is finished, please run env.reset() before taking actions"
            )
        if get_init_len(action) != self.n_agents:
            raise error.InvalidAction(
                f"Length of action array must be same as n_agents({self.n_agents})"
            )
        if any(np.array(action) < 0):
            raise error.InvalidAction(
                f"You can't order negative amount. You agents actions are: {action}"
            )

        # self.prev_states is a queue of previous states: drop the oldest entry
        # and append the observation taken before this step is applied.
        self.prev_states.popleft()
        self.prev_states.append(self._get_observations())
        # make incoming step
        demand = self._get_demand()
        orders_inc = [order.popleft() for order in self.orders]
        # what's the demand for each agent
        # NOTE(review): this value is overwritten further down, where
        # _get_demand() is called a second time - confirm both are intended.
        self.next_incoming_orders = [demand] + orders_inc[:-1]
        ship_inc = [shipment.popleft() for shipment in self.inbound_shipments]
        # calculate inbound shipments respecting orders and stock levels
        # (the last agent, the manufacturer, is assumed to have no constraints)
        for i in range(self.n_agents - 1):
            # stock + incoming shipment
            max_possible_shipment = max(0, self.stocks[i + 1]) + ship_inc[i + 1]
            # incoming order + stockout (backorder)
            order = orders_inc[i] + max(0, -self.stocks[i + 1])
            shipment = min(order, max_possible_shipment)
            self.inbound_shipments[i].append(shipment)
        self.inbound_shipments[-1].append(orders_inc[-1])
        # update stocks
        self.stocks = [(stock + inc)
                       for stock, inc in zip(self.stocks, ship_inc)]
        for i in range(1, self.n_agents):
            self.stocks[i] -= orders_inc[i - 1]
        self.stocks[0] -= demand  # for the retailer
        # update orders
        for i in range(self.n_agents):
            self.orders[i].append(action[i])
        self.next_incoming_orders = [self._get_demand()
                                     ] + [x[0] for x in self.orders[:-1]]

        # calculate costs
        # Use the builtin ``float`` as dtype: the ``np.float`` alias was
        # deprecated in NumPy 1.20 and removed in 1.24.
        self.holding_cost = np.zeros(self.n_agents, dtype=float)
        self.stockout_cost = np.zeros(self.n_agents, dtype=float)
        for i in range(self.n_agents):
            if self.stocks[i] >= 0:
                # only applicable when stocks > 0
                self.holding_cost[i] = (max(0, self.stocks[i]) *
                                        self.score_weight[0][i])
            else:
                # only applicable when stocks < 0
                self.stockout_cost[i] = (-min(0, self.stocks[i]) *
                                         self.score_weight[1][i])
        self.cum_holding_cost += self.holding_cost
        self.cum_stockout_cost += self.stockout_cost
        # calculate reward
        rewards = self._get_rewards()

        # check if done
        if self.turn == self.n_turns - 1:
            print(
                f"\nTotal cost is: EUR {sum(self.cum_holding_cost + self.cum_stockout_cost)}"
            )
            self.done = True
        else:
            self.turn += 1
        state = self._get_observations()
        # todo flatten observation dict
        return state, rewards, self.done, {}