def __init__(self, orders, products, start_date, end_date, stock_policy=None,
             debug=True, reward_function=None):
    """
    Initialize the env via env = gym.make("Procurement-v0", ...).

    :param orders: array of orders
    :type orders: list[Order] (see class Order)
    :param products: array of products defining the initial stock
    :type products: list[Product] (see class Product)
    :param start_date: start date of the env
    :type start_date: datetime
    :param end_date: end date of the env
    :type end_date: datetime
    :param stock_policy: custom stock policy
    :type stock_policy: dict
    :param debug: enable console prints
    :type debug: bool
    :param reward_function: custom reward function - see example_reward
    :type reward_function: function
    :return: env
    """
    if reward_function:
        self.reward_function = reward_function
    else:
        self.reward_function = self.example_reward
    self.orders = orders
    self.products = products
    self.start_date = start_date
    self.current_date = start_date
    self.end_date = end_date
    # Always set the attribute so later accesses cannot raise AttributeError;
    # the empty-dict fallback is an assumed default when no policy is given.
    self.stock_policy = stock_policy if stock_policy else {}
    self.debug = debug

    # OpenAI Gym environment metadata
    self.reward_range = (-np.inf, 0)
    # products x order amounts
    self.action_space = spaces.Box(
        low=0.0,
        high=self.get_max_product_order_amount(),
        shape=(len(products), self.get_max_product_range()),
        dtype=np.int64)  # np.int is deprecated in NumPy >= 1.20
    # matrix of days x products x amount
    self.observation_space = spaces.Box(
        low=0.0,
        high=self.get_max_product_order_amount(),
        shape=((end_date - start_date).days, len(products), 1),
        dtype=np.int64)

    # Build the initial stock from the products' starting amounts.
    stock = {}
    for p in products:
        stock[p.get_id()] = p.get_initial_stock()
    self.stock = stock

    if debug:
        print("init finished")
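# A minimal usage sketch (assumption, not from the source): "Procurement-v0"
# is the env id named in the docstring above and is assumed to be registered
# elsewhere in the package; _StubProduct is a hypothetical stand-in exposing
# only the two Product methods the constructor actually calls.
from datetime import datetime

import gym


class _StubProduct:
    def __init__(self, pid, initial_stock):
        self._pid = pid
        self._initial_stock = initial_stock

    def get_id(self):
        return self._pid

    def get_initial_stock(self):
        return self._initial_stock


env = gym.make(
    "Procurement-v0",
    orders=[],                            # no pending orders in this sketch
    products=[_StubProduct("p-1", 100)],
    start_date=datetime(2021, 1, 1),
    end_date=datetime(2021, 3, 31),
    debug=False,
)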
import numpy as np
import ray
from gym import spaces
from ray import tune
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFC
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.framework import try_import_torch
from ray.tune import Callback

from environment.cartpole_ray import CartPoleEnv

torch, nn = try_import_torch()

# Fixed 2-dim input space the model is built against, independent of the
# observation space the env reports.
custom_input_space = spaces.Box(low=-np.inf, high=np.inf, shape=(2,), dtype=np.float32)


class TorchCustomModel(TorchModelV2, nn.Module):
    """Example of a PyTorch custom model that just delegates to a fc-net."""

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        # Deliberately pass custom_input_space (not the env's obs_space) so
        # the wrapped fc-net is sized for the custom 2-dim input.
        TorchModelV2.__init__(self, custom_input_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)
        self.torch_sub_model = TorchFC(custom_input_space, action_space, num_outputs,
                                       model_config, name)

    def forward(self, input_dict, state, seq_lens):
        # Delegate straight to the wrapped fc-net, as the docstring describes.
        fc_out, _ = self.torch_sub_model(input_dict, state, seq_lens)
        return fc_out, []

    def value_function(self):
        return torch.reshape(self.torch_sub_model.value_function(), [-1])
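# A minimal training sketch (assumption, not from the source): register the
# custom model under an illustrative key and train PPO on the imported
# CartPoleEnv; the stop criterion and config values are placeholder choices.
if __name__ == "__main__":
    ModelCatalog.register_custom_model("custom_torch_fc", TorchCustomModel)
    ray.init()
    tune.run(
        "PPO",
        stop={"training_iteration": 5},
        config={
            "env": CartPoleEnv,
            "framework": "torch",
            "model": {"custom_model": "custom_torch_fc"},
            "num_workers": 0,
        },
    )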
from typing import Tuple

import numpy as np
from gym import spaces
from ray.rllib import MultiAgentEnv
from ray.rllib.utils.typing import MultiAgentDict

from training.hierarchical_learning.bomberman_multi_env import *

# Per-skill observation spaces: board feature planes plus a binary feature
# vector for each low-level sub-policy.
COLLECT_OBSERVATION_SPACE = spaces.Tuple((spaces.Box(low=0, high=1, shape=(15, 15, 11)),
                                          spaces.MultiBinary(6)))
DESTROY_OBSERVATION_SPACE = spaces.Tuple((spaces.Box(low=0, high=1, shape=(15, 15, 11)),
                                          spaces.MultiBinary(6)))
KILL_OBSERVATION_SPACE = spaces.Tuple((spaces.Box(low=0, high=1, shape=(15, 15, 13)),
                                       spaces.MultiBinary(6)))

HIGH_LEVEL_ACTIONS = ['COLLECT', 'DESTROY', 'KILL']


class HierarchicalBombermanMultiEnv(MultiAgentEnv):
    def __init__(self, agent_ids):
        self.agent_ids = agent_ids
        self.flat_env = BombermanEnv(agent_ids)
        # High-level observation: board planes, agent feature vector, and one
        # availability bit per high-level action.
        self.observation_space = spaces.Tuple((
            spaces.Box(low=0, high=1, shape=(15, 15, 14)),
            spaces.Box(low=0, high=1, shape=(4,)),
            # spaces.MultiBinary(3),
            # spaces.Box(low=0, high=1, shape=(1,)),
            spaces.MultiBinary(len(HIGH_LEVEL_ACTIONS))))
        # One discrete choice per entry in HIGH_LEVEL_ACTIONS.
        self.action_space = spaces.Discrete(3)
        self.high_level_mode = True
        self.action_buffer = {}
        self.high_low_mapping = {}

    def reset(self):
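        # Usage sketch (assumption): the high-level Discrete(3) action indexes
        # into HIGH_LEVEL_ACTIONS, e.g.
        #   env = HierarchicalBombermanMultiEnv([f'agent_{i}' for i in range(4)])
        #   env.action_space.n        # -> 3, one per HIGH_LEVEL_ACTIONS entry
        #   HIGH_LEVEL_ACTIONS[2]     # -> 'KILL'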