def __init__(self, agent_ids):
        self.agent_ids = agent_ids
        self.flat_env = BombermanEnv(agent_ids)

        self.observation_space = spaces.Tuple((spaces.Box(low=0, high=1, shape=(15, 15, 14)),
                                               spaces.Box(low=0, high=1, shape=(4,)),
                                               #spaces.MultiBinary(3),
                                               #spaces.Box(low=0, high=1, shape=(1,)),
                                               spaces.MultiBinary(len(HIGH_LEVEL_ACTIONS))))
        self.action_space = spaces.Discrete(3)
        self.high_level_mode = True
        self.action_buffer = {}
        self.high_low_mapping = {}
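The Discrete(3) action space above indexes the three high-level behaviours defined later in this page as HIGH_LEVEL_ACTIONS; a minimal sketch of that mapping, with the chosen index as a stand-in for a sampled policy output:

HIGH_LEVEL_ACTIONS = ['COLLECT', 'DESTROY', 'KILL']

action = 1                               # stand-in for a policy's Discrete(3) sample
behaviour = HIGH_LEVEL_ACTIONS[action]   # 'DESTROY'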
Example #2
    def __init__(self,
                 orders,
                 products,
                 start_date,
                 end_date,
                 stock_policy=None,
                 debug=True,
                 reward_function=None):
        """
       init the env by using env = gym.make("Procurement-v0" ....)

        :param orders: Array of orders
        :type orders: []Order (see class Order)
        :param products: Array of products define the initial stock
        :type products: []Product (see class Product)
        :param start_date: Start date of the env
        :type datetime
        :param end_date: End date of the env
        :type datetime
        :param stock_policy: Custom stock policy
        :type dict
        :param debug: enable console prints
        :type bool
        :param reward_function: custom reward function - see example_reward function
        :type function

       :return: env
       """

        if reward_function:
            self.reward_function = reward_function
        else:
            self.reward_function = self.example_reward
        self.orders = orders
        self.products = products
        self.start_date = start_date
        self.current_date = start_date
        self.end_date = end_date
        # Always set the attribute so later lookups cannot raise AttributeError.
        self.stock_policy = stock_policy  # may be None when no custom policy is given
        self.debug = debug
        # Environment OpenAI metadata
        self.reward_range = (-np.inf, 0)
        self.action_space = spaces.Box(
            low=0.0,
            high=self.get_max_product_order_amount(),
            shape=(len(products), self.get_max_product_range()),
            dtype=np.int64)  # products x order amounts
        self.observation_space = spaces.Box(
            low=0.0,
            high=self.get_max_product_order_amount(),
            shape=((end_date - start_date).days, len(products), 1),
            # matrix of days x products x amount
            dtype=np.int64)
        stock = {}
        for p in products:
            stock[p.get_id()] = p.get_initial_stock()
        self.stock = stock

        if debug:
            print("init finished")
Example #3
import numpy as np
import ray
from gym import spaces
from ray import tune
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFC
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.framework import try_import_torch
from ray.tune import Callback

from environment.cartpole_ray import CartPoleEnv

torch, nn = try_import_torch()

custom_input_space = spaces.Box(low=-np.inf,
                                high=np.inf,
                                shape=(2, ),
                                dtype=np.float32)


class TorchCustomModel(TorchModelV2, nn.Module):
    """Example of a PyTorch custom model that just delegates to a fc-net."""
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        TorchModelV2.__init__(self, custom_input_space, action_space,
                              num_outputs, model_config, name)
        nn.Module.__init__(self)

        self.torch_sub_model = TorchFC(custom_input_space, action_space,
                                       num_outputs, model_config, name)

    def forward(self, input_dict, state, seq_lens):
        # Delegate straight to the wrapped fully connected network.
        fc_out, _ = self.torch_sub_model(input_dict, state, seq_lens)
        return fc_out, []

    def value_function(self):
        # Delegate the value head to the sub-model as well.
        return torch.reshape(self.torch_sub_model.value_function(), [-1])
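The otherwise unused imports above (ModelCatalog, tune, Callback, CartPoleEnv) suggest the omitted remainder of this example registers the model and launches training; a minimal sketch of that wiring, with the algorithm choice and stopping criterion as assumptions:

ray.init()
ModelCatalog.register_custom_model("torch_custom_model", TorchCustomModel)

tune.run(
    "PPO",  # algorithm choice is an assumption
    stop={"training_iteration": 10},  # stopping criterion is an assumption
    config={
        "env": CartPoleEnv,
        "framework": "torch",
        "model": {"custom_model": "torch_custom_model"},
    },
)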
Example #4

from typing import Tuple

from gym import spaces
from ray.rllib import MultiAgentEnv
from ray.rllib.utils.typing import MultiAgentDict

from training.hierarchical_learning.bomberman_multi_env import *

import numpy as np

COLLECT_OBSERVATION_SPACE = spaces.Tuple((spaces.Box(low=0, high=1, shape=(15, 15, 11)), spaces.MultiBinary(6)))
DESTROY_OBSERVATION_SPACE = spaces.Tuple((spaces.Box(low=0, high=1, shape=(15, 15, 11)), spaces.MultiBinary(6)))
KILL_OBSERVATION_SPACE = spaces.Tuple((spaces.Box(low=0, high=1, shape=(15, 15, 13)), spaces.MultiBinary(6)))
HIGH_LEVEL_ACTIONS = ['COLLECT', 'DESTROY', 'KILL']

class HierarchicalBombermanMultiEnv(MultiAgentEnv):
    def __init__(self, agent_ids):
        self.agent_ids = agent_ids
        self.flat_env = BombermanEnv(agent_ids)

        self.observation_space = spaces.Tuple((spaces.Box(low=0, high=1, shape=(15, 15, 14)),
                                               spaces.Box(low=0, high=1, shape=(4,)),
                                               #spaces.MultiBinary(3),
                                               #spaces.Box(low=0, high=1, shape=(1,)),
                                               spaces.MultiBinary(len(HIGH_LEVEL_ACTIONS))))
        self.action_space = spaces.Discrete(3)
        self.high_level_mode = True
        self.action_buffer = {}
        self.high_low_mapping = {}

    def reset(self):