def __init__(self, env_config):
    self.state = None
    self.agent_1 = 0
    self.agent_2 = 1
    # MADDPG emits action logits instead of actual discrete actions
    self.actions_are_logits = env_config.get("actions_are_logits", False)
    self.one_hot_state_encoding = env_config.get("one_hot_state_encoding", False)
    self.with_state = env_config.get("separate_state_space", False)

    if not self.one_hot_state_encoding:
        self.observation_space = Discrete(6)
        self.with_state = False
    else:
        # Each agent gets the full state (one-hot encoding of which of the
        # three states is active) as input, with the receiving agent's
        # ID (1 or 2) concatenated onto the end.
        if self.with_state:
            self.observation_space = Dict({
                "obs": MultiDiscrete([2, 2, 2, 3]),
                ENV_STATE: MultiDiscrete([2, 2, 2])
            })
        else:
            self.observation_space = MultiDiscrete([2, 2, 2, 3])
def __init__(self, start, goal):
    """Initializes the environment.

    Args:
        start (float[4]): [x, y, v_xi, v_yi]
        goal (float[4]): [x, y, v_xf, v_yf]
        obstacles (height (int) x width (int)): square matrix with 0
            indicating empty and 1 indicating an obstacle

    Returns:
        Initialized object
    """
    self.start = start
    self.goal = goal
    self.space = DoubleIntegratorVisualizer()
    self.visualizer = PlanVisualizationProgram(
        self.space.get_planning_problem(self.start, self.goal),
        "ao-rrt", "data/ao-rrt")
    self.visualizer.width = self.visualizer.height = 640
    glutInit([])
    self.visualizer.initWindow()
    self.current_state = start
    self.V = [start]  # Vertices of the graph
    self.E = []  # Edges of the graph
    self.N = 0
    self.new = False
    self.obstacles = None  # TODO
    # Action space is time, u_y, and u_x. Acceleration can be positive or negative.
    self.action_space = Box(np.array([1.0, -1.0, -1.0]), np.array([10.0, 1.0, 1.0]))
    # Observation space is width x height, current coordinates, goal coordinates, v_x, v_y
    self.observation_space = Tuple(
        (MultiDiscrete([60, 60]), MultiDiscrete([60, 60]),
         MultiDiscrete([60, 60]),
         Box(np.array([-25, -25]), np.array([25, 25]))))
def observation_space(self):
    if self.action_space_type == ActionSpaceType.DIFFERENTIAL:
        # The third dimension is the number of possible directions:
        # north, south, west, east.
        return MultiDiscrete(
            (self.columns, self.rows, Direction.NB_DIRECTIONS))
    else:
        return MultiDiscrete((self.columns, self.rows))
def __init__(self):
    self.plant_deck = {
        "sunflower": Sunflower,
        "peashooter": Peashooter,
        "wall-nut": Wallnut
    }
    self.action_space = Discrete(
        len(self.plant_deck) * config.N_LANES * config.LANE_LENGTH + 1)
    # self.action_space = MultiDiscrete([len(self.plant_deck), config.N_LANES, config.LANE_LENGTH])  # plant, lane, pos
    self.observation_space = Tuple([
        MultiDiscrete([len(self.plant_deck) + 1] *
                      (config.N_LANES * config.LANE_LENGTH)),
        MultiDiscrete([MAX_ZOMBIE_PER_CELL + 1] *
                      (config.N_LANES * config.LANE_LENGTH)),
        Discrete(MAX_SUN),
        MultiBinary(len(self.plant_deck))
    ])  # Observation: plant type per cell, zombie count per cell, current sun, which plants are available
    self._plant_names = [plant_name for plant_name in self.plant_deck]
    self._plant_classes = [
        self.plant_deck[plant_name].__name__ for plant_name in self.plant_deck
    ]
    self._plant_no = {
        self._plant_classes[i]: i for i in range(len(self._plant_names))
    }
    self._scene = Scene(self.plant_deck, BasicZombieSpawner())
    self._reward = 0
def build_agent_spaces(self) -> Tuple[Space, Space]:
    """Construct the action and observation spaces.

    Description of actions and observations:
    https://github.com/google-research/football/blob/master/gfootball/doc/observation.md
    """  # noqa: E501
    action_space = Discrete(19)
    # The football field's corners are [+-1., +-0.42]. However, the players
    # and balls may get out of the field. Thus we multiply those limits by
    # a factor of 2.
    xlim = 1. * 2
    ylim = 0.42 * 2
    num_players: int = 11
    xy_space = Box(
        np.array([-xlim, -ylim], dtype=np.float32),
        np.array([xlim, ylim], dtype=np.float32))
    xyz_space = Box(
        np.array([-xlim, -ylim, 0], dtype=np.float32),
        np.array([xlim, ylim, np.inf], dtype=np.float32))
    observation_space = DictSpace({
        "controlled_players": Discrete(2),
        "players_raw": TupleSpace([
            DictSpace({
                # ball information
                "ball": xyz_space,
                "ball_direction": Box(-np.inf, np.inf, (3, )),
                "ball_rotation": Box(-np.inf, np.inf, (3, )),
                "ball_owned_team": Discrete(3),
                "ball_owned_player": Discrete(num_players + 1),
                # left team
                "left_team": TupleSpace([xy_space] * num_players),
                "left_team_direction": TupleSpace([xy_space] * num_players),
                "left_team_tired_factor": Box(0., 1., (num_players, )),
                "left_team_yellow_card": MultiBinary(num_players),
                "left_team_active": MultiBinary(num_players),
                "left_team_roles": MultiDiscrete([10] * num_players),
                # right team
                "right_team": TupleSpace([xy_space] * num_players),
                "right_team_direction": TupleSpace([xy_space] * num_players),
                "right_team_tired_factor": Box(0., 1., (num_players, )),
                "right_team_yellow_card": MultiBinary(num_players),
                "right_team_active": MultiBinary(num_players),
                "right_team_roles": MultiDiscrete([10] * num_players),
                # controlled player information
                "active": Discrete(num_players),
                "designated": Discrete(num_players),
                "sticky_actions": MultiBinary(10),
                # match state
                "score": Box(-np.inf, np.inf, (2, )),
                "steps_left": Box(0, np.inf, (1, )),
                "game_mode": Discrete(7)
            })
        ])
    })
    return action_space, observation_space
class AvailActionsTestEnv(MultiAgentEnv):
    num_actions = 10
    action_space = Discrete(num_actions)
    observation_space = Dict({
        "obs": Dict({
            "test": Dict({
                "a": Discrete(2),
                "b": MultiDiscrete([2, 3, 4])
            }),
            "state": MultiDiscrete([2, 2, 2]),
        }),
        "action_mask": Box(0, 1, (num_actions, )),
    })

    def __init__(self, env_config):
        super().__init__()
        self.state = None
        self.avail = env_config.get("avail_actions", [3])
        self.action_mask = np.array([0] * 10)
        for a in self.avail:
            self.action_mask[a] = 1

    def reset(self):
        self.state = 0
        return {
            "agent_1": {
                "obs": self.observation_space["obs"].sample(),
                "action_mask": self.action_mask,
            },
            "agent_2": {
                "obs": self.observation_space["obs"].sample(),
                "action_mask": self.action_mask,
            },
        }

    def step(self, action_dict):
        if self.state > 0:
            assert (action_dict["agent_1"] in self.avail
                    and action_dict["agent_2"] in self.avail), \
                "Failed to obey available actions mask!"
        self.state += 1
        rewards = {"agent_1": 1, "agent_2": 0.5}
        obs = {
            "agent_1": {
                "obs": self.observation_space["obs"].sample(),
                "action_mask": self.action_mask,
            },
            "agent_2": {
                "obs": self.observation_space["obs"].sample(),
                "action_mask": self.action_mask,
            },
        }
        dones = {"__all__": self.state >= 20}
        return obs, rewards, dones, {}
def test_encoder_with_sampling(space):
    """Test space_encoder with sampling."""
    NUM_SAMPLES = int(np.prod(space))
    x = MultiDiscrete(space)
    e = Encoder(x)
    for _ in range(NUM_SAMPLES):
        i = x.sample()
        enc = e.encode(i)
        dec = e.decode(enc)
        assert np.equal(i, dec).all()
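# A minimal sketch of an Encoder that would satisfy the round-trip test above,
# assuming the intent is to map MultiDiscrete samples to flat integer indices.
# The ravel/unravel implementation is an assumption for illustration, not
# necessarily the project's actual Encoder.
import numpy as np
from gym.spaces import MultiDiscrete


class Encoder:
    def __init__(self, space: MultiDiscrete):
        self.nvec = space.nvec

    def encode(self, sample):
        # Map a vector of sub-values to one flat index in [0, prod(nvec)).
        return int(np.ravel_multi_index(sample, self.nvec))

    def decode(self, index):
        # Inverse mapping: flat index back to the vector of sub-values.
        return np.array(np.unravel_index(index, self.nvec))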
def __init__(self):
    self.action_space = MultiDiscrete([2, 3, 4])
    self.observation_space = MultiDiscrete([4, 5])
    self.current_step = 0
    self._valid_actions1 = torch.ones(self.action_space.nvec[0])
    self._valid_actions2 = torch.ones(self.action_space.nvec[0],
                                      self.action_space.nvec[1])
    self._valid_actions3 = torch.ones(self.action_space.nvec[0],
                                      self.action_space.nvec[1],
                                      self.action_space.nvec[2])
    self._action_mask = [
        self._valid_actions1, self._valid_actions2, self._valid_actions3
    ]
def __init__(self, env_config):
    self.env_config = env_config
    self.reference_world = ws.WorldBuilder.create()
    self.current_iteration = 0
    self.n_iterations = 0
    self.product_ids = self._product_ids()
    self.max_sources_per_facility = 0
    self.max_fleet_size = 0
    self.facility_types = {}
    facility_class_id = 0
    for f in self.reference_world.facilities.values():
        if f.consumer is not None:
            sources_num = len(f.consumer.sources)
            if sources_num > self.max_sources_per_facility:
                self.max_sources_per_facility = sources_num
        if f.distribution is not None:
            if len(f.distribution.fleet) > self.max_fleet_size:
                self.max_fleet_size = len(f.distribution.fleet)
        facility_class = f.__class__.__name__
        if facility_class not in self.facility_types:
            self.facility_types[facility_class] = facility_class_id
            facility_class_id += 1

    self.state_calculator = StateCalculator(self)
    self.reward_calculator = RewardCalculator(env_config)
    self.action_calculator = ActionCalculator(self)

    self.action_space_producer = MultiDiscrete([
        8,  # unit price
        6,  # production rate level
    ])
    self.action_space_consumer = MultiDiscrete([
        self.n_products(),  # consumer product id
        self.max_sources_per_facility,  # consumer source id
        6  # consumer quantity
    ])

    example_state, _ = self.state_calculator.world_to_state(self.reference_world)
    state_dim = len(list(example_state.values())[0])
    self.observation_space = Box(low=0.00, high=1.00, shape=(state_dim, ),
                                 dtype=np.float64)
def __init__(self, env, stack_size=4):
    """Wrapper that returns stacks of the last n timesteps.

    Args:
        stack_size: number of observations to be stacked and returned
    """
    super().__init__(env)
    # Older observations will have a lower index in the buffer.
    self._buffer = None
    self._stack_size = stack_size
    old_space_screen = env.observation_space[0]
    old_space_movmnt = env.observation_space[1]
    # The new screen observation space is just the previous one, repeated
    # stack_size times along a new leading axis.
    new_space_screen = Box(
        old_space_screen.low.reshape(
            -1, *old_space_screen.low.shape).repeat(stack_size, axis=0),
        old_space_screen.high.reshape(
            -1, *old_space_screen.high.shape).repeat(stack_size, axis=0),
        dtype=old_space_screen.dtype)
    # The new position space is the previous Discrete space, repeated.
    new_space_movmnt = MultiDiscrete(
        [old_space_movmnt.n for _ in range(stack_size)])
    # Observations will be tuples of (stack of screens, stack of positions).
    self.observation_space = Tuple([new_space_screen, new_space_movmnt])
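# A minimal sketch of the observation() hook such a frame-stacking wrapper
# could pair with the spaces defined above (intended as a method of the
# wrapper). The lazy buffer initialization and the stacking layout are
# assumptions for illustration, not the original implementation; the buffer
# is also not cleared between episodes here.
import numpy as np


def observation(self, obs):
    screen, movmnt = obs
    if self._buffer is None:
        # First observation: fill the whole stack with copies of it.
        self._buffer = ([screen] * self._stack_size,
                        [movmnt] * self._stack_size)
    else:
        screens, movmnts = self._buffer
        # Older observations keep the lower indices: drop the oldest, append the newest.
        self._buffer = (screens[1:] + [screen], movmnts[1:] + [movmnt])
    screens, movmnts = self._buffer
    return np.stack(screens, axis=0), np.array(movmnts)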
def __init__(self, **smac_args):
    """Create a new multi-agent StarCraft env compatible with RLlib.

    Arguments:
        smac_args (dict): Arguments to pass to the underlying
            smac.env.starcraft.StarCraft2Env instance.

    Examples:
        >>> from smac_rllib import RLlibStarCraft2Env
        >>> env = RLlibStarCraft2Env(map_name="8m")
        >>> print(env.reset())
    """
    self._env = StarCraft2Env(**smac_args)
    self.horizon = self._env.episode_limit
    self.nbr_agents = self._env.n_agents
    self._ready_agents = []

    self.observation_space = Dict({
        "obs": Box(-1, 1, shape=(self.nbr_agents, self._env.get_obs_size(),)),
        "avail_actions": Box(0, 1, shape=(self.nbr_agents,
                                          self._env.get_total_actions(),)),
        "state": Box(-float('inf'), float('inf'),
                     shape=(self._env.get_state_size(),)),
        "battle_won": Box(0, 1, shape=(1,), dtype=np.bool_),
        "dead_allies": Box(0, self.nbr_agents, shape=(1,), dtype=np.int64),
        "dead_enemies": Box(0, int(1e3), shape=(1,), dtype=np.int64)
    })
    self.action_space = MultiDiscrete([self._env.get_total_actions()] * self.nbr_agents)
def __init__(self, env, body_names, radius_multiplier=1.5,
             agent_idx_allowed_to_lock=None, lock_type="any_lock",
             ac_obs_prefix='', obj_in_game_metadata_keys=None,
             agent_allowed_to_lock_keys=None):
    super().__init__(env)
    self.n_agents = self.unwrapped.n_agents
    self.n_obj = len(body_names)
    self.body_names = body_names
    self.agent_idx_allowed_to_lock = np.arange(self.n_agents) \
        if agent_idx_allowed_to_lock is None else agent_idx_allowed_to_lock
    self.lock_type = lock_type
    self.ac_obs_prefix = ac_obs_prefix
    self.obj_in_game_metadata_keys = obj_in_game_metadata_keys
    self.agent_allowed_to_lock_keys = agent_allowed_to_lock_keys
    self.action_space.spaces[f'action_{ac_obs_prefix}glue'] = Tuple(
        [MultiDiscrete([2] * self.n_obj) for _ in range(self.n_agents)])
    self.observation_space = update_obs_space(
        env, {
            f'{ac_obs_prefix}obj_lock': (self.n_obj, 1),
            f'{ac_obs_prefix}you_lock': (self.n_agents, self.n_obj, 1),
            f'{ac_obs_prefix}team_lock': (self.n_agents, self.n_obj, 1)
        })
    self.lock_radius = radius_multiplier * self.metadata['box_size']
    self.obj_locked = np.zeros((self.n_obj, ), dtype=int)
def __init__(self, x_dim=5, y_dim=5, **kwargs):
    self.x_dim = x_dim
    self.y_dim = y_dim
    self.num_states = x_dim * y_dim
    # Right, Up, Left, Down, Grab
    self.action_space = Discrete(5)
    self.observation_space = Dict(
        dict(
            desired_goal=Discrete(self.num_states),  # goal position
            achieved_goal=Discrete(self.num_states),  # block position
            observation=MultiDiscrete([self.num_states, 2])  # arm position, object in air
        ))
    self._location_space = Discrete(self.num_states)
    self._goal_location = self._location_space.sample()
    self._block_location = self._location_space.sample()
    self._arm_location = self._location_space.sample()
    self._picked_up_block = False
    self.action_handlers = [
        self._move_function(lambda s: s - 1,
                            lambda s: s % self.x_dim == 0),  # right
        self._move_function(lambda s: s - self.x_dim,
                            lambda s: s < self.x_dim),  # up
        self._move_function(lambda s: s + 1,
                            lambda s: (s + 1) % self.x_dim == 0),  # left
        self._move_function(
            lambda s: s + self.x_dim,
            lambda s: s + self.x_dim >= self.x_dim * self.y_dim),  # down
        self._grab
    ]
def __init__(self, init_space, nb_bins):
    if not isinstance(init_space, Box):
        raise RuntimeError(
            "Impossible to convert a gym space of type {} to a discrete space"
            " (it should be of type space.Box)"
            "".format(type(init_space)))
    if nb_bins < 2:
        raise RuntimeError(
            "This does not work with fewer than 2 bins (if you want to ignore some parts "
            "of the action_space or observation_space please use "
            "\"gym_space.ignore_attr\" or \"gym_space.keep_only_attr\")")
    min_ = init_space.low
    max_ = init_space.high
    self._ignored = min_ == max_  # which components are ignored
    self._res = min_
    self._values = np.linspace(min_, max_, num=nb_bins + 2)
    # the values that will be used when converting gym to glop
    self._values = self._values[1:-1, :]
    # TODO there might be a cleaner approach here
    self._bins_size = np.linspace(min_, max_, num=2 * nb_bins + 1)
    self._bins_size = self._bins_size[2:-1:2, :]  # the values defining the "cuts"
    self._gen_idx = np.arange(self._bins_size.shape[-1])
    n_bins = np.ones(min_.shape[0]) * nb_bins
    # if min and max are equal, we don't want multiple values for that variable
    n_bins[self._ignored] = 1
    BaseGymAttrConverter.__init__(
        self,
        space=MultiDiscrete(n_bins),
    )
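# A small worked example of the binning arithmetic above, with hypothetical
# scalar bounds instead of the converter's per-component arrays: with low=0,
# high=1 and nb_bins=4, the kept "representative" values are the interior
# points of an (nb_bins + 2)-point linspace, and the "cuts" are every second
# interior point of a (2 * nb_bins + 1)-point linspace. The digitize call at
# the end is only an assumption about how a reading would be mapped to a bin.
import numpy as np

nb_bins = 4
low, high = 0.0, 1.0
values = np.linspace(low, high, num=nb_bins + 2)[1:-1]      # [0.2, 0.4, 0.6, 0.8]
cuts = np.linspace(low, high, num=2 * nb_bins + 1)[2:-1:2]  # [0.25, 0.5, 0.75]

x = 0.63
bin_index = int(np.digitize(x, cuts))  # -> 2, i.e. the bin represented by 0.6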
def __init__(self):
    EzPickle.__init__(self)
    self.seed()
    self.viewer = None
    self.world = Box2D.b2World()
    self.terrain = None
    self.hull = None
    self.prev_shaping = None
    self.fd_polygon = fixtureDef(
        shape=polygonShape(vertices=[(0, 0), (1, 0), (1, -1), (0, -1)]),
        friction=FRICTION)
    self.fd_edge = fixtureDef(
        shape=edgeShape(vertices=[(0, 0), (1, 1)]),
        friction=FRICTION,
        categoryBits=0x0001,
    )
    high = np.array([np.inf] * 28)
    # self.action_space = spaces.Box(np.array([-1, -1, -1, -1]), np.array([1, 1, 1, 1]), dtype=np.float32)
    self.observation_space = gym.spaces.Box(-high, high, dtype=np.float32)
    self.action_space = MultiDiscrete([3, 21, 21, 21, 21])
    # self.observation_shape = (24,)
    # self.observation_space = gym.spaces.Box(low=-high, high=high, shape=self.observation_shape, dtype=np.float32)
    self.valid_actions = []
    self.state_machine = None
    self.reset()
    self.terminal = False
    self.counter = 0
def __init__(self, env, num_pos_buckets, num_speed_buckets):
    super().__init__(env)
    self.observation_space = MultiDiscrete(
        [num_pos_buckets, num_speed_buckets])
    # Bucket edges over MountainCar's ranges: position in [-1.2, 0.6],
    # velocity in [-0.07, 0.07].
    self.pos_buckets = np.linspace(-1.2, 0.6, num_pos_buckets)
    self.speed_buckets = np.linspace(-0.07, 0.07, num_speed_buckets)
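# A minimal sketch of the observation() method such a discretizing wrapper
# would need (intended as a method of the wrapper above). Using np.digitize
# against the linspace edges is an assumption for illustration, not
# necessarily the original implementation.
import numpy as np


def observation(self, obs):
    pos, speed = obs
    # digitize returns an index in 1..len(bins); shift and clip into
    # [0, num_buckets - 1] so the result is a valid sample of
    # MultiDiscrete([num_pos_buckets, num_speed_buckets]).
    pos_idx = np.clip(np.digitize(pos, self.pos_buckets) - 1,
                      0, len(self.pos_buckets) - 1)
    speed_idx = np.clip(np.digitize(speed, self.speed_buckets) - 1,
                        0, len(self.speed_buckets) - 1)
    return np.array([pos_idx, speed_idx])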
def make_obs_space(embed_dim=768, max_steps=None, max_utterances=5,
                   max_command_length=5, max_variables=10, max_actions=10,
                   **kwargs):
    true_obs = {
        'dialog_history': Repeated(
            Dict({
                'sender': Discrete(3),
                'utterance': Box(-10, 10, shape=(embed_dim, ))
            }),
            max_len=max_utterances),
        'partial_command': Repeated(
            Box(-10, 10, shape=(embed_dim, )), max_len=max_command_length),
        'variables': Repeated(
            Box(-10, 10, shape=(embed_dim, )), max_len=max_variables),
    }
    if max_steps:
        true_obs['steps'] = Discrete(max_steps)
    # return Dict(true_obs)  # for calculating true_obs_shape
    return Dict({
        "true_obs": Dict(true_obs),
        '_action_mask': MultiDiscrete([2 for _ in range(max_actions)]),
        '_action_embeds': Box(-10, 10, shape=(max_actions, embed_dim)),
    })
def __init__(self, config=None):
    self.s = 9
    self.action_space = Discrete(self.s)
    self.observation_space = MultiDiscrete([3] * self.s)
    self.agents = ["X", "O"]
    self.empty = " "
    self.t, self.state, self.rewards_to_send = self._reset()
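# A minimal sketch of how a 3x3 board could be encoded into the
# MultiDiscrete([3] * 9) observation above, assuming 0 = empty, 1 = "X",
# 2 = "O" (intended as a method of the env). The mapping is illustrative,
# not necessarily the original one.
import numpy as np


def _board_to_obs(self, board):
    # `board` is a length-9 sequence of cells containing self.empty, "X" or "O".
    mapping = {self.empty: 0, "X": 1, "O": 2}
    return np.array([mapping[cell] for cell in board], dtype=np.int64)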
def test_preprocessing_disabled(self):
    config = ppo.DEFAULT_CONFIG.copy()
    config["env"] = "ray.rllib.examples.env.random_env.RandomEnv"
    config["env_config"] = {
        "config": {
            "observation_space": Dict({
                "a": Discrete(5),
                "b": Dict({
                    "ba": Discrete(4),
                    "bb": Box(-1.0, 1.0, (2, 3), dtype=np.float32)
                }),
                "c": Tuple((MultiDiscrete([2, 3]), Discrete(1))),
                "d": Box(-1.0, 1.0, (1, ), dtype=np.int32),
            }),
        },
    }
    # Set this to True to enforce no preprocessors being used.
    # Complex observations now arrive directly in the model as
    # structures of batches, e.g. {"a": tensor, "b": [tensor, tensor]}
    # for obs-space=Dict(a=..., b=Tuple(..., ...)).
    config["_disable_preprocessor_api"] = True

    num_iterations = 1
    # Only supported for tf so far.
    for _ in framework_iterator(config):
        trainer = ppo.PPOTrainer(config=config)
        for i in range(num_iterations):
            results = trainer.train()
            check_train_results(results)
            print(results)
        check_compute_single_action(trainer)
        trainer.stop()
def __init__(self, G_const=1.0, acceleration=30.0, time_step=0.01,
             time_limit=10, friction=10.0, seed=None, boundary_less=-1,
             boundary_greater=1, num_agents=3):
    ''' constants '''
    self.G_const = G_const
    self.acceleration = acceleration
    self.time_step = time_step
    self.time_limit = time_limit
    self.friction = friction
    if seed is None:
        self.seed = int(time.time())
    else:
        self.seed = seed
    self.boundary_less = boundary_less
    self.boundary_greater = boundary_greater
    self.num_agents = num_agents
    # Note: this uses the legacy gym MultiDiscrete signature ([min, max] per
    # dimension); newer gym versions expect a flat list of category counts.
    self.action_space = MultiDiscrete([[0, 8] for _ in range(num_agents)])
    # It's unclear what low and high here should be. Set them to 0 so
    # that if anyone tries to use them, it is more likely that obviously
    # wrong things happen.
    self.observation_space = Box(low=0, high=0, shape=(4 * (num_agents + 1),))
    ''' variables that change with time '''
    self.state = State(num_agents, seed)
    self.spec = None
    self.viewer = None
def __init__(self):
    #####
    ##### Machine Teaching
    self.action_space = MultiDiscrete([21, 6, 4])
    self.observation_shape = (1, 33, 33)
    self.observation_space = gym.spaces.Box(low=0, high=1,
                                            shape=self.observation_shape,
                                            dtype=np.float16)
    self.counter = 0
    self.valid_actions1 = [1] * 21
    self.valid_actions2 = []
    for action in self.valid_actions1:
        self.valid_actions2.append([1] * 6)
    self.valid_actions3 = []
    for i in range(21):
        tmp = []
        for j in range(6):
            tmp.append([1] * 4)
        self.valid_actions3.append(tmp)
    self.valid_actions = [
        self.valid_actions1, self.valid_actions2, self.valid_actions3
    ]
    print('finished init')
def test_flatten_discrete():
    md = MultiDiscrete([3, 4])
    trafo = flatten(md)
    assert trafo.target == Discrete(12)

    # check that we get all actions exactly once
    actions = []
    for (i, j) in itertools.product([0, 1, 2], [0, 1, 2, 3]):
        actions += [(i, j)]
    for i in range(0, 12):
        a = trafo.convert_from(i)
        assert a in actions, (a, actions)
        assert trafo.convert_to(a) == i
        actions = list(filter(lambda x: x != a, list(actions)))
    assert len(actions) == 0

    # same test for binary
    md = MultiBinary(3)
    trafo = flatten(md)
    assert trafo.target == Discrete(2**3)

    # check that we get all actions exactly once
    actions = []
    for (i, j, k) in itertools.product([0, 1], [0, 1], [0, 1]):
        actions += [(i, j, k)]
    for i in range(0, 8):
        a = trafo.convert_from(i)
        assert trafo.convert_to(a) == i
        assert a in actions, (a, actions)
        actions = list(filter(lambda x: x != a, actions))
    assert len(actions) == 0

    # check support for numpy array and list
    assert trafo.convert_to((1, 0, 1)) == trafo.convert_to(np.array([1, 0, 1]))
    assert trafo.convert_to((1, 0, 1)) == trafo.convert_to([1, 0, 1])
def __init__(self):
    self.action_space = MultiDiscrete([len(action_set_list)] * n_agents)
    low = np.array([-inf] * (len_action_list * 2 + (6 * n_agents)))
    high = np.array([inf] * (len_action_list * 2 + (6 * n_agents)))
    self.observation_space = Box(low, high, dtype=np.float32, shape=None)
    self.curr_episode = 0
    self.seed()
def __init__(self, env, eat_thresh=0.5, max_food_health=10,
             respawn_time=np.inf, food_rew_type='selfish', reward_scale=1.0,
             reward_scale_obs=False):
    super().__init__(env)
    self.eat_thresh = eat_thresh
    self.max_food_health = max_food_health
    self.respawn_time = respawn_time
    self.food_rew_type = food_rew_type
    self.n_agents = self.metadata['n_agents']

    if type(reward_scale) not in [list, tuple, np.ndarray]:
        reward_scale = [reward_scale, reward_scale]
    self.reward_scale = reward_scale
    self.reward_scale_obs = reward_scale_obs

    # Reset obs/action space to match
    self.max_n_food = self.metadata['max_n_food']
    self.curr_n_food = self.metadata['curr_n_food']
    self.max_food_size = self.metadata['food_size']
    food_dim = 5 if self.reward_scale_obs else 4
    self.observation_space = update_obs_space(
        self.env, {
            'food_obs': (self.max_n_food, food_dim),
            'food_health': (self.max_n_food, 1),
            'food_eat': (self.max_n_food, 1)
        })
    self.action_space.spaces['action_eat_food'] = Tuple([
        MultiDiscrete([2] * self.max_n_food) for _ in range(self.n_agents)
    ])
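# For reference, a standalone example of what a sample from a per-agent
# Tuple-of-MultiDiscrete action space like 'action_eat_food' above looks
# like. The sizes (2 agents, 3 food items) are hypothetical, and the
# "agent i eats food j" reading is an assumption based on the wrapper name.
from gym.spaces import MultiDiscrete, Tuple

eat_space = Tuple([MultiDiscrete([2] * 3) for _ in range(2)])
sample = eat_space.sample()  # e.g. (array([0, 1, 0]), array([1, 0, 0]))
# Presumably agent i attempts to eat food j when sample[i][j] == 1.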
def __init__(self):
    self.seed()
    # No rotation yet
    self.action_space = MultiDiscrete([OBJ_COUNT, GRID_SIZE, GRID_SIZE])
    # State space: TODO
    self.observation_space = None
def __init__(self, env_config, seed=42):
    # Set seed
    np.random.seed(seed)
    self.mg = env_config['building']
    self.Na = 2 + self.mg.architecture['grid'] * 3 + \
        self.mg.architecture['genset'] * 1
    if self.mg.architecture['grid'] == 1 and self.mg.architecture['genset'] == 1:
        self.Na += 1
    self.action_space = Discrete(self.Na)
    self.Ns = 2  # net_load and soc
    dim1 = int(self.mg.parameters['PV_rated_power'] + self.mg.parameters['load'])
    dim2 = 100
    self.observation_space = MultiDiscrete([dim1, dim2])
    self.metadata = {"render.modes": ["human"]}
    self.state, self.reward, self.done, self.info, self.round = \
        None, None, None, None, None
    self.round = None

    # Start the first round
    self.seed()
    self.reset()
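# A minimal sketch of how a continuous (net_load, soc) reading could be
# mapped onto the MultiDiscrete([dim1, dim2]) observation above (intended
# as a method of the env). Rounding the net load to an integer and turning
# the state of charge into a percentage are assumptions for illustration,
# not the environment's actual code.
import numpy as np


def _discretize_state(self, net_load, soc):
    dim1, dim2 = self.observation_space.nvec
    # Net load clipped to [0, dim1 - 1]; soc (a fraction in [0, 1]) becomes
    # an integer percentage in [0, dim2 - 1].
    net_load_idx = int(np.clip(round(net_load), 0, dim1 - 1))
    soc_idx = int(np.clip(round(soc * 100), 0, dim2 - 1))
    return np.array([net_load_idx, soc_idx])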
def __init__(
        self,
        input_shape: Tuple[int, ...],
        trading_fraction: int = 10,
        trading_assets: int = 1,  # later we want the bot to trade one of multiple possible assets
        allow_short: bool = False,
        stop_if_lost: float = None,
        initial_capital: float = 100000,
        commission=lambda size: 0.025):
    super().__init__(
        MultiDiscrete([trading_assets, trading_fraction])
        if trading_assets > 1 else Discrete(trading_fraction + 1),
        Box(low=-1, high=1, shape=input_shape)
    )  # FIXME what shape? we also need historic trades?

    self.trading_fraction = trading_fraction
    self.initial_capital = initial_capital
    self.commission = commission
    self.stop_if_lost = stop_if_lost
    self.allow_short = allow_short

    if allow_short and (trading_fraction % 2) != 0:
        _log.warning('short trades expect an even number of trading fractions')

    # eventually do not serialize ..
    self.trade_log = StreamingTransactionLog()
    self.current_net = 0
def __init__(self, config, vst_config):
    super(VSTEnv, self).__init__()
    self.config = config
    self.vst_config = vst_config
    self.num_knobs = len(vst_config['rnd'])
    self.num_audio_samples = int(config['sampleRate'] * config['renderLength'])
    # Keep audio samples divisible by fftSize
    self.num_audio_samples = self.num_audio_samples - (
        self.num_audio_samples % config['fftSize'])
    self.num_freq = int(1 + (config['fftSize'] / 2.0))
    self.num_mfcc = 20
    # self.num_windows = int((self.num_audio_samples / config['fftSize'] - 1.0) * (config['fftSize'] / config['hopSize']) + 1.0)
    self.num_windows = int((self.num_audio_samples / config['fftSize']) *
                           (config['fftSize'] / config['hopSize']) + 1.0)

    # Mapping from action index (0, 1, ..., num_knobs - 1) to VST parameter
    self.action_to_param = list(vst_config['rnd'].keys())
    self.action_space = MultiDiscrete([self.num_knobs, 4])

    # self.observation_space = spaces.Box(low=-1.0, high=1.0, shape=(self.num_freq, self.num_windows,))
    self.observation_space = spaces.Box(low=-1.0, high=1.0,
                                        shape=(self.num_mfcc, self.num_windows))
    # self.observation_space = spaces.Box(low=0, high=255, shape=(self.num_freq, self.num_windows, 1))

    # Create VST engine and generator
    self.engine = rm.RenderEngine(config['sampleRate'], config['bufferSize'],
                                  config['fftSize'])
    self.engine.load_plugin(vst_config['vstPath'])
    self.generator = rm.PatchGenerator(self.engine)
def initialize_space(self, init_space):
    if not isinstance(init_space, Box):
        raise RuntimeError(
            "Impossible to convert a gym space of type {} to a discrete space"
            " (it should be of type space.Box)"
            "".format(type(init_space)))

    min_ = init_space.low
    max_ = init_space.high
    self._ignored = min_ == max_  # which components are ignored
    self._res = min_
    self._values = np.linspace(min_, max_, num=self._nb_bins + 2)
    # the values that will be used when converting gym to glop
    self._values = self._values[1:-1, :]
    # TODO there might be a cleaner approach here
    self._bins_size = np.linspace(min_, max_, num=2 * self._nb_bins + 1)
    self._bins_size = self._bins_size[2:-1:2, :]  # the values defining the "cuts"
    self._gen_idx = np.arange(self._bins_size.shape[-1])
    n_bins = np.ones(min_.shape[0], dtype=dt_int) * dt_int(self._nb_bins)
    # if min and max are equal, we don't want multiple values for that variable
    n_bins[self._ignored] = 1
    space = MultiDiscrete(n_bins)
    self.base_initialize(space=space, g2op_to_gym=None, gym_to_g2op=None)
def __init__(self, size, sleep=0, dict_state=False, recurse_state=False,
             ma_rew=0, multidiscrete_action=False, random_sleep=False):
    assert not (dict_state and recurse_state), \
        "dict_state and recurse_state cannot both be true"
    self.size = size
    self.sleep = sleep
    self.random_sleep = random_sleep
    self.dict_state = dict_state
    self.recurse_state = recurse_state
    self.ma_rew = ma_rew
    self._md_action = multidiscrete_action
    if dict_state:
        self.observation_space = Dict(
            {"index": Box(shape=(1, ), low=0, high=size - 1),
             "rand": Box(shape=(1,), low=0, high=1, dtype=np.float64)})
    elif recurse_state:
        self.observation_space = Dict(
            {"index": Box(shape=(1, ), low=0, high=size - 1),
             "dict": Dict({
                 "tuple": Tuple((Discrete(2),
                                 Box(shape=(2,), low=0, high=1, dtype=np.float64))),
                 "rand": Box(shape=(1, 2), low=0, high=1, dtype=np.float64)})
             })
    else:
        self.observation_space = Box(shape=(1, ), low=0, high=size - 1)
    if multidiscrete_action:
        self.action_space = MultiDiscrete([2, 2])
    else:
        self.action_space = Discrete(2)
    self.done = False
    self.index = 0
    self.seed()