def __init__(self, actionspace: Dict, packing: dict):
    '''
    @actionspace the original actionspace
    @packing a dict that maps new dict keys to a list of keys from
    actionspace. The keys in the list are removed from the actionspace
    and the new dict keys are added.
    IMPORTANT: packing keys must NOT be in actionspace.
    Example: say your actionspace has keys a, b, c. Then packing could be
    {'a_b': ['a', 'b']}. The new space will then have keys 'a_b' and 'c'.
    '''
    self._originalspace = DecoratedSpace.create(actionspace)
    self._subdicts = {}
    newdict = actionspace.spaces.copy()
    # now replace keys according to the packing instructions
    for sid in packing:
        subdict = self._createSubspace(packing[sid])
        self._subdicts[sid] = subdict
        newdict[sid] = subdict.getSpace()
        for oldkey in packing[sid]:
            if oldkey not in newdict:
                raise Exception("Packing instruction " + str(packing)
                                + " refers to unknown key " + oldkey)
            newdict.pop(oldkey)
    # we set this up as if it is a dict
    # NOTE super(Dict, self).__init__(newdict) does NOT work as intended
    Dict.__init__(self, newdict)
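# A minimal sketch of the key bookkeeping performed above, assuming plain
# gym.spaces. The real implementation builds packed subspaces via
# _createSubspace; here a nested Dict stands in for illustration only.
from gym.spaces import Dict, Discrete

original = Dict({'a': Discrete(5), 'b': Discrete(2), 'c': Discrete(3)})
packing = {'a_b': ['a', 'b']}

newdict = original.spaces.copy()
for sid, oldkeys in packing.items():
    newdict[sid] = Dict({k: original.spaces[k] for k in oldkeys})
    for oldkey in oldkeys:
        newdict.pop(oldkey)

print(sorted(newdict.keys()))  # ['a_b', 'c']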
def test_MultiBinary(self):
    space = DecoratedSpace.create(MultiBinary(7))
    self.assertEquals(2**7, space.getSize())
    self.assertEquals([], space.getSubSpaces())
    # reverse of normal binary notation
    self.assertEquals([0, 1, 0, 1, 1, 0, 1],
                      list(space.getById(64 + 16 + 8 + 2)))
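# Decoding the id above by hand: 64 + 16 + 8 + 2 == 90 and
# bin(90) == '0b1011010'. getById apparently returns the bits
# least-significant first, hence the "reverse of normal binary notation".
bits = [int(b) for b in reversed(format(90, '07b'))]
assert bits == [0, 1, 0, 1, 1, 0, 1]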
def __init__(self, agentComponentList: List[QAgentComponent],
             actionspace: Dict = None, observationspace=None,
             parameters: dict = None):
    """
    @param agentComponentList: a list of QAgentComponent. Size must be >= 1.
    The agents' environments should be equal to our environment, or to a
    Packed version of it. We can't check this because environments do not
    implement equals at this moment.
    @param actionspace the action space of the openAI Gym Env. Must be of
    type Dict and must be a non-packed space, so that the actions can be
    packed properly for each QAgentComponent individually.
    @param parameters the optional init dictionary with parameters
    """
    if not isinstance(actionspace, Dict):
        raise ValueError("actionspace must be Dict but found " + str(actionspace))
    if len(agentComponentList) == 0:
        raise ValueError("There must be at least 1 agent in the list")
    for agent in agentComponentList:
        if not isinstance(agent, QAgentComponent):
            raise ValueError("All agent components for QCoordinator must be "
                             + "QAgentComponent but found " + str(agent))
    super(QCoordinator, self).__init__(agentComponentList, actionspace,
                                       observationspace, parameters)
    self.actionspace = DecoratedSpace.create(actionspace)
    if self.actionspace.getSize() == 0:
        # error because we then can't find the best action
        raise ValueError("There are no actions in the space")
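# A hedged smoke check of the fail-fast validation above, assuming
# QCoordinator and gym.spaces.Discrete are in scope: a non-Dict action
# space is rejected before anything else is inspected.
try:
    QCoordinator([], Discrete(4))
except ValueError as e:
    print(e)  # actionspace must be Dict but found Discrete(4)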
def test_Dict(self):
    space = DecoratedSpace.create(Dict({'a': Discrete(5), 'b': Discrete(2)}))
    self.assertEquals(10, space.getSize())
    self.assertEquals(2, len(space.getSubSpaces()))
    space.getById(8)  # smoke test
    self.assertEquals({'a': 3, 'b': 1}, space.getById(8))
    self.assertEquals(8, space.getIndexOf({'a': 3, 'b': 1}))
    self.assertEquals(8, space.getIndexOf({'b': 1, 'a': 3}))
def __init__(self, robot_id: str, env: Env, parameters: dict = None):
    super().__init__(robot_id, env, parameters)
    full_action_space = DecoratedSpace.create(env.action_space)
    self._action_space = full_action_space.get(robot_id)
    self._env = env
    self._robot_id = robot_id
    self._robot_domain = self._env.robots[self._robot_id].get_domain
    self._graph = self._env.create_graph(self._env.robots[self._robot_id])
    self._action_mapping = {
        (-1, 0): self.ACTIONS.get('UP'),
        (1, 0): self.ACTIONS.get('DOWN'),
        (0, -1): self.ACTIONS.get('LEFT'),
        (0, 1): self.ACTIONS.get('RIGHT')
    }
    # Compute shortest paths between all nodes
    self._path_dict = dict(nx.all_pairs_dijkstra_path(self._graph))
def test_Dict_with_Dict(self):
    space = DecoratedSpace.create(Dict({
        'p': Dict({'a': Discrete(5), 'b': Discrete(2)}),
        'q': Discrete(7)
    }))
    self.assertEquals(70, space.getSize())
    self.assertEquals(2, len(space.getSubSpaces()))
    self.assertEquals({'p': {'a': 4, 'b': 0}, 'q': 3}, space.getById(34))
    self.assertEquals(34, space.getIndexOf({'p': {'a': 4, 'b': 0}, 'q': 3}))
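# Hand-computing the index from the test above, assuming the little-endian
# mixed-radix encoding the Dict tests imply (first key varies fastest):
a, b = 4, 0            # inner dict 'p' with component sizes 5 and 2
p = a + 5 * b          # -> 4
q = 3                  # outer component 'q' of size 7
index = p + 10 * q     # 'p' spans 5 * 2 == 10 ids -> 34
assert index == 34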
def __init__(self, switchId: str, actionspace: Dict = None,
             observationspace=None, parameters: dict = {}):
    self._parameters = copy.deepcopy(self.DEFAULT_PARAMETERS)
    self._parameters.update(parameters)
    super().__init__(switchId, actionspace, observationspace, self._parameters)
    # determine our action space, subset of env action_space
    self._lastAction = None
    self._lastState = None
    self._alpha = self._parameters['alpha']
    self._gamma = self._parameters['gamma']
    self._epsilon = self._parameters['epsilon']
    self._Q = {}  # Q[state][action] = Q value after _lastAction
    self._steps = 0
    self._eval = False  # in eval mode, the agent executes the greedy policy given by the Q function
    self._actionspace = DecoratedSpace.create(actionspace)
def __init__(self, agentId, actionspace: Dict, observationspace, parameters: dict):
    AtomicAgent.__init__(self, agentId, actionspace, observationspace, parameters)
    self._parameters = copy.deepcopy(self.DEFAULT_PARAMETERS)
    self._parameters = recursive_update(self._parameters, parameters)
    self._prev_state = None
    # TODO: change to self._step_output = dict({"obs": observation_space.sample(),
    # "reward": None, "done": None, "prev_action": None})
    self._step_output = None
    self._action = [-1]
    decoratedspace = DecoratedSpace.create(actionspace)
    self._num_actions = decoratedspace.n
    self._train_frequency = self._parameters['train_frequency']
    self._save_frequency = self._parameters['save_frequency']
    self._agentId = agentId
    self._PPO = PPO(self._parameters, self._num_actions)
    self._buffer = Buffer(self._parameters, self._num_actions)
    self._cumulative_rewards = 0
    self._episode_step = 0
    self._episodes = 1
    self._t = 0
    self._stats = {"cumulative_rewards": [], "episode_length": [],
                   "value": [], "learning_rate": [], "entropy": [],
                   "policy_loss": [], "value_loss": []}
    tf.reset_default_graph()
    self._step = 0
    summary_path = 'summaries/' + self._parameters['name'] + '_' + \
                   self._parameters['algorithm']
    if not os.path.exists(summary_path):
        os.makedirs(summary_path)
    self._summary_writer = tf.summary.FileWriter(summary_path)
    if self._parameters['influence']:
        self._seq_len = self._parameters['inf_seq_len']
    elif self._parameters['recurrent']:
        self._seq_len = self._parameters['seq_len']
    else:
        self._seq_len = 1
def test_numberToList(self):
    for val in numberListTestValues:
        self.assertEqual(val[1], DecoratedSpace.numberToList(val[2], val[0]))
def test_Box(self):
    space = DecoratedSpace.create(Box(low=-1.0, high=2.0, shape=(3, 4)))
    self.assertEquals(math.inf, space.getSize())
    self.assertRaises(Exception, space.getOriginalSpace, 1)
def test_Discrete(self):
    space = DecoratedSpace.create(Discrete(5))
    self.assertEquals(5, space.getSize())
    self.assertEquals([], space.getSubSpaces())
    self.assertEquals(2, space.getById(2))
    self.assertEquals(2, space.getIndexOf(2))
def test_listToNumber(self):
    for val in numberListTestValues:
        self.assertEqual(val[2], DecoratedSpace.listToNumber(val[1], val[0]))
def __init__(self, agentId, actionspace: Dict, observationspace, parameters: dict): """ @param parameters dict that must contain keys 'otherAgents', 'treeAgent' and 'rolloutAgent' 'otherAgents' must map to a (possibly empty) list of dict objects for a call to createAgents 'treeAgent' and 'rolloutAgent' must map to a dict object for a call to createAgent. The dict must also contain a 'simulator' key containing a copy of the env parameters, so that the agent can create a duplicate environment. The simulator dict must contain a key 'fullname' containing the full name of the environment for the class loader (se EnvironmentsFactory). """ super().__init__(agentId, actionspace, observationspace, parameters) if not ('treeAgent' in parameters and 'rolloutAgent' in parameters): raise "parameters does not contain 'treeAgent', 'rolloutAgent':" + str( parameters) self._parameters = copy.deepcopy(self.DEFAULT_PARAMETERS) self._parameters = recursive_update(self._parameters, parameters) if 'timeLimit' in self._parameters: if 'iterationLimit' in self._parameters: raise ValueError( "Cannot have both a time limit and an iteration limit") self._limitType = 'time' else: if 'iterationLimit' not in self._parameters: DEFAULT_LIMIT = 1000 logging.error( "Must have either a time limit or an iteration limit. Using default iteration limit: " + str(DEFAULT_LIMIT)) self._parameters['iterationLimit'] = DEFAULT_LIMIT # number of iterations of the search if self._parameters['iterationLimit'] < 1: raise ValueError("Iteration limit must be greater than one") self._limitType = 'iterations' # start the simulator environment envparams = self._parameters['simulator'] e = EnvironmentFactory.createEnvironment(envparams['fullname'], envparams) self._simulator = ModifiedGymEnv( e, DecoratedSpace.create(copy.deepcopy(e.action_space))) # diyBonus logic: to refactor -- include in a simulator factory / only for FactoryFloor env diyBonus = self._parameters.get("diyBonus") if diyBonus is not None: self._simulator = DiyFactoryFloorAdapter(self._simulator, diyBonus, self.agentId) self._treeAgent = createAgent(self._simulator.action_space, self._simulator.observation_space, parameters['treeAgent']) if 'otherAgents' in parameters: rolloutAgentDict = copy.deepcopy(parameters['otherAgents']) rolloutAgentList = rolloutAgentDict['subAgentList'] rolloutAgentList.append(parameters['rolloutAgent']) rolloutAgentDict['subAgentList'] = rolloutAgentList self._rolloutAgent = createAgent(self._simulator.action_space, self._simulator.observation_space, rolloutAgentDict) self._otherAgents = createAgent(self._simulator.action_space, self._simulator.observation_space, parameters['otherAgents']) else: self._otherAgents = None self._rolloutAgent = createAgent(self._simulator.action_space, self._simulator.observation_space, parameters['rolloutAgent'])
def test_emptyDict(self):
    space = DecoratedSpace.create(Dict({}))
    self.assertEquals(0, space.getSize())
def test_Tuple(self):
    space = DecoratedSpace.create(Tuple((Discrete(2), Discrete(3))))
    self.assertEquals(2, len(space.getSubSpaces()))
    self.assertEquals(6, space.getSize())
    self.assertEquals((0, 2), space.getById(4))
def test_MultiDiscrete(self):
    space = DecoratedSpace.create(MultiDiscrete([5, 2, 3]))
    self.assertEquals(30, space.getSize())
    self.assertEquals([], space.getSubSpaces())
    self.assertEquals([2, 1, 2], list(space.getById(27)))
    self.assertEquals(27, space.getIndexOf([2, 1, 2]))
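# Verifying the MultiDiscrete id by hand, assuming the same little-endian
# mixed-radix scheme as in the Dict tests (first component varies fastest):
index = 2 + 5 * 1 + (5 * 2) * 2   # components [2, 1, 2], sizes [5, 2, 3]
assert index == 27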
def __init__(self, agentId: str, actionspace: Dict, observationspace,
             parameters: dict = None):
    super().__init__(agentId, actionspace, observationspace, parameters)
    full_action_space = DecoratedSpace.create(actionspace)
    self.action_space = full_action_space.get(agentId)