Example #1
 def __init__(self, actionspace: Dict, packing: dict):
     '''
     @actionspace the original actionspace
     @packing a dict that maps each new key
     to a list of keys from actionspace.
     The listed keys are removed from the
     actionspace and replaced by the new key.
     IMPORTANT: packing keys must NOT already be in actionspace.
     Example: say your actionspace has keys a, b, c.
     Then packing could be {'a_b': ['a', 'b']}. The new
     space will then have keys 'a_b' and 'c'.
     '''
     self._originalspace = DecoratedSpace.create(actionspace)
     self._subdicts = {}
     newdict = actionspace.spaces.copy()
     # now replace keys according to packing instructions.
     for sid in packing:
         subdict = self._createSubspace(packing[sid])
         self._subdicts[sid] = subdict
         newdict[sid] = subdict.getSpace()
         for oldkey in packing[sid]:
             if oldkey not in newdict:
                 raise Exception("Packing instruction " + str(packing) + " refers to unknown key " + oldkey)
             newdict.pop(oldkey)
     # we set this up as if it is a dict
     # NOTE    super(Dict, self).__init__(newdict) does NOT work as intended
     Dict.__init__(self, newdict)
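A minimal usage sketch of the packing described above. The class name PackedSpace is an assumption (this snippet does not show it); everything else follows the constructor's contract:

from gym.spaces import Dict, Discrete

# Hypothetical usage, assuming the constructor above belongs to a class
# named PackedSpace. The original space has keys a, b, c.
original = Dict({'a': Discrete(2), 'b': Discrete(3), 'c': Discrete(4)})
packed = PackedSpace(original, {'a_b': ['a', 'b']})
print(sorted(packed.spaces.keys()))  # ['a_b', 'c']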
Example #2
 def test_MultiBinary(self):
     space = DecoratedSpace.create(MultiBinary(7))
     self.assertEqual(2**7, space.getSize())
     self.assertEqual([], space.getSubSpaces())
     # reverse of normal binary notation
     self.assertEqual([0, 1, 0, 1, 1, 0, 1],
                      list(space.getById(64 + 16 + 8 + 2)))
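The "reverse of normal binary notation" comment means the id is decoded least-significant-bit first. A standalone sketch of that decoding (not necessarily the library's implementation):

def index_to_bits(index: int, width: int):
    # Least-significant bit first: bit i of the result is (index >> i) & 1.
    return [(index >> i) & 1 for i in range(width)]

assert index_to_bits(64 + 16 + 8 + 2, 7) == [0, 1, 0, 1, 1, 0, 1]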
Example #3
    def __init__(self, agentComponentList:List[QAgentComponent], actionspace:Dict=None, observationspace=None, parameters:dict=None):
        """
        @param AgentComponentList: a list of QAgentComponent. Size must be >=1.
        The agent environments should be equal to our environment,
        or to a Packed version of it. We can't check this because
        environments do not implement equals at this moment.
        @param environment the openAI Gym Env. Must have actionspace of type Dict.
        Must be a non-packed space, so that the actions can be packed
        properly for each QAgentComponent individually.
        @param parameters the optional init dictionary with parameters  
        """
        if not isinstance(actionspace, Dict):
            raise ValueError("actionspace must be Dict but found " + str(actionspace))

        if len(agentComponentList) == 0:
            raise ValueError("There must be at least 1 agent in the list")
        
        for agent in agentComponentList:
            if not isinstance(agent, QAgentComponent):
                raise ValueError("All agent components for QCoordinator must be QAgentComponent but found " + agent)

        super(QCoordinator, self).__init__(agentComponentList, actionspace, observationspace, parameters)
        self.actionspace = DecoratedSpace.create(actionspace)
        if self.actionspace.getSize() == 0:
            # error because we then can't find the best action
            raise ValueError("There are no actions in the space")
Example #4
 def test_Dict(self):
     space = DecoratedSpace.create(
         Dict({
             'a': Discrete(5),
             'b': Discrete(2)
         }))
     self.assertEqual(10, space.getSize())
     self.assertEqual(2, len(space.getSubSpaces()))
     self.assertEqual({'a': 3, 'b': 1}, space.getById(8))
     self.assertEqual(8, space.getIndexOf({'a': 3, 'b': 1}))
     self.assertEqual(8, space.getIndexOf({'b': 1, 'a': 3}))
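The id is a mixed-radix number over the subspace sizes: with sizes 5 for 'a' and 2 for 'b', id 8 decodes as a = 8 % 5 = 3 and b = 8 // 5 = 1. A small sketch of that decoding, assuming keys are taken in the order shown with the first key as the least significant digit:

def decode(index: int, sizes: dict) -> dict:
    # Mixed-radix decode: earlier keys are the least significant digits.
    result = {}
    for key, size in sizes.items():
        result[key] = index % size
        index //= size
    return result

assert decode(8, {'a': 5, 'b': 2}) == {'a': 3, 'b': 1}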
Example #5
 def __init__(self, robot_id: str, env: Env, parameters: dict = None):
     super().__init__(robot_id, env, parameters)
     full_action_space = DecoratedSpace.create(env.action_space)
     self._action_space = full_action_space.get(robot_id)
     self._env = env
     self._robot_id = robot_id
     self._robot_domain = self._env.robots[self._robot_id].get_domain
     self._graph = self._env.create_graph(self._env.robots[self._robot_id])
     self._action_mapping = {
         (-1, 0): self.ACTIONS.get('UP'),
         (1, 0): self.ACTIONS.get('DOWN'),
         (0, -1): self.ACTIONS.get('LEFT'),
         (0, 1): self.ACTIONS.get('RIGHT')
     }
     # Compute shortest paths between all nodes
     self._path_dict = dict(nx.all_pairs_dijkstra_path(self._graph))
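With the all-pairs table precomputed, route planning reduces to a dictionary lookup plus translating consecutive nodes into moves via _action_mapping. A hedged sketch, assuming the graph nodes are (row, column) tuples; the helper name is illustrative:

def _actions_along_path(self, source, target):
    # Hypothetical helper: walk the precomputed shortest path and map
    # each (row_delta, col_delta) step to a primitive action.
    path = self._path_dict[source][target]
    return [self._action_mapping[(nxt[0] - cur[0], nxt[1] - cur[1])]
            for cur, nxt in zip(path, path[1:])]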
Example #6
 def test_Dict_with_Dict(self):
     space = DecoratedSpace.create(
         Dict({
             'p': Dict({
                 'a': Discrete(5),
                 'b': Discrete(2)
             }),
             'q': Discrete(7)
         }))
     self.assertEqual(70, space.getSize())
     self.assertEqual(2, len(space.getSubSpaces()))
     self.assertEqual({'p': {'a': 4, 'b': 0}, 'q': 3}, space.getById(34))
     self.assertEqual(34, space.getIndexOf({
         'p': {
             'a': 4,
             'b': 0
         },
         'q': 3
     }))
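The nested id follows the same mixed-radix rule applied recursively: the inner dict 'p' has size 5 * 2 = 10, so id 34 splits into p = 34 % 10 = 4 (which itself decodes to a = 4, b = 0) and q = 34 // 10 = 3. As a standalone check:

# Recursive mixed-radix check for the nested example above.
p_index, q = 34 % 10, 34 // 10   # 'p' has size 5 * 2 = 10
a, b = p_index % 5, p_index // 5
assert (a, b, q) == (4, 0, 3)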
Example #7
    def __init__(self,
                 switchId: str,
                 actionspace: Dict = None,
                 observationspace=None,
                 parameters: dict = None):
        # Avoid a mutable default argument; treat None as "no overrides".
        self._parameters = copy.deepcopy(self.DEFAULT_PARAMETERS)
        self._parameters.update(parameters or {})

        super().__init__(switchId, actionspace, observationspace,
                         self._parameters)
        # determine our action space, subset of env action_space
        self._lastAction = None
        self._lastState = None
        self._alpha = self._parameters['alpha']
        self._gamma = self._parameters['gamma']
        self._epsilon = self._parameters['epsilon']
        self._Q = {}  # Q[state][action]=Q value after _lastAction
        self._steps = 0
        self._eval = False  # in eval mode, the agent executes the greedy policy given by the q function
        self._actionspace = DecoratedSpace.create(actionspace)
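The stored epsilon, the Q table, and the decorated space size are the ingredients of epsilon-greedy action selection. A minimal sketch of the usual tabular scheme, not necessarily this agent's exact implementation:

import random

def _select_action(self, state):
    # Epsilon-greedy over the flat action indices of the decorated space.
    n = self._actionspace.getSize()
    if self._eval or random.random() >= self._epsilon:
        q_row = self._Q.get(state, {})
        return max(range(n), key=lambda a: q_row.get(a, 0.0))  # greedy
    return random.randrange(n)  # explore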
Example #8
 def __init__(self, agentId, actionspace:Dict, observationspace, parameters:dict):
     AtomicAgent.__init__(self, agentId, actionspace, observationspace, parameters)
     self._parameters = copy.deepcopy(self.DEFAULT_PARAMETERS)
     self._parameters = recursive_update(self._parameters, parameters)
     
     self._prev_state = None
     # TODO: change to self._step_output = dict({"obs": observation_space.sample(), "reward": None, "done": None, "prev_action": None})
     self._step_output = None
     self._action = [-1]
     decoratedspace = DecoratedSpace.create(actionspace)
     self._num_actions = decoratedspace.n
     self._train_frequency = self._parameters['train_frequency']
     self._save_frequency = self._parameters['save_frequency']
     self._agentId = agentId
     self._PPO = PPO(self._parameters, self._num_actions)
     self._buffer = Buffer(self._parameters, self._num_actions)
     self._cumulative_rewards = 0
     self._episode_step = 0
     self._episodes = 1
     self._t = 0
     self._stats = {"cumulative_rewards": [],
                    "episode_length": [],
                    "value": [],
                    "learning_rate": [],
                    "entropy": [],
                    "policy_loss": [],
                    "value_loss": []}
     tf.reset_default_graph()
     self._step = 0
     summary_path = 'summaries/' + self._parameters['name'] + '_' + \
                     self._parameters['algorithm']
     if not os.path.exists(summary_path):
         os.makedirs(summary_path)
     self._summary_writer = tf.summary.FileWriter(summary_path)
     if self._parameters['influence']:
         self._seq_len = self._parameters['inf_seq_len']
     elif self._parameters['recurrent']:
         self._seq_len = self._parameters['seq_len']
     else:
         self._seq_len = 1
Example #9
 def test_numberToList(self):
     for val in numberListTestValues:
         self.assertEqual(val[1],
                          DecoratedSpace.numberToList(val[2], val[0]))
Example #10
 def test_Box(self):
     space = DecoratedSpace.create(Box(low=-1.0, high=2.0, shape=(3, 4)))
     self.assertEqual(math.inf, space.getSize())
     self.assertRaises(Exception, space.getOriginalSpace, 1)
Example #11
 def test_Discrete(self):
     space = DecoratedSpace.create(Discrete(5))
     self.assertEqual(5, space.getSize())
     self.assertEqual([], space.getSubSpaces())
     self.assertEqual(2, space.getById(2))
     self.assertEqual(2, space.getIndexOf(2))
Example #12
 def test_listToNumber(self):
     for val in numberListTestValues:
         self.assertEqual(val[2],
                          DecoratedSpace.listToNumber(val[1], val[0]))
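Taken together, this test and test_numberToList (Example #9) state that numberToList and listToNumber are inverses for every radix vector in the test data. The same invariant as one standalone round-trip check:

# Round-trip property implied by the two tests: converting an index to a
# digit list and back must reproduce the index.
for n, digits, index in numberListTestValues:
    assert DecoratedSpace.listToNumber(
        DecoratedSpace.numberToList(index, n), n) == index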
Example #13
    def __init__(self, agentId, actionspace: Dict, observationspace,
                 parameters: dict):
        """
        @param parameters dict that must contain the keys 'treeAgent' and 'rolloutAgent'
        and may contain 'otherAgents'.
        'otherAgents' must map to a (possibly empty) list of dict objects for a call to createAgents.
        'treeAgent' and 'rolloutAgent' must map to a dict object for a call to createAgent.
        The dict must also contain a 'simulator' key containing a copy of the env parameters,
        so that the agent can create a duplicate environment. The simulator dict must contain
        a key 'fullname' with the full name of the environment
        for the class loader (see EnvironmentFactory).
        """
        super().__init__(agentId, actionspace, observationspace, parameters)
        if not ('treeAgent' in parameters and 'rolloutAgent' in parameters):
            raise "parameters does not contain 'treeAgent', 'rolloutAgent':" + str(
                parameters)
        self._parameters = copy.deepcopy(self.DEFAULT_PARAMETERS)
        self._parameters = recursive_update(self._parameters, parameters)

        if 'timeLimit' in self._parameters:
            if 'iterationLimit' in self._parameters:
                raise ValueError(
                    "Cannot have both a time limit and an iteration limit")
            self._limitType = 'time'
        else:
            if 'iterationLimit' not in self._parameters:
                DEFAULT_LIMIT = 1000
                logging.error(
                    "Must have either a time limit or an iteration limit. Using default iteration limit: "
                    + str(DEFAULT_LIMIT))
                self._parameters['iterationLimit'] = DEFAULT_LIMIT
            # number of iterations of the search
            if self._parameters['iterationLimit'] < 1:
                raise ValueError("Iteration limit must be greater than one")
            self._limitType = 'iterations'

        # start the simulator environment
        envparams = self._parameters['simulator']
        e = EnvironmentFactory.createEnvironment(envparams['fullname'],
                                                 envparams)
        self._simulator = ModifiedGymEnv(
            e, DecoratedSpace.create(copy.deepcopy(e.action_space)))

        # diyBonus logic: to refactor -- include in a simulator factory / only for FactoryFloor env
        diyBonus = self._parameters.get("diyBonus")
        if diyBonus is not None:
            self._simulator = DiyFactoryFloorAdapter(self._simulator, diyBonus,
                                                     self.agentId)

        self._treeAgent = createAgent(self._simulator.action_space,
                                      self._simulator.observation_space,
                                      parameters['treeAgent'])

        if 'otherAgents' in parameters:
            rolloutAgentDict = copy.deepcopy(parameters['otherAgents'])
            rolloutAgentList = rolloutAgentDict['subAgentList']
            rolloutAgentList.append(parameters['rolloutAgent'])
            rolloutAgentDict['subAgentList'] = rolloutAgentList
            self._rolloutAgent = createAgent(self._simulator.action_space,
                                             self._simulator.observation_space,
                                             rolloutAgentDict)
            self._otherAgents = createAgent(self._simulator.action_space,
                                            self._simulator.observation_space,
                                            parameters['otherAgents'])
        else:
            self._otherAgents = None
            self._rolloutAgent = createAgent(self._simulator.action_space,
                                             self._simulator.observation_space,
                                             parameters['rolloutAgent'])
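The _limitType flag presumably drives the top-level search loop: iterate until either the wall-clock budget or the iteration budget runs out. A hedged sketch of such a driver; everything except _limitType and _parameters is a hypothetical name:

import time

def _search(self, state):
    # Hypothetical driver honouring whichever budget was configured above.
    if self._limitType == 'time':
        deadline = time.time() + self._parameters['timeLimit']
        while time.time() < deadline:
            self._executeRound(state)  # one hypothetical MCTS iteration
    else:
        for _ in range(self._parameters['iterationLimit']):
            self._executeRound(state)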
Example #14
 def test_emptyDict(self):
     space = DecoratedSpace.create(Dict({}))
     self.assertEqual(0, space.getSize())
Example #15
 def test_Tuple(self):
     space = DecoratedSpace.create(Tuple((Discrete(2), Discrete(3))))
     self.assertEqual(2, len(space.getSubSpaces()))
     self.assertEqual(6, space.getSize())
     self.assertEqual((0, 2), space.getById(4))
Example #16
 def test_MultiDiscrete(self):
     space = DecoratedSpace.create(MultiDiscrete([5, 2, 3]))
     self.assertEqual(30, space.getSize())
     self.assertEqual([], space.getSubSpaces())
     self.assertEqual([2, 1, 2], list(space.getById(27)))
     self.assertEqual(27, space.getIndexOf([2, 1, 2]))
Example #17
 def __init__(self, agentId:str, actionspace:Dict, observationspace, parameters:dict=None):
     super().__init__(agentId, actionspace, observationspace, parameters)
     full_action_space = DecoratedSpace.create(actionspace)
     self.action_space = full_action_space.get(agentId)