 def test_validActions2(self):
     gameEnv2 = MAPF_Env.MAPFEnv(num_agents2,
                                 world0=np.array(world2),
                                 goals0=np.array(goals2),
                                 DIAGONAL_MOVEMENT=False)
     validActions2 = gameEnv2._listNextValidActions(1)
     self.assertEqual(validActions2, [0])
     # With diagonal actions
     gameEnv2 = MAPF_Env.MAPFEnv(num_agents2,
                                 world0=np.array(world2),
                                 goals0=np.array(goals2),
                                 DIAGONAL_MOVEMENT=True)
     validActions2 = gameEnv2._listNextValidActions(1)
     self.assertEqual(validActions2, [0, 5, 6, 7, 8])
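 # Action encoding inferred from the test names and assertions in this suite:
 # 0 is the idle action, 1-4 are the cardinal moves (e.g. 3 = west, 4 = south),
 # and 5-8 are the diagonal moves, offered only when DIAGONAL_MOVEMENT=True.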
 def test_validActions1(self):
     # MAPF_Env.MAPFEnv(self, num_agents=1, world0=None, goals0=None, DIAGONAL_MOVEMENT=False, SIZE=10, PROB=.2, FULL_HELP=False)
     gameEnv1 = MAPF_Env.MAPFEnv(num_agents1,
                                 world0=np.array(world1),
                                 goals0=np.array(goals1),
                                 DIAGONAL_MOVEMENT=False)
     validActions1 = gameEnv1._listNextValidActions(1)
     self.assertEqual(validActions1, [0, 1, 2])
     # With diagonal actions
     gameEnv1 = MAPF_Env.MAPFEnv(num_agents1,
                                 world0=np.array(world1),
                                 goals0=np.array(goals1),
                                 DIAGONAL_MOVEMENT=True)
     validActions1 = gameEnv1._listNextValidActions(1)
     self.assertEqual(validActions1, [0, 1, 2, 5])
 def test_validActions3(self):
     gameEnv3 = MAPF_Env.MAPFEnv(num_agents3,
                                 world0=np.array(world3),
                                 goals0=np.array(goals3),
                                 DIAGONAL_MOVEMENT=False)
     validActions3a = gameEnv3._listNextValidActions(1)
     validActions3b = gameEnv3._listNextValidActions(2)
     self.assertEqual(validActions3a, [0])
     self.assertEqual(validActions3b, [0, 2])
     # With diagonal actions
     gameEnv3 = MAPF_Env.MAPFEnv(num_agents3,
                                 world0=np.array(world3),
                                 goals0=np.array(goals3),
                                 DIAGONAL_MOVEMENT=True)
     validActions3a = gameEnv3._listNextValidActions(1)
     validActions3b = gameEnv3._listNextValidActions(2)
     self.assertEqual(validActions3a, [0, 5, 6, 7])
     self.assertEqual(validActions3b, [0, 2, 5, 8])
 def test_move_diag5(self):
     # Tests diagonal collisions.
     gameEnv4 = MAPF_Env.MAPFEnv(num_agents4,
                                 world0=np.array(world4),
                                 goals0=np.array(goals4),
                                 DIAGONAL_MOVEMENT=True)
     s0 = gameEnv4.world.state.copy()
     # Agent 1
     s1, r, d, _, o_g, _, _ = gameEnv4.step((1, 5))
     s2 = gameEnv4.world.state.copy()
     self.assertEqual(r, MAPF_Env.ACTION_COST)
     self.assertFalse(d)
     self.assertFalse(o_g)
     self.assertEqual(np.sum(s0), np.sum(s2))
     self.assertEqual(1, gameEnv4.world.state[4, 4])
     # Agent 2
     s1, r, d, _, o_g, _, _ = gameEnv4.step((2, 6))
     s2 = gameEnv4.world.state.copy()
     self.assertEqual(r, MAPF_Env.COLLISION_REWARD)
     self.assertFalse(d)
     self.assertFalse(o_g)
     self.assertEqual(np.sum(s0), np.sum(s2))
     self.assertEqual(2, gameEnv4.world.state[3, 4])
     # Agent 2
     s1, r, d, _, o_g, _, _ = gameEnv4.step((2, 6))
     s2 = gameEnv4.world.state.copy()
     self.assertEqual(r, MAPF_Env.COLLISION_REWARD)
     self.assertFalse(d)
     self.assertFalse(o_g)
     self.assertEqual(np.sum(s0), np.sum(s2))
     self.assertEqual(2, gameEnv4.world.state[3, 4])
     # Agent 1
     s1, r, d, _, o_g, _, _ = gameEnv4.step((1, 7))
     s2 = gameEnv4.world.state.copy()
     self.assertEqual(r, MAPF_Env.GOAL_REWARD)
     self.assertFalse(d)
     self.assertTrue(o_g)
     self.assertEqual(np.sum(s0), np.sum(s2))
     self.assertEqual(1, gameEnv4.world.state[3, 3])
     # Agent 2
     s1, r, d, _, o_g, _, _ = gameEnv4.step((2, 6))
     s2 = gameEnv4.world.state.copy()
     self.assertEqual(r, MAPF_Env.COLLISION_REWARD)
     self.assertFalse(d)
     self.assertFalse(o_g)
     self.assertEqual(np.sum(s0), np.sum(s2))
     self.assertEqual(2, gameEnv4.world.state[3, 4])
     # After waiting, agent 2 should be able to cross diagonally.
     s1, r, d, _, o_g, _, _ = gameEnv4.step((1, 0))
     # Agent 2
     s1, r, d, _, o_g, _, _ = gameEnv4.step((2, 6))
     s2 = gameEnv4.world.state.copy()
     self.assertEqual(r, MAPF_Env.ACTION_COST)
     self.assertFalse(d)
     self.assertFalse(o_g)
     self.assertEqual(np.sum(s0), np.sum(s2))
     self.assertEqual(2, gameEnv4.world.state[4, 3])
 def test_validActions4(self):
     gameEnv4 = MAPF_Env.MAPFEnv(num_agents4,
                                 world0=np.array(world4),
                                 goals0=np.array(goals4),
                                 DIAGONAL_MOVEMENT=False)
     validActions4a = gameEnv4._listNextValidActions(1)
     validActions4b = gameEnv4._listNextValidActions(2)
     self.assertEqual(validActions4a, [0, 2])
     self.assertEqual(validActions4b, [0, 2])
     # With diagonal actions
     gameEnv4 = MAPF_Env.MAPFEnv(num_agents4,
                                 world0=np.array(world4),
                                 goals0=np.array(goals4),
                                 DIAGONAL_MOVEMENT=True)
     validActions4a = gameEnv4._listNextValidActions(1)
     validActions4b = gameEnv4._listNextValidActions(2)
     self.assertEqual(validActions4a, [0, 2, 5, 6, 7])
     self.assertEqual(validActions4b, [0, 2, 5, 6])
 def test_move_south2(self):
     gameEnv2 = MAPF_Env.MAPFEnv(num_agents2,
                                 world0=np.array(world2),
                                 goals0=np.array(goals2))
     s0 = gameEnv2.world.state.copy()
     s1, r, d, _, o_g, _, _ = gameEnv2.step((1, 4))
     s2 = gameEnv2.world.state.copy()
     self.assertEqual(r, MAPF_Env.COLLISION_REWARD)
     self.assertTrue(d)
     self.assertTrue(o_g)
     self.assertEqual(np.sum(s0), np.sum(s2))
 def test_move_south1(self):
     gameEnv1 = MAPF_Env.MAPFEnv(num_agents1,
                                 world0=np.array(world1),
                                 goals0=np.array(goals1))
     s0 = gameEnv1.world.state.copy()
     # return state, reward, done, nextActions, on_goal, blocking, valid_action
     s1, r, d, _, o_g, _, _ = gameEnv1.step((1, 4))
     s2 = gameEnv1.world.state.copy()
     self.assertEqual(r, MAPF_Env.COLLISION_REWARD)
     self.assertFalse(d)
     self.assertFalse(o_g)
     self.assertEqual(np.sum(s0), np.sum(s2))
 def testIdle1(self):
     gameEnv1 = MAPF_Env.MAPFEnv(num_agents1,
                                 world0=np.array(world1),
                                 goals0=np.array(goals1))
     s0 = gameEnv1.world.state.copy()
     # return state, reward, done, nextActions, on_goal, blocking, valid_action
     s1, r, d, _, o_g, _, _ = gameEnv1.step((1, 0))
     s2 = gameEnv1.world.state.copy()
     self.assertEqual(r, MAPF_Env.IDLE_COST)
     self.assertFalse(d)
     self.assertFalse(o_g)
     self.assertEqual(np.sum(s0), np.sum(s2))
 def test_move_south4b(self):
     gameEnv4 = MAPF_Env.MAPFEnv(num_agents4,
                                 world0=np.array(world4),
                                 goals0=np.array(goals4))
     s0 = gameEnv4.world.state.copy()
     # Agent 2
     s1, r, d, _, o_g, _, _ = gameEnv4.step((2, 4))
     s2 = gameEnv4.world.state.copy()
     self.assertEqual(r, MAPF_Env.COLLISION_REWARD)
     self.assertFalse(d)
     self.assertFalse(o_g)
     self.assertEqual(np.sum(s0), np.sum(s2))
     # Agent 1
     s1, r, d, _, o_g, _, _ = gameEnv4.step((1, 4))
     s2 = gameEnv4.world.state.copy()
     self.assertEqual(r, MAPF_Env.COLLISION_REWARD)
     self.assertFalse(d)
     self.assertTrue(o_g)
     self.assertEqual(np.sum(s0), np.sum(s2))
 def test_move_west3b(self):
     gameEnv3 = MAPF_Env.MAPFEnv(num_agents3,
                                 world0=np.array(world3),
                                 goals0=np.array(goals3))
     s0 = gameEnv3.world.state.copy()
     # Agent 2
     s1, r, d, _, o_g, _, _ = gameEnv3.step((2, 3))
     s2 = gameEnv3.world.state.copy()
     self.assertEqual(r, MAPF_Env.COLLISION_REWARD)
     self.assertFalse(d)
     self.assertFalse(o_g)
     self.assertEqual(np.sum(s0), np.sum(s2))
     # Agent 1
     s1, r, d, _, o_g, _, _ = gameEnv3.step((1, 3))
     s2 = gameEnv3.world.state.copy()
     self.assertEqual(r, MAPF_Env.COLLISION_REWARD)
     self.assertFalse(d)
     self.assertTrue(o_g)
     self.assertEqual(np.sum(s0), np.sum(s2))
 def testIdle3(self):
     gameEnv3 = MAPF_Env.MAPFEnv(num_agents3,
                                 world0=np.array(world3),
                                 goals0=np.array(goals3))
     s0 = gameEnv3.world.state.copy()
     # Agent 1
     s1, r, d, _, o_g, _, _ = gameEnv3.step((1, 0))
     s2 = gameEnv3.world.state.copy()
     self.assertEqual(r, MAPF_Env.GOAL_REWARD)
     self.assertFalse(d)
     self.assertTrue(o_g)
     self.assertEqual(np.sum(s0), np.sum(s2))
     # Agent 2
     s1, r, d, _, o_g, _, _ = gameEnv3.step((2, 0))
     s2 = gameEnv3.world.state.copy()
     self.assertEqual(r, MAPF_Env.IDLE_COST)
     self.assertFalse(d)
     self.assertFalse(o_g)
     self.assertEqual(np.sum(s0), np.sum(s2))
 def test_move_southwest4a(self):
     gameEnv4 = MAPF_Env.MAPFEnv(num_agents4,
                                 world0=np.array(world4),
                                 goals0=np.array(goals4),
                                 DIAGONAL_MOVEMENT=True)
     s0 = gameEnv4.world.state.copy()
     # Agent 1
     s1, r, d, _, o_g, _, _ = gameEnv4.step((1, 7))
     s2 = gameEnv4.world.state.copy()
     self.assertEqual(r, MAPF_Env.ACTION_COST)
     self.assertFalse(d)
     self.assertFalse(o_g)
     self.assertEqual(np.sum(s0), np.sum(s2))
     # Agent 2
     s1, r, d, _, o_g, _, _ = gameEnv4.step((2, 7))
     s2 = gameEnv4.world.state.copy()
     self.assertEqual(r, MAPF_Env.COLLISION_REWARD)
     self.assertFalse(d)
     self.assertFalse(o_g)
     self.assertEqual(np.sum(s0), np.sum(s2))
 def testIdle4(self):
     gameEnv4 = MAPF_Env.MAPFEnv(num_agents4,
                                 world0=np.array(world4),
                                 goals0=np.array(goals4),
                                 DIAGONAL_MOVEMENT=False)
     s0 = gameEnv4.world.state.copy()
     # Agent 1
     s1, r, d, _, o_g, _, _ = gameEnv4.step((1, 0))
     s2 = gameEnv4.world.state.copy()
     self.assertEqual(r, MAPF_Env.GOAL_REWARD)
     self.assertFalse(d)
     self.assertTrue(o_g)
     self.assertEqual(np.sum(s0), np.sum(s2))
     # Agent 2
     s1, r, d, _, o_g, _, _ = gameEnv4.step((2, 0))
     s2 = gameEnv4.world.state.copy()
     self.assertEqual(r, MAPF_Env.IDLE_COST)
     self.assertFalse(d)
     self.assertFalse(o_g)
     self.assertEqual(np.sum(s0), np.sum(s2))
 def test_move_diag2(self):
     gameEnv4 = MAPF_Env.MAPFEnv(num_agents4,
                                 world0=np.array(world4),
                                 goals0=np.array(goals4),
                                 DIAGONAL_MOVEMENT=True)
     s0 = gameEnv4.world.state.copy()
     # Agent 2
     s1, r, d, _, o_g, _, _ = gameEnv4.step((2, 6))
     s2 = gameEnv4.world.state.copy()
     self.assertEqual(r, MAPF_Env.ACTION_COST)
     self.assertFalse(d)
     self.assertFalse(o_g)
     self.assertEqual(np.sum(s0), np.sum(s2))
     self.assertEqual(2, gameEnv4.world.state[4, 3])
     # Agent 1
     s1, r, d, _, o_g, _, _ = gameEnv4.step((1, 6))
     s2 = gameEnv4.world.state.copy()
     self.assertEqual(r, MAPF_Env.ACTION_COST)
     self.assertFalse(d)
     self.assertFalse(o_g)
     self.assertEqual(np.sum(s0), np.sum(s2))
     self.assertEqual(1, gameEnv4.world.state[4, 2])
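The fixtures referenced throughout these tests (num_agents1, world1, goals1, and so on) are defined elsewhere in the test module. As a hypothetical illustration only (not the actual fixture these assertions were written against), assuming the common MAPF_Env convention that 0 marks a free cell, -1 an obstacle, and a positive integer k the position of agent k in world0 (or agent k's goal cell in goals0):

num_agents1 = 1
world1 = [[ 1,  0,  0],
          [ 0, -1,  0],
          [ 0,  0,  0]]   # agent 1 starts at the top-left corner
goals1 = [[0, 0, 0],
          [0, 0, 0],
          [0, 0, 1]]      # agent 1's goal is the bottom-right cell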
Example #15
    lr = tf.constant(LR_Q)
    trainer = tf.contrib.opt.NadamOptimizer(learning_rate=lr, use_locking=True)

    if TRAINING:
        num_workers = NUM_THREADS  # one worker per available CPU thread
    else:
        num_workers = NUM_THREADS
        NUM_META_AGENTS = 1

    gameEnvs, workers, groupLocks = [], [], []
    n = 1  # counter of total number of agents (for naming)
    for ma in range(NUM_META_AGENTS):
        num_agents = NUM_THREADS
        gameEnv = mapf_gym.MAPFEnv(num_agents=num_agents,
                                   DIAGONAL_MOVEMENT=DIAG_MVMT,
                                   SIZE=ENVIRONMENT_SIZE,
                                   observation_size=GRID_SIZE,
                                   PROB=OBSTACLE_DENSITY,
                                   FULL_HELP=FULL_HELP)
        gameEnvs.append(gameEnv)

        # Create groupLock
        workerNames = ["worker_" + str(i) for i in range(n, n + num_workers)]
        groupLock = GroupLock.GroupLock([workerNames, workerNames])
        groupLocks.append(groupLock)
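        # (The two identical name lists appear to define two alternating lock
        # groups so that the worker threads advance in lockstep; this is an
        # inference from usage here, not documented behavior.)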

        # Create workers for this meta-agent
        workersTmp = []
        for i in range(ma * num_workers + 1, (ma + 1) * num_workers + 1):
            workersTmp.append(Worker(gameEnv, ma, n, a_size, groupLock))
            n += 1
        workers.append(workersTmp)
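    # A typical continuation for an A3C-style driver like this (a sketch, not the
    # original code): run each worker's work() loop on its own thread under a
    # tf.train.Coordinator. `threading` is assumed to be imported at the top of
    # the file, and worker.work(sess, coord) is an assumed Worker interface.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        worker_threads = []
        for ma_workers in workers:
            for worker in ma_workers:
                t = threading.Thread(target=worker.work, args=(sess, coord))
                t.start()
                worker_threads.append(t)
        coord.join(worker_threads)  # block until every worker thread exits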