예제 #1
0
 def test_all_reduce(self):
     # For reduce collectives the begin size must be at least element_size,
     # so a begin size of 0 is expected to be bumped up to element_size.
     params = commsParamsTest()
     params.collective = "all_reduce"
     params.beginSize = 0
     params.element_size = 2
     num_ranks = 16
     comms_utils.fixBeginSize(params, num_ranks)
     self.assertEqual(2, params.beginSize)
예제 #2
0
 def test_all_to_all(self):
     # With beginSize / element_size < world_size, the corrected begin size
     # should become element_size * world_size.
     elem_size, num_ranks = 2, 16
     params = commsParamsTest()
     params.collective = "all_to_all"
     params.beginSize = 0
     params.element_size = elem_size
     params.bitwidth = 32
     comms_utils.fixBeginSize(params, num_ranks)
     self.assertEqual(elem_size * num_ranks, params.beginSize)
예제 #3
0
 def test_all_to_all_quantized(self):
     # With (beginSize / element_size / world_size) < quant_a2a_embedding_dim,
     # the corrected begin size should become
     # element_size * world_size * quant_a2a_embedding_dim.
     elem_size, num_ranks, embedding_dim = 2, 16, 2
     params = commsParamsTest()
     params.collective = "all_to_all"
     params.beginSize = 0
     params.element_size = elem_size
     params.bitwidth = 31  # Bitwidth less than 32 triggers quantization
     params.quant_a2a_embedding_dim = embedding_dim
     comms_utils.fixBeginSize(params, num_ranks)
     self.assertEqual(elem_size * num_ranks * embedding_dim, params.beginSize)
 def test_tensor_shrink_allgather(self):
     # An all_gather recorded with worldSize=4 is prepared on a bench whose
     # world size is 1 with shrinking enabled; both tensors should end up
     # with length 1.
     bench = commsTraceReplayBench()
     bench.backendFuncs = MockBackendFunction()
     params = commsParamsTest()
     params.dcheck = 1
     params.device = "cpu"
     comm = commsArgs(
         comms="all_gather", dtype="Int", inMsgSize=4, outMsgSize=4, worldSize=4
     )
     bench.shrink = True
     bench.collectiveArgs.world_size = 1
     in_tensor, out_tensor = bench.prepComms(comm, params)
     # tensor length should shrink to world size
     for tensor in (in_tensor, out_tensor):
         self.assertEqual(1, len(tensor))
 def test_warm_up_bench(self):
     # Smoke test: warmUpBench should process a small three-entry trace
     # without raising.
     test_trace = [
         createCommsArgs(
             comms="test", inMsgSize=1, outMsgSize=1, markerStack=["test_stack"]
         ),
         # The keyword must be spelled outMsgSize (capital M), as in the entry
         # above; a misspelled keyword would not populate the output size.
         createCommsArgs(comms="all_gather", inMsgSize=2, outMsgSize=2),
         createCommsArgs(comms="wait", markerStack=["test_stack"]),
     ]
     testBench = commsTraceReplayBench()
     testBench.backendFuncs = MockBackendFunction()
     testBench.comms_trace = test_trace
     commsParams = commsParamsTest()
     testBench.warmUpBench(commsParams)
     self.assertTrue(True)  # just check to see if warmUpBench ran without failure
 def test_init_bench(self):
     # initBench is expected to copy the relevant argument values onto the
     # replay bench object.
     testBench = commsTraceReplayBench()
     commsParams = commsParamsTest()
     args = testArgs()
     args.use_timestamp = True
     args.num_msg = 1000
     args.auto_shrink = False
     args.no_warm_up = False
     testBench.initBench(commsParams, args)
     # check if parameters are being set
     # NOTE: assertEqual takes (first, second, msg); the bench attribute has to
     # be the second argument, otherwise it is only used as a failure message
     # and is never compared.
     self.assertEqual(True, testBench.use_timestamp)
     self.assertEqual(1000, testBench.max_msg_cnt)
     self.assertEqual(False, testBench.shrink)
     # do_warm_up is the negation of the no_warm_up argument
     self.assertEqual(False, not testBench.do_warm_up)
 def test_tensor_shrink_alltoallv(self):
     # An all_to_allv recorded with worldSize=4 is prepared on a bench whose
     # world size is 1 with shrinking enabled.
     bench = commsTraceReplayBench()
     bench.backendFuncs = MockBackendFunction()
     params = commsParamsTest()
     params.dcheck = 1
     params.device = "cpu"
     comm = commsArgs(
         comms="all_to_allv",
         dtype="Int",
         inMsgSize=4,
         outMsgSize=4,
         inSplit=[1, 1, 1, 1],
         outSplit=[1, 1, 1, 1],
         worldSize=4,
     )
     bench.shrink = True
     bench.collectiveArgs.world_size = 1
     in_tensor, out_tensor = bench.prepComms(comm, params)
     tensors = (in_tensor, out_tensor)
     # tensor length should shrink to world size
     for tensor in tensors:
         self.assertEqual(1, len(tensor))
     # both input and output tensors should be equal to 1 for all_to_allv
     for tensor in tensors:
         self.assertEqual(1, tensor[0])
 def test_tensor_no_shrink(self):
     # A single-element "recv" is prepared with shrinking disabled on a bench
     # whose world size is 1.
     bench = commsTraceReplayBench()
     bench.backendFuncs = MockBackendFunction()
     params = commsParamsTest()
     params.dcheck = 1
     params.device = "cpu"
     comm = commsArgs(comms="recv", dtype="Int", inMsgSize=1, outMsgSize=1)
     bench.shrink = False
     bench.collectiveArgs.world_size = 1
     in_tensor, out_tensor = bench.prepComms(comm, params)
     tensors = (in_tensor, out_tensor)
     # tensor length needs to match world_size
     for tensor in tensors:
         self.assertEqual(1, len(tensor))
     # both input and output tensors should be equal to 1
     for tensor in tensors:
         self.assertEqual(1, tensor[0])
 def test_no_tensor(self):
     # wait and barrier require no tensors, so prepComms should return empty
     # input and output tensors for both.
     bench = commsTraceReplayBench()
     bench.backendFuncs = MockBackendFunction()
     params = commsParamsTest()
     params.dcheck = 1
     params.device = "cpu"
     comm = commsArgs()
     # Note: params is configured above, but prepComms is called with None.
     for op_name in ("wait", "barrier"):
         comm.comms = op_name
         in_tensor, out_tensor = bench.prepComms(comm, None)
         self.assertEqual(0, len(in_tensor))
         self.assertEqual(0, len(out_tensor))