print("input size: {0}".format(HIGH_BATCH_INPUT_SIZE)) run_profile( nn.parallel.DataParallel(AsymmetricQuantizer( QuantizerConfig(QuantizationParams(bits=NBITS))).cuda(), device_ids=device_ids), HIGH_BATCH_INPUT_SIZE, 'cuda', GPU_RUNS_HIGH_BATCH) # CUDA DataParallel high batch # wall time print() print("CUDA DataParallel high batch") print("------------------------------------------------") print("Pytorch Symmetric(cuda {0}) DataParallel impl:".format(device_ids)) print("input size: {0}".format(HIGH_BATCH_INPUT_SIZE)) run_wall( nn.parallel.DataParallel(ReferenceQuantize(NBITS).cuda(), device_ids=device_ids), HIGH_BATCH_INPUT_SIZE, 'cuda', GPU_RUNS_HIGH_BATCH) print() print("Custom Symmetric (cuda {0}) DataParallel impl:".format(device_ids)) print("input size: {0}".format(HIGH_BATCH_INPUT_SIZE)) run_wall( nn.parallel.DataParallel(SymmetricQuantizer( QuantizerConfig(QuantizationParams(bits=NBITS))).cuda(), device_ids=device_ids), HIGH_BATCH_INPUT_SIZE, 'cuda', GPU_RUNS_HIGH_BATCH) print() print("Custom Assymetric (cuda {0}) DataParallel impl:".format(device_ids)) print("input size: {0}".format(HIGH_BATCH_INPUT_SIZE)) run_wall(
run_profile(
    nn.parallel.DataParallel(
        AsymmetricQuantizer(
            DefaultedPTQuantizerSpec(
                num_bits=NBITS,
                scale_shape=per_tensor_scale_shape)).cuda(),
        device_ids=device_ids),
    HIGH_BATCH_INPUT_SIZE, 'cuda', GPU_RUNS_HIGH_BATCH)

# CUDA DataParallel high batch
# wall time
print()
print("CUDA DataParallel high batch")
print("------------------------------------------------")
print("Pytorch Symmetric(cuda {0}) DataParallel impl:".format(device_ids))
print("input size: {0}".format(HIGH_BATCH_INPUT_SIZE))
run_wall(
    nn.parallel.DataParallel(
        ReferenceQuantize(NBITS).cuda(),
        device_ids=device_ids),
    HIGH_BATCH_INPUT_SIZE, 'cuda', GPU_RUNS_HIGH_BATCH)

print()
print("Custom Symmetric (cuda {0}) DataParallel impl:".format(device_ids))
print("input size: {0}".format(HIGH_BATCH_INPUT_SIZE))
run_wall(
    nn.parallel.DataParallel(
        SymmetricQuantizer(
            DefaultedPTQuantizerSpec(
                num_bits=NBITS,
                scale_shape=per_tensor_scale_shape)).cuda(),
        device_ids=device_ids),
    HIGH_BATCH_INPUT_SIZE, 'cuda', GPU_RUNS_HIGH_BATCH)

print()
print("Custom Asymmetric (cuda {0}) DataParallel impl:".format(device_ids))
print("input size: {0}".format(HIGH_BATCH_INPUT_SIZE))
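
# For orientation, a minimal sketch of the timing helpers used throughout this
# section. The real run_wall() and run_profile() are assumed to be defined
# earlier in this script; the (module, input_size, device, runs) signature is
# inferred from the call sites above, and the hypothetical _run_wall_sketch /
# _run_profile_sketch below are illustrative stand-ins, not the actual
# implementations.
def _run_wall_sketch(module, input_size, device, runs):
    import time
    import torch
    input_ = torch.randn(input_size, device=device)
    if device == 'cuda':
        torch.cuda.synchronize()  # flush pending kernels so they don't pollute the timing
    start = time.time()
    for _ in range(runs):
        module(input_)
    if device == 'cuda':
        torch.cuda.synchronize()  # wait for the last kernels before stopping the clock
    print("Elapsed time: {0:.3f} s".format(time.time() - start))


def _run_profile_sketch(module, input_size, device, runs):
    import torch
    input_ = torch.randn(input_size, device=device)
    # Same loop, but run under the autograd profiler to get per-operator times.
    with torch.autograd.profiler.profile(use_cuda=(device == 'cuda')) as prof:
        for _ in range(runs):
            module(input_)
    print(prof.key_averages().table(sort_by="cuda_time_total"))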