Example #1
    import time

    import torch

    # Module paths below are assumed from the cnns package layout; adjust
    # them to match your checkout.
    from cnns.nnlib.utils.arguments import Arguments
    from cnns.nnlib.utils.general_utils import (CompressType, ConvType,
                                                TensorType)
    from cnns.nnlib.pytorch_architecture.resnet2d import resnet18

    device = torch.device("cpu")
    print("device used: ", str(device))

    args = Arguments()
    args.in_channels = 3
    # args.conv_type = ConvType.FFT2D
    args.conv_type = ConvType.STANDARD2D
    args.compress_rate = None
    args.preserve_energy = None
    args.is_debug = False
    args.next_power2 = True
    args.compress_type = CompressType.STANDARD
    args.tensor_type = TensorType.FLOAT32
    args.num_classes = 10
    args.min_batch_size = 16
    args.test_batch_size = 16

    # Random mini-batch shaped like CIFAR-10 images: (N, C, H, W).
    dtype = torch.float32
    batch_size = 16
    inputs = torch.randn(batch_size,
                         args.in_channels,
                         32,
                         32,
                         dtype=dtype,
                         device=device)

    model = resnet18(args=args)
    model.to(device)
    model.eval()
    start_eval = time.time()
    outputs_standard = model(inputs)
    standard_time = time.time() - start_eval
    print("standard forward pass time: ", standard_time)
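
To see the speed and accuracy trade-off, the same batch can be pushed through an FFT-based variant of the model. The block below is a minimal sketch, not part of the original example: it assumes the FFT and standard ResNet-18 variants expose compatible state dicts, so the standard model's weights can be copied over; with compress_rate=None and preserve_energy=None the FFT path should stay numerically close to the standard output.

    # Sketch only: assumes both model variants share parameter names so
    # load_state_dict can copy the standard model's weights.
    args.conv_type = ConvType.FFT2D
    model_fft = resnet18(args=args)
    model_fft.load_state_dict(model.state_dict())
    model_fft.to(device)
    model_fft.eval()

    start_eval = time.time()
    outputs_fft = model_fft(inputs)
    fft_time = time.time() - start_eval

    print("max abs difference: ",
          (outputs_standard - outputs_fft).abs().max().item())
    print("standard vs FFT time: ", standard_time, fft_time)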
Example #2
    import time
    import unittest

    import torch

    # Module paths below are assumed from the cnns package layout; adjust
    # them to match your checkout.
    from cnns.nnlib.utils.arguments import Arguments
    from cnns.nnlib.utils.general_utils import (CompressType, ConvExecType,
                                                ConvType, NetworkType,
                                                TensorType)
    from cnns.nnlib.pytorch_architecture.resnet2d import resnet18


    class TestForwardPass(unittest.TestCase):  # enclosing class name assumed

        def test_forward_pass_resnet18(self):
        """
        total time for (ConvType.STANDARD2D-ConvExecType.SERIAL): 6.813918352127075
        total time for (ConvType.FFT2D-ConvExecType.CUDA): 53.35197567939758
        total time for (ConvType.FFT2D-ConvExecType.SGEMM): 55.51149845123291

        total time for (ConvType.STANDARD2D-ConvExecType.SERIAL): 6.736859083175659
        total time for (ConvType.FFT2D-ConvExecType.CUDA): 53.84979581832886
        total time for (ConvType.FFT2D-ConvExecType.SGEMM): 56.26755166053772

        global init time:  0.24471688270568848
        global pad time:  4.250756025314331
        (r)fft time:  8.754997730255127
        conjugate time:  3.734828233718872
        correlation time:  25.324009656906128
        restore time (de-compress/concat output):  0.021800994873046875
        i(r)fft time:  8.525353193283081
        total time for (ConvType.FFT2D-ConvExecType.SGEMM): 56.27733850479126
        GPU mem: 2903

        global init time:  0.2371835708618164
        global pad time:  4.492943286895752
        (r)fft time:  9.08437442779541
        conjugate time:  3.8394811153411865
        correlation time:  25.043412446975708
        restore time (de-compress/concat output):  0.021334409713745117
        i(r)fft time:  5.491833925247192
        total time for (ConvType.FFT2D-ConvExecType.CUDA): 53.804604053497314
        GPU mem: 2679
        """
        if not torch.cuda.is_available():
            self.skipTest("CUDA device is not available.")

        device = torch.device("cuda")
        print("\ndevice used: ", str(device))

        C = 3  # input channels; the CIFAR-10 batches below are N x C x 32 x 32
        args = Arguments()
        torch.manual_seed(args.seed)
        torch.cuda.manual_seed_all(args.seed)

        args.sample_count_limit = 10000
        args.min_batch_size = 32
        args.dataset_name = "cifar10"
        args.test_batch_size = args.min_batch_size
        args.network_type = NetworkType.ResNet18
        from cnns.nnlib.datasets.cifar import get_cifar
        train_loader, test_loader, _, _ = get_cifar(
            args=args, dataset_name=args.dataset_name)

        repetition = 1

        args.in_channels = C
        args.compress_rate = None
        args.preserve_energy = 100
        args.is_debug = True
        args.next_power2 = True
        args.compress_type = CompressType.STANDARD
        args.tensor_type = TensorType.FLOAT32
        args.num_classes = 10
        args.dtype = torch.float32
        # Uncomment entries to benchmark additional convolution variants.
        conv_exec_types = [
            # (ConvType.STANDARD2D, ConvExecType.SERIAL),
            # (ConvType.FFT2D, ConvExecType.CUDA),
            (ConvType.FFT2D, ConvExecType.SGEMM),
            # (ConvType.FFT2D, ConvExecType.CUDA_SHARED_LOG),
            # (ConvType.FFT2D, ConvExecType.CUDA_DEEP),
            # (ConvType.FFT2D, ConvExecType.SERIAL),
            # (ConvType.FFT2D, ConvExecType.BATCH),
        ]

        for conv_type, conv_exec_type in conv_exec_types:
            args.conv_type = conv_type
            args.conv_exec_type = conv_exec_type
            model = resnet18(args=args)
            model.to(device)
            model.eval()
            start_eval = time.time()
            for _ in range(repetition):
                for inputs, _ in train_loader:
                    inputs = inputs.to(device)
                    outputs_standard = model(inputs)
            # CUDA kernels are launched asynchronously; synchronize so the
            # wall-clock measurement covers all queued work.
            torch.cuda.synchronize()
            standard_time = time.time() - start_eval
            print(f"total time for ({conv_type}-{conv_exec_type}):"
                  f" {standard_time}")