Code example #1
    def test_FunctionForwardCompressionConvFFTIndexBackCifar10LeNet1stLayer(
            self):
        start = time.time()
        x = cifar10_image
        print("shape of the input image: ", x.size())
        y = cifar10_lenet_filter
        print("shape of the filter: ", y.size())
        b = torch.tensor([0.0])
        # get the expected results from PyTorch's conv2d

        expected_result_tensor = F.conv2d(input=x, weight=y, bias=b)
        N, C, H, W = x.size()
        K, C, HH, WW = y.size()
        out_size = H - HH + 1  # "valid" correlation output size
        fft_size = H + out_size - 1  # padded FFT map size (avoids circular wrap-around)
        half_fft_size = fft_size // 2 + 1  # rfft stores only half of the last dimension
        fft_numel = half_fft_size * fft_size * C  # FFT coefficients per input map

        # full sweep: for index_back in range(1, fft_numel, 10):
        for index_back in range(1, 2):  # a single step here; the line above sweeps the full range
            print("index back: ", index_back)
            conv = Conv2dfft(weight_value=y,
                             bias_value=b,
                             args=Arguments(
                                 index_back=index_back,
                                 preserve_energy=100,
                                 is_debug=True,
                                 next_power2=False,
                                 compress_type=CompressType.STANDARD))
            result = conv.forward(input=x)
            # print("actual result: ", result)

            result = result.float()
            abs_error = torch.sum(
                torch.abs(result - expected_result_tensor)).item()
            print("abs error: ", abs_error)
            # symmetric relative error: the denominator sums the magnitudes of
            # both the expected and the actual result
            expected_total = torch.sum(
                torch.abs(expected_result_tensor) + torch.abs(result)).item()
            relative_error = 100.0 * abs_error / expected_total
            print("relative error: ", relative_error)
            # relative_error = torch.mean(torch.abs(result) / torch.abs(expected_result_tensor) * 100)
            print(f"absolute divergence for index back,{index_back},"
                  f"absolute error,{abs_error},"
                  f"relative error (%),{relative_error}")
        print("elapsed: ", time.time() - start)
Code example #2
    def test_FunctionForwardCompressionConvFFTPreserveEnergyCifar10LeNet1stLayer(
            self):
        print("\n")
        x = cifar10_image
        print("shape of the input image: ", x.size())
        y = cifar10_lenet_filter
        print("shape of the filter: ", y.size())
        b = torch.tensor([0.0])

        preserved_energies = [100., 99., 98.5, 98., 97., 96., 95., 94., 93.,
                              92., 91., 90., 89., 87., 85., 80., 70., 60.,
                              50., 40., 10., 5., 1.]
        # preserved_energies = [1.0]
        # compress_rates = [1, 2, 4, 8, 16, 32, 64, 128, 256]

        # get the expected results from PyTorch's conv2d
        expected_result_tensor = F.conv2d(input=x, weight=y, bias=b)

        for preserve_energy in preserved_energies:
            conv = Conv2dfft(weight_value=y,
                             bias_value=b,
                             args=Arguments(
                                 preserve_energy=preserve_energy,
                                 index_back=0,
                                 is_debug=True,
                                 next_power2=True,
                                 compress_type=CompressType.STANDARD))
            result = conv.forward(input=x)
            # print("actual result: ", result)

            result = result.float()
            abs_error = torch.sum(
                torch.abs(result - expected_result_tensor)).item()
            expected_total = torch.sum(
                torch.abs(expected_result_tensor)).item()
            relative_error = abs_error / expected_total * 100.0
            # relative_error = torch.mean(torch.abs(result) / torch.abs(expected_result_tensor) * 100)
            print(
                f"absolute divergence for preserved energy,{preserve_energy}"
                f",absolute error,{abs_error},"
                f"relative error (%),{relative_error}")
Code example #3
def run():
    N, C, H, W = 16, 3, 32, 32
    K = 32  # number of filters (named K to avoid clashing with the common torch.nn.functional alias F)
    HH, WW = 3, 3

    if torch.cuda.is_available():
        print("Cuda is available.")
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    x = torch.randn(N, C, H, W, device=device)
    y = torch.randn(K, C, HH, WW, device=device)

    layer_cpp = Conv2dfftCpp(weight_value=y, padding=2)
    layer_python = Conv2dfft(weight_value=y, padding=2)

    time_it(layer=layer_cpp, name="cpp")
    time_it(layer=layer_python, name="python")
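time_it is not part of this excerpt, and the call above passes only the layer and a name, so the real helper evidently finds its input elsewhere. A plausible sketch, assuming the layer is an nn.Module with parameters; the synchronize calls matter because CUDA kernel launches are asynchronous:

import time
import torch
import torch.nn as nn

def time_it(layer, name, repetitions=100, input_size=(16, 3, 32, 32)):
    """Illustrative timing helper: one warm-up pass, then timed repetitions."""
    device = next(layer.parameters()).device
    x = torch.randn(*input_size, device=device)
    layer(x)  # warm-up (lazy allocations, cudnn autotuning, etc.)
    if device.type == "cuda":
        torch.cuda.synchronize()  # flush queued kernels before starting the clock
    start = time.time()
    for _ in range(repetitions):
        layer(x)
    if device.type == "cuda":
        torch.cuda.synchronize()
    print(f"{name}: {time.time() - start:.4f}s for {repetitions} runs")

time_it(layer=nn.Conv2d(3, 32, kernel_size=3, padding=2), name="standard")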
Code example #4
    def get_conv(self, param_index=0, compress_rate=None):
        if param_index == 0:
            in_channels = self.in_channels
        else:
            in_channels = self.out_channels[param_index - 1]

        if compress_rate is None:
            compress_rate = self.compress_rate

        if self.conv_type is ConvType.STANDARD:
            return nn.Conv1d(in_channels=in_channels,
                             out_channels=self.out_channels[param_index],
                             stride=self.strides[param_index],
                             kernel_size=self.kernel_sizes[param_index],
                             padding=self.padding[param_index],
                             bias=self.is_bias)
        elif self.conv_type is ConvType.STANDARD2D:
            return nn.Conv2d(in_channels=in_channels,
                             out_channels=self.out_channels[param_index],
                             stride=self.strides[param_index],
                             kernel_size=self.kernel_sizes[param_index],
                             padding=self.padding[param_index],
                             bias=self.is_bias)
        elif self.conv_type is ConvType.FFT1D:
            return Conv1dfft(in_channels=in_channels,
                             out_channels=self.out_channels[param_index],
                             stride=self.strides[param_index],
                             kernel_size=self.kernel_sizes[param_index],
                             padding=self.padding[param_index],
                             bias=self.is_bias,
                             args=self.args)
        elif self.conv_type is ConvType.FFT2D:
            return Conv2dfft(in_channels=in_channels,
                             out_channels=self.out_channels[param_index],
                             stride=self.strides[param_index],
                             kernel_size=self.kernel_sizes[param_index],
                             padding=self.padding[param_index],
                             bias=self.is_bias,
                             args=self.args)
        elif self.conv_type is ConvType.DCT:
            return ConvDCT(in_channels=in_channels,
                           out_channels=self.out_channels[param_index],
                           stride=self.strides[param_index],
                           kernel_size=self.kernel_sizes[param_index],
                           padding=self.padding[param_index],
                           bias=self.is_bias,
                           args=self.args)
        elif self.conv_type is ConvType.AUTOGRAD:
            return Conv1dfftAutograd(in_channels=in_channels,
                                     out_channels=self.out_channels[
                                         param_index],
                                     stride=self.strides[param_index],
                                     kernel_size=self.kernel_sizes[param_index],
                                     padding=self.padding[param_index],
                                     index_back=compress_rate,
                                     bias=self.is_bias)
        elif self.conv_type is ConvType.AUTOGRAD2D:
            return Conv2dfftAutograd(in_channels=in_channels,
                                     out_channels=self.out_channels[
                                         param_index],
                                     stride=self.strides[param_index],
                                     kernel_size=self.kernel_sizes[param_index],
                                     padding=self.padding[param_index],
                                     bias=self.is_bias,
                                     args=self.args)
        elif self.conv_type is ConvType.SIMPLE_FFT:
            return Conv1dfftSimple(in_channels=in_channels,
                                   out_channels=self.out_channels[param_index],
                                   stride=self.strides[param_index],
                                   kernel_size=self.kernel_sizes[param_index],
                                   padding=self.padding[param_index],
                                   index_back=compress_rate,
                                   bias=self.is_bias)
        elif self.conv_type is ConvType.SIMPLE_FFT_FOR_LOOP:
            return Conv1dfftSimpleForLoop(in_channels=in_channels,
                                          out_channels=self.out_channels[
                                              param_index],
                                          stride=self.strides[param_index],
                                          kernel_size=self.kernel_sizes[
                                              param_index],
                                          padding=self.padding[param_index],
                                          index_back=compress_rate,
                                          bias=self.is_bias)
        elif self.conv_type is ConvType.COMPRESS_INPUT_ONLY:
            return Conv1dfftCompressSignalOnly(
                in_channels=in_channels,
                out_channels=self.out_channels[param_index],
                stride=self.strides[param_index],
                kernel_size=self.kernel_sizes[param_index],
                padding=self.padding[param_index],
                index_back=compress_rate,
                preserve_energy=self.preserve_energy,
                bias=self.is_bias)
        else:
            raise ValueError(CONV_TYPE_ERROR)
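The chain above dispatches on self.conv_type with `is`, the idiomatic comparison for enum.Enum members, since each member is a process-wide singleton. A minimal sketch of what such a ConvType might look like (the project's actual definition is not shown here, so the member list is assumed):

from enum import Enum, auto

class ConvType(Enum):
    STANDARD = auto()
    STANDARD2D = auto()
    FFT1D = auto()
    FFT2D = auto()
    DCT = auto()
    # ... remaining members elided

conv_type = ConvType.FFT2D
print(conv_type is ConvType.FFT2D)  # True: identity comparison is safe for enum members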
Code example #5
    def test_forward_backward_performance(self):
        dtype = torch.float
        if torch.cuda.is_available():
            device = torch.device("cuda")
        else:
            device = torch.device("cpu")
        print("device used: ", str(device))

        N, C, H, W, K, HH, WW, padding = 32, 3, 32, 32, 64, 3, 3, 0

        natural_image = True
        if natural_image:
            x = cifar10_image[:, :1, :H, :W]
            x_new = x.expand(N, C, -1, -1).clone()  # broadcast one channel to N x C maps, then materialize a copy
            del x
            print("x size: ", x_new.size())
            x = x_new.to(device)
            x.requires_grad_(True)
        else:
            x = torch.randn(N,
                            C,
                            H,
                            W,
                            dtype=dtype,
                            device=device,
                            requires_grad=True)
        x_expect = x.clone().detach().requires_grad_(True)
        y = torch.randn(K,
                        C,
                        HH,
                        WW,
                        dtype=dtype,
                        device=device,
                        requires_grad=True)
        y_expect = y.clone().detach().requires_grad_(True)

        print("input size: ", x.size())
        print("filter size: ", y.size())
        print("padding: ", padding)
        from .conv2D_fft import global_threshold
        repetitions = global_threshold  # reuse the module-level constant as the repetition count
        print("repetitions: ", repetitions)
        preserve_energy = 80
        print("preserve energy: ", preserve_energy)
        stride = 1
        print("stride: ", stride)
        next_power2 = True
        print("next_power2: ", str(next_power2))
        print("cuda exec type: ", self.conv_exec_type.name)
        compress_rate = 0.0
        print("compress rate: ", compress_rate)

        # warm-up
        torch.nn.functional.conv2d(input=x_expect,
                                   weight=y_expect,
                                   stride=stride,
                                   padding=padding)

        start = time.time()
        for _ in range(repetitions):
            convStandard = torch.nn.functional.conv2d(input=x_expect,
                                                      weight=y_expect,
                                                      stride=stride,
                                                      padding=padding)
        convStandardTime = time.time() - start
        print("convStandard time: ", convStandardTime)

        conv = Conv2dfft(weight_value=y,
                         stride=stride,
                         bias=False,
                         padding=padding,
                         args=Arguments(stride_type=StrideType.STANDARD,
                                        min_batch_size=N,
                                        is_debug=True,
                                        preserved_energy=preserve_energy,
                                        next_power2=next_power2,
                                        conv_exec_type=self.conv_exec_type,
                                        compress_rate=compress_rate,
                                        compress_rates=[compress_rate]))

        # warm-up
        conv.forward(input=x)

        start = time.time()
        for _ in range(repetitions):
            convFFT = conv.forward(input=x)
        convFFTtime = time.time() - start
        print("convFFT time: ", convFFTtime)
        speedup = convFFTtime / convStandardTime  # PyTorch's speedup over the FFT path (>1 means FFT is slower)
        print(f"Pytorch forward pass speedup is: {speedup}")

        if compress_rate == 0.0 and preserve_energy == 100:
            # exact agreement is only asserted when nothing is compressed
            # (with preserve_energy = 80 above, this block is skipped)
            np.testing.assert_array_almost_equal(
                x=convStandard.cpu().detach().numpy(),
                y=convFFT.cpu().detach().numpy(),
                decimal=1,
                err_msg=
                "The expected array x and computed y are not almost equal.")

        dout = torch.randn(list(convStandard.size()),
                           device=device,
                           dtype=dtype)
        dout_clone = dout.clone()

        # warm-up
        convStandard.backward(dout, retain_graph=True)

        standard_back_time_start = time.time()
        for _ in range(repetitions):
            convStandard.backward(dout, retain_graph=True)
        standard_back_time = time.time() - standard_back_time_start
        print("standard back time: ", standard_back_time)

        # warm-up
        convFFT.backward(dout_clone, retain_graph=True)

        fft_back_time_start = time.time()
        for _ in range(repetitions):
            convFFT.backward(dout_clone, retain_graph=True)
        conv_fft_back_time = time.time() - fft_back_time_start
        assert conv.is_manual[0] == 1
        print("conv fft back time: ", conv_fft_back_time)
        speedup = conv_fft_back_time / standard_back_time  # PyTorch's speedup over the FFT backward pass
        print(f"Pytorch speedup for backprop: {speedup}")

        full_pass_fft = convFFTtime + conv_fft_back_time
        print("full pass fft:", full_pass_fft)
        full_pass_pytorch = convStandardTime + standard_back_time
        print("full pass pytorch: ", full_pass_pytorch)
        speedup_full_pass = full_pass_fft / full_pass_pytorch  # PyTorch's speedup over FFT for the full pass
        print(f"Pytorch speedup for full pass: {speedup_full_pass}")

        if compress_rate == 0.0 and preserve_energy == 100:
            np.testing.assert_array_almost_equal(
                x.grad.cpu().detach().numpy(),
                x_expect.grad.cpu().detach().numpy(),
                decimal=1)

            np.testing.assert_array_almost_equal(
                y.grad.cpu().detach().numpy(),
                y_expect.grad.cpu().detach().numpy(),
                decimal=1)
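Both implementations above backpropagate into their own copies of the inputs, created with clone().detach().requires_grad_(True). A small illustration of why the detach() matters: it cuts the clone out of the original graph and makes it an independent leaf, so the two backward passes cannot contaminate each other's .grad fields:

import torch

a = torch.randn(3, requires_grad=True)
b = a.clone().detach().requires_grad_(True)  # independent leaf with the same values

(a * 2).sum().backward()
(b * 3).sum().backward()
print(a.grad)  # tensor([2., 2., 2.]) -- unaffected by the backward through b
print(b.grad)  # tensor([3., 3., 3.])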
Code example #6
    def test_forward_backward(self):
        dtype = torch.float
        if torch.cuda.is_available():
            device = torch.device("cuda")
        else:
            device = torch.device("cpu")
        print("device used: ", str(device))
        N, C, H, W = 128, 16, 32, 32
        K, HH, WW = 16, 3, 3
        x = torch.randn(N,
                        C,
                        H,
                        W,
                        dtype=dtype,
                        device=device,
                        requires_grad=True)
        x_expect = x.clone().detach().requires_grad_(True)
        y = torch.randn(K,
                        C,
                        HH,
                        WW,
                        dtype=dtype,
                        device=device,
                        requires_grad=True)
        y_expect = y.clone().detach().requires_grad_(True)
        start = time.time()
        convStandard = torch.nn.functional.conv2d(input=x_expect,
                                                  weight=y_expect,
                                                  stride=1)
        convStandardTime = time.time() - start
        print("convStandard time: ", convStandardTime)

        conv = Conv2dfft(weight_value=y,
                         stride=1,
                         bias=False,
                         args=Arguments(stride_type=StrideType.STANDARD))
        start = time.time()
        convFFT = conv.forward(input=x)
        convFFTtime = time.time() - start
        print("convFFT time: ", convFFTtime)
        speedup = convFFTtime / convStandardTime  # PyTorch's speedup over the FFT path (>1 means FFT is slower)
        print(f"Pytorch forward pass speedup is: {speedup} X")

        np.testing.assert_array_almost_equal(
            x=convStandard.cpu().detach().numpy(),
            y=convFFT.cpu().detach().numpy(),
            decimal=3,
            err_msg="The expected array x and computed y are not almost equal."
        )

        dout = torch.randn(list(convStandard.size()),
                           device=device,
                           dtype=dtype)
        dout_clone = dout.clone()

        standard_back_time_start = time.time()
        convStandard.backward(dout)
        standard_back_time = time.time() - standard_back_time_start
        print("standard back time: ", standard_back_time)

        fft_back_time_start = time.time()
        convFFT.backward(dout_clone)
        conv_fft_back_time = time.time() - fft_back_time_start
        assert conv.is_manual[0] == 1
        print("conv fft back time: ", conv_fft_back_time)
        speedup = conv_fft_back_time / standard_back_time  # PyTorch's speedup over the FFT backward pass
        print(f"Pytorch speedup for backprop: {speedup} X")

        np.testing.assert_array_almost_equal(
            x.grad.cpu().detach().numpy(),
            x_expect.grad.cpu().detach().numpy(),
            decimal=3)

        np.testing.assert_array_almost_equal(
            y.grad.cpu().detach().numpy(),
            y_expect.grad.cpu().detach().numpy(),
            decimal=3)
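Comparing gradients to decimal=3 in float32 is a fairly loose check. A stricter, standard alternative for validating a custom convolution's backward pass is torch.autograd.gradcheck in double precision, sketched here against F.conv2d (running it on Conv2dfft itself would require the layer to accept double inputs, which is an assumption):

import torch
import torch.nn.functional as F

x = torch.randn(2, 3, 8, 8, dtype=torch.double, requires_grad=True)
y = torch.randn(4, 3, 3, 3, dtype=torch.double, requires_grad=True)

# compares the analytic gradients against finite differences
print(torch.autograd.gradcheck(lambda a, b: F.conv2d(a, b), (x, y)))  # True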
Code example #7
    def test_forward_timing(self):
        """
        device used:  cuda
        x size:  torch.Size([32, 3, 32, 32])
        input size:  torch.Size([32, 3, 32, 32])
        filter size:  torch.Size([64, 3, 3, 3])
        padding:  0
        repetitions:  1000
        preserve energy:  100
        next_power2:  False
        cuda exec type:  CUDA
        output size:  torch.Size([32, 64, 30, 30])
        PyTorch conv2D:  0.6601831912994385
        compress rate:  80.0
        conv FFT time:  16.30516004562378
        Pytorch speedup: 24.697932726112484


        """
        dtype = torch.float
        if torch.cuda.is_available():
            device = torch.device("cuda")
            print("\nTorch CUDA is available")
        else:
            device = torch.device("cpu")
        print("device used: ", str(device))
        # 1st layer:
        # N, C, H, W, K, HH, WW = 32, 3, 32, 32, 64, 3, 3
        # 7th layer:
        # N, C, H, W, K, HH, WW = 32, 256, 4, 4, 256, 3, 3
        # last layer:
        # N, C, H, W, K, HH, WW = 32, 256, 4, 4, 512, 3, 3
        for N, C, H, W, K, HH, WW, padding in [
            (32, 3, 32, 32, 64, 3, 3, 0),
            # (32, 3, 32, 32, 64, 3, 3, 1),
            # (32, 3, 32, 32, 64, 7, 7, 3),
            # (32, 64, 16, 16, 64, 3, 3, 1),
            # (32, 256, 4, 4, 256, 3, 3, 1),
            # (32, 512, 2, 2, 512, 3, 3, 1),
        ]:
            natural_image = True
            if natural_image:
                x = cifar10_image[:, :1, :H, :W]
                x_new = x.expand(N, C, -1, -1).clone()  # broadcast one channel to N x C maps, then materialize a copy
                del x
                print("x size: ", x_new.size())
                x = x_new.to(device)
            else:
                x = torch.randn(N, C, H, W, dtype=dtype, device=device)

            y = torch.randn(K, C, HH, WW, dtype=dtype, device=device)

            print("input size: ", x.size())
            print("filter size: ", y.size())
            print("padding: ", padding)
            repetitions = 1000
            print("repetitions: ", repetitions)
            preserve_energy = 100
            print("preserve energy: ", preserve_energy)
            stride = 1
            next_power2 = False
            print("next_power2: ", str(next_power2))
            print("cuda exec type: ", self.conv_exec_type.name)

            # print("preserve energy: ", preserve_energy)
            # print("min_batch_size (equivalent to the batch slice for fft): ", N)
            # print("next power 2: ", next_power2)

            convStandard = torch.nn.Conv2d(in_channels=C,
                                           out_channels=K,
                                           kernel_size=(HH, WW),
                                           stride=stride,
                                           padding=padding)
            convStandard.to(device)
            out_standard = convStandard.forward(x)
            print("output size: ", out_standard.size())

            start = time.time()
            for _ in range(repetitions):
                convStandard.forward(x)
            convStandardTime = time.time() - start
            print("PyTorch conv2D: ", convStandardTime)

            # print("compress_rate, FFT conv2D:")
            # for compress_rate in range(0, 86, 5):
            for compress_rate in [80.0]:
                compress_rate = float(compress_rate)  # needed when sweeping the commented int range above
                print("compress rate: ", compress_rate)

                conv = Conv2dfft(weight_value=y,
                                 stride=stride,
                                 padding=padding,
                                 args=Arguments(
                                     stride_type=StrideType.STANDARD,
                                     min_batch_size=N,
                                     is_debug=True,
                                     preserved_energy=preserve_energy,
                                     next_power2=next_power2,
                                     conv_exec_type=self.conv_exec_type,
                                     compress_rate=compress_rate,
                                     compress_rates=[compress_rate]))
                conv.to(device)
                start = time.time()
                for _ in range(repetitions):
                    conv.forward(input=x)
                convFFTtime = time.time() - start
                # print(compress_rate, ",", convFFTtime)
                print("conv FFT time: ", convFFTtime)
                # del conv
                speedup = convFFTtime / convStandardTime  # PyTorch's speedup over the FFT path
                print(f"Pytorch speedup: {speedup}")