Beispiel #1
0
    def __init__(self):
        """Set up sample inputs and pooling layers for 1-D, 2-D and 3-D data.

        For each dimensionality a random input tensor is stored together
        with an ``nn.ModuleList`` of pooling layers, in a fixed order.
        """
        super(NNPoolingModule, self).__init__()

        # --- 1-D pooling -------------------------------------------------
        self.input1d = torch.randn(1, 16, 50)
        pools_1d = [
            nn.MaxPool1d(3, stride=2),
            nn.AvgPool1d(3, stride=2),
            nn.LPPool1d(2, 3, stride=2),
            nn.AdaptiveMaxPool1d(3),
            nn.AdaptiveAvgPool1d(3),
        ]
        self.module1d = nn.ModuleList(pools_1d)

        # --- 2-D pooling -------------------------------------------------
        self.input2d = torch.randn(1, 16, 30, 10)
        pools_2d = [
            nn.MaxPool2d((3, 2), stride=(2, 1)),
            nn.AvgPool2d((3, 2), stride=(2, 1)),
            nn.FractionalMaxPool2d(3, output_ratio=(0.5, 0.5)),
            nn.LPPool2d(2, 3, stride=(2, 1)),
            nn.AdaptiveMaxPool2d((5, 7)),
            # A plain int is passed as the output size here.
            nn.AdaptiveAvgPool2d(7),
        ]
        self.module2d = nn.ModuleList(pools_2d)

        # --- 3-D pooling -------------------------------------------------
        self.input3d = torch.randn(1, 16, 20, 4, 4)
        pools_3d = [
            nn.MaxPool3d(2),
            nn.AvgPool3d(2),
            nn.FractionalMaxPool3d(2, output_ratio=(0.5, 0.5, 0.5)),
            nn.AdaptiveMaxPool3d((5, 7, 9)),
            nn.AdaptiveAvgPool3d((5, 7, 9)),
        ]
        self.module3d = nn.ModuleList(pools_3d)
 def __init__(self):
     """Create the layers of the audio network.

     Declared in order: a strided 3-D convolution with batch norm and max
     pooling, three ``residual_block`` stages, a fractional max pooling
     layer, and a final 3-D convolution with batch norm. Both convolutions
     are created without a bias term.
     """
     super(AudioNet, self).__init__()

     # Audio layers.
     # Input of form [Batch, channels, time, height, width].
     self.a_conv1 = nn.Conv3d(
         2, 64, [65, 1, 1], stride=4, padding=(32, 1, 1), bias=False)
     self.bn1 = nn.BatchNorm3d(64)
     self.a_pool1 = nn.MaxPool3d(
         [4, 1, 1], stride=[4, 1, 1], padding=(1, 0, 0))

     # Three residual stages: 64 -> 128 -> 128 -> 256 channels.
     self.a_res1 = residual_block(
         64, 128, [15, 1, 1], (7, 0, 0), [4, 1, 1])
     self.a_res2 = residual_block(
         128, 128, [15, 1, 1], (7, 0, 0), [4, 1, 1])
     self.a_res3 = residual_block(
         128, 256, [15, 1, 1], (7, 0, 0), [4, 1, 1])

     self.a_pool2 = nn.FractionalMaxPool3d(
         [3, 1, 1], output_size=(32, 1, 1))
     self.a_conv2 = nn.Conv3d(
         256, 128, [3, 1, 1], padding=(1, 0, 0), bias=False)
     self.bn2 = nn.BatchNorm3d(128)
Beispiel #3
0
                                                      W=[32, 64],
                                                      device=['cpu', 'cuda'],
                                                      tags=['long'])

def _adaptive_max_pool_3d(kernel, stride):
    """Build ``nn.AdaptiveMaxPool3d`` from ``kernel``; ``stride`` is unused."""
    return nn.AdaptiveMaxPool3d(kernel)


def _fractional_max_pool_3d(kernel, stride):
    """Build ``nn.FractionalMaxPool3d`` with output size 2; ``stride`` is unused."""
    return nn.FractionalMaxPool3d(kernel, output_size=2)


# Each entry pairs an operator name with a factory that accepts
# ``(kernel, stride)``. The helpers above take ``stride`` only so that all
# factories can be called the same way.
pool_3d_ops_list = op_bench.op_list(
    attrs=[
        ['MaxPool3d', nn.MaxPool3d],
        ['AvgPool3d', nn.AvgPool3d],
        ['AdaptiveMaxPool3d', _adaptive_max_pool_3d],
        ['FractionalMaxPool3d', _fractional_max_pool_3d],
    ],
    attr_names=['op_name', 'op_func'],
)


class Pool3dBenchmark(op_bench.TorchBenchmarkBase):
    """Benchmark case that applies one 3-D pooling operator to a random input."""

    def init(self, kernel, stride, N, C, D, H, W, device, op_func):
        """Create the random input tensor and instantiate the pooling op.

        Args:
            kernel: Passed as the first positional argument to ``op_func``.
            stride: Passed to ``op_func`` as the ``stride`` keyword.
            N, C, D, H, W: Sizes of the five input dimensions, in that order.
            device: Device on which the input tensor is created.
            op_func: Callable returning the operator to benchmark.
        """
        input_shape = (N, C, D, H, W)
        self.input = torch.rand(*input_shape, device=device)
        self.kernel, self.stride = kernel, stride
        self.op_func = op_func(kernel, stride=stride)

    def forward(self):
        """Apply the pooling operator to the stored input and return the result."""
        pooling_op = self.op_func
        return pooling_op(self.input)
Beispiel #4
0
    def __init__(self):
        """Create all layers of the audio-visual (AV) network.

        Layers are declared in four groups: the image branch, the audio
        branch, the fusion layers that follow them, and the final pooling,
        fully connected layer and activations. The x and y inputs correspond
        to image frames and audio frames respectively.
        """
        super(AVNet, self).__init__()

        # ---- Image branch ----------------------------------------------
        # The paper assumes random cropped images of 256*256 resized to
        # 224*224; the layers are defined as described in the paper.
        self.im_conv1 = nn.Conv3d(
            3, 64, [5, 7, 7], stride=[2, 2, 2], padding=(2, 3, 3))
        self.im_pool1 = nn.MaxPool3d(
            [1, 3, 3], stride=[1, 2, 2], padding=(0, 1, 1))
        self.im_res1 = residual_block(64, 64, [3, 3, 3], [1, 1, 1], 1)
        self.im_res2 = residual_block(
            64, 64, [3, 3, 3], [1, 1, 1], [2, 2, 2])

        # ---- Audio branch ----------------------------------------------
        # Input of form [Batch, channels, time, height, width].
        self.a_conv1 = nn.Conv3d(
            2, 64, [65, 1, 1], stride=4, padding=(32, 1, 1))
        self.a_pool1 = nn.MaxPool3d(
            [4, 1, 1], stride=[4, 1, 1], padding=(1, 0, 0))
        self.a_res1 = residual_block(
            64, 128, [15, 1, 1], (7, 0, 0), [4, 1, 1])
        self.a_res2 = residual_block(
            128, 128, [15, 1, 1], (7, 0, 0), [4, 1, 1])
        self.a_res3 = residual_block(
            128, 256, [15, 1, 1], (7, 0, 0), [4, 1, 1])
        self.a_pool2 = nn.FractionalMaxPool3d(
            [3, 1, 1], output_size=(32, 1, 1))
        self.a_conv2 = nn.Conv3d(256, 128, [3, 1, 1], padding=(1, 0, 0))

        # ---- Fusion layers ---------------------------------------------
        # f_conv1 takes 192 input channels (64 + 128).
        # NOTE(review): presumably the concatenated image and audio
        # features - confirm against forward().
        self.f_conv1 = nn.Conv3d(192, 512, [1, 1, 1])
        self.f_conv2 = nn.Conv3d(512, 128, [1, 1, 1])
        self.bn_f = nn.BatchNorm3d(128)
        self.relu_f = nn.ReLU(inplace=True)

        self.c_res1 = residual_block(128, 128, [3, 3, 3], (1, 1, 1), 1)
        self.c_res2 = residual_block(128, 128, [3, 3, 3], (1, 1, 1), 1)

        self.c_res3 = residual_block(
            128, 256, [3, 3, 3], (1, 1, 1), [2, 2, 2])
        self.c_res4 = residual_block(256, 256, [3, 3, 3], (1, 1, 1), 1)

        self.c_res5 = residual_block(
            256, 512, [3, 3, 3], (1, 1, 1), [1, 2, 2])
        self.c_res6 = residual_block(512, 512, [3, 3, 3], (1, 1, 1), 1)

        # ---- Pooling and fully connected layer -------------------------
        self.avgpool = nn.AvgPool3d([16, 7, 7])
        self.f_fcn = nn.Linear(512, 1)
        # Alias of the fully connected layer's weight tensor.
        # NOTE(review): if AVNet is an nn.Module, this presumably also
        # registers the same parameter under the name "cmm_weights" -
        # confirm before renaming or removing it.
        self.cmm_weights = self.f_fcn.weight

        # ---- Activations -----------------------------------------------
        self.sigmoid = nn.Sigmoid()
        self.relu = nn.ReLU()
        self.logsigmoid = nn.LogSigmoid()