def _construct(
    self,
    dim_in,
    dim_out,
    stride,
    dim_inner,
    num_groups,
    trans_func_name,
    stride_1x1,
    inplace_relu,
    nonlocal_inds,
    nonlocal_pool,
    instantiation,
    dilation,
    norm_module,
    type_conv,
):
    """
    Build and register the residual blocks, plus the optional nonlocal
    layers, of every pathway.

    Reads `self.num_pathways`, `self.num_blocks` and
    `self.temp_kernel_sizes`, so those must be set before this is called.
    Each block is registered through `self.add_module` under the name
    "pathway{p}_res{i}"; each nonlocal layer under "pathway{p}_nonlocal{i}".

    Args:
        dim_in: indexed by pathway; input channel dimension used for the
            first block of the pathway.
        dim_out: indexed by pathway; output channel dimension of every
            block, and the input dimension of every block after the first.
        stride: indexed by pathway; stride of the first block of the
            pathway. Every later block uses stride 1.
        dim_inner: indexed by pathway; forwarded to ResBlock.
        num_groups: indexed by pathway; forwarded to ResBlock.
        trans_func_name: name resolved through `get_trans_func`.
        stride_1x1: forwarded to ResBlock.
        inplace_relu: forwarded to ResBlock.
        nonlocal_inds: indexed by pathway; block indices after which a
            Nonlocal layer is registered.
        nonlocal_pool: indexed by pathway; forwarded to Nonlocal.
        instantiation: forwarded to Nonlocal.
        dilation: indexed by pathway; forwarded to ResBlock.
        norm_module: forwarded to both ResBlock and Nonlocal.
        type_conv: forwarded to ResBlock.
    """
    for p in range(self.num_pathways):
        for blk in range(self.num_blocks[p]):
            # Retrieve the transformation function for this block.
            transform = get_trans_func(trans_func_name)

            # Only the first block of a pathway changes the channel count
            # and applies the pathway stride.
            is_first = blk == 0
            in_channels = dim_in[p] if is_first else dim_out[p]
            out_channels = dim_out[p]
            temp_kernel = self.temp_kernel_sizes[p][blk]
            blk_stride = stride[p] if is_first else 1

            # Construct and register the residual block.
            block = ResBlock(
                in_channels,
                out_channels,
                temp_kernel,
                blk_stride,
                transform,
                dim_inner[p],
                num_groups[p],
                stride_1x1=stride_1x1,
                inplace_relu=inplace_relu,
                dilation=dilation[p],
                norm_module=norm_module,
                type_conv=type_conv,
            )
            self.add_module("pathway{}_res{}".format(p, blk), block)

            if blk not in nonlocal_inds[p]:
                continue

            # A nonlocal layer follows this block; its inner dimension is
            # half of the block's output dimension.
            nonlocal_layer = Nonlocal(
                out_channels,
                out_channels // 2,
                nonlocal_pool[p],
                instantiation=instantiation,
                norm_module=norm_module,
            )
            self.add_module(
                "pathway{}_nonlocal{}".format(p, blk), nonlocal_layer
            )
def __init__(
    self,
    dim_in,
    dim_out,
    stride,
    temp_kernel_sizes,
    num_blocks,
    dim_inner,
    num_groups,
    num_block_temp_kernel,
    nonlocal_inds,
    nonlocal_group,
    instantiation="softmax",
    trans_func_name="bottleneck_transform",
    stride_1x1=False,
    inplace_relu=True,
):
    """
    The `__init__` method of any subclass should also contain these
    arguments.
    ResStage builds p streams, where p can be greater or equal to one.

    Args:
        dim_in (list): list of p the channel dimensions of the input.
            Different channel dimensions control the input dimension of
            different pathways.
        dim_out (list): list of p the channel dimensions of the output.
            Different channel dimensions control the output dimension of
            different pathways.
        stride (list): list of the p strides of the bottleneck. The stride
            is applied to the first block of each pathway; later blocks
            use stride 1.
        temp_kernel_sizes (list): list of the p temporal kernel sizes of
            the convolution in the bottleneck. Different temp_kernel_sizes
            control different pathway.
        num_blocks (list): list of p numbers of blocks for each of the
            pathway.
        dim_inner (list): list of the p inner channel dimensions, one per
            pathway; forwarded to each ResBlock of that pathway.
        num_groups (list): list of number of p groups for the convolution.
            num_groups=1 is for standard ResNet like networks, and
            num_groups>1 is for ResNeXt like networks.
        num_block_temp_kernel (list): extend the temp_kernel_sizes to
            num_block_temp_kernel blocks, then fill temporal kernel size
            of 1 for the rest of the layers. Each entry must not exceed
            the matching entry of num_blocks.
        nonlocal_inds (list): list of p collections of block indices. If
            a pathway's collection is empty, no nonlocal layer is added
            to it; otherwise a nonlocal layer is added after each
            index-th block.
        nonlocal_group (list): list of number of p nonlocal groups. Stored
            as `self.nonlocal_group`. Each number controls how to fold
            temporal dimension to batch dimension before applying nonlocal
            transformation.
            https://github.com/facebookresearch/video-nonlocal-net.
        instantiation (string): different instantiation for nonlocal layer.
            Supports two different instantiation method:
                "dot_product": normalizing correlation matrix with L2.
                "softmax": normalizing correlation matrix with Softmax.
        trans_func_name (string): name of the transformation function
            applied on the network.
        stride_1x1 (bool): forwarded to ResBlock.
        inplace_relu (bool): forwarded to ResBlock.

    Raises:
        AssertionError: if the per-pathway arguments do not all have the
            same length, or if an entry of num_block_temp_kernel exceeds
            the matching entry of num_blocks.
    """
    super(ResStage, self).__init__()

    # Check the lengths first: everything below indexes these lists by
    # pathway, so a mismatch would otherwise surface as a bare IndexError
    # (or be masked) before this check had a chance to report it.
    per_pathway_lengths = {
        len(dim_in),
        len(dim_out),
        len(temp_kernel_sizes),
        len(stride),
        len(num_blocks),
        len(dim_inner),
        len(num_groups),
        len(num_block_temp_kernel),
        len(nonlocal_inds),
        len(nonlocal_group),
    }
    assert len(per_pathway_lengths) == 1, (
        "All per-pathway arguments must have the same length, "
        "got lengths {}".format(sorted(per_pathway_lengths))
    )
    assert all(
        n_temp <= n_blocks
        for n_temp, n_blocks in zip(num_block_temp_kernel, num_blocks)
    ), "num_block_temp_kernel must not exceed num_blocks for any pathway"

    self.num_blocks = num_blocks
    self.nonlocal_group = nonlocal_group
    # Per pathway: repeat the kernel-size pattern, keep the first
    # num_block_temp_kernel entries, and pad the remaining blocks with a
    # temporal kernel size of 1.
    self.temp_kernel_sizes = [
        (kernels * n_blocks)[:n_temp] + [1] * (n_blocks - n_temp)
        for kernels, n_blocks, n_temp in zip(
            temp_kernel_sizes, num_blocks, num_block_temp_kernel
        )
    ]
    self.num_pathways = len(self.num_blocks)

    for pathway in range(self.num_pathways):
        for i in range(self.num_blocks[pathway]):
            # Retrieve the transformation function.
            trans_func = get_trans_func(trans_func_name)
            # Construct the block. Only the first block of a pathway
            # takes dim_in and the pathway stride.
            res_block = ResBlock(
                dim_in[pathway] if i == 0 else dim_out[pathway],
                dim_out[pathway],
                self.temp_kernel_sizes[pathway][i],
                stride[pathway] if i == 0 else 1,
                trans_func,
                dim_inner[pathway],
                num_groups[pathway],
                stride_1x1=stride_1x1,
                inplace_relu=inplace_relu,
            )
            self.add_module("pathway{}_res{}".format(pathway, i), res_block)
            if i in nonlocal_inds[pathway]:
                # NOTE(review): the third positional argument is fixed at
                # [1, 2, 2] for every pathway -- presumably a pooling
                # size; confirm against Nonlocal.
                nln = Nonlocal(
                    dim_out[pathway],
                    dim_out[pathway] // 2,
                    [1, 2, 2],
                    instantiation=instantiation,
                )
                self.add_module(
                    "pathway{}_nonlocal{}".format(pathway, i), nln
                )