    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 pad_type='zero',
                 activation='lrelu',
                 norm='none',
                 sn=False):
        super(Conv2dLayer, self).__init__()
        # Initialize the padding scheme
        if pad_type == 'reflect':
            self.pad = nn.ReflectionPad2d(padding)
        elif pad_type == 'replicate':
            self.pad = nn.ReplicationPad2d(padding)
        elif pad_type == 'zero':
            self.pad = nn.ZeroPad2d(padding)
        else:
            assert 0, "Unsupported padding type: {}".format(pad_type)

        # Initialize the normalization type
        if norm == 'bn':
            self.norm = nn.BatchNorm2d(out_channels)
        elif norm == 'in':
            self.norm = nn.InstanceNorm2d(out_channels)
        elif norm == 'ln':
            self.norm = LayerNorm(out_channels)
        elif norm == 'none':
            self.norm = None
        else:
            assert 0, "Unsupported normalization: {}".format(norm)

        # Initialize the activation function
        if activation == 'relu':
            self.activation = nn.ReLU(inplace=True)
        elif activation == 'lrelu':
            self.activation = nn.LeakyReLU(0.2, inplace=True)
        elif activation == 'prelu':
            self.activation = nn.PReLU()
        elif activation == 'selu':
            self.activation = nn.SELU(inplace=True)
        elif activation == 'tanh':
            self.activation = nn.Tanh()
        elif activation == 'sigmoid':
            self.activation = nn.Sigmoid()
        elif activation == 'none':
            self.activation = None
        else:
            assert 0, "Unsupported activation: {}".format(activation)

        # Initialize the convolution layers
        if sn:
            self.conv2d = SpectralNorm(
                nn.Conv2d(in_channels,
                          out_channels,
                          kernel_size,
                          stride,
                          padding=0,
                          dilation=dilation))
        else:
            self.conv2d = nn.Conv2d(in_channels,
                                    out_channels,
                                    kernel_size,
                                    stride,
                                    padding=0,
                                    dilation=dilation)
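The excerpt stops at __init__, so the forward pass is not shown. For this pad-then-conv pattern the intended ordering is explicit pad, then the padding=0 conv, then the optional norm and activation; a sketch only, assuming the usual structure of such layers:

    # Sketch only -- the original forward is not part of the excerpt.
    def forward(self, x):
        x = self.pad(x)            # reflect / replicate / zero padding
        x = self.conv2d(x)         # the conv itself was built with padding=0
        if self.norm:
            x = self.norm(x)
        if self.activation:
            x = self.activation(x)
        return x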
Example 2

    def __init__(self):
        super(MaxPoolPad, self).__init__()
        self.pad = nn.ZeroPad2d((1, 0, 1, 0))
        self.pool = nn.MaxPool2d(3, stride=2, padding=1)
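Only __init__ survives in this excerpt as well. In the NASNet ports this helper usually pads the top-left edge, pools, then crops the first row and column, reproducing TensorFlow's asymmetric "SAME" pooling; a sketch under that assumption:

    # Sketch only -- assumes the usual NASNet-style MaxPoolPad forward.
    def forward(self, x):
        x = self.pad(x)            # one extra zero row/column at top-left
        x = self.pool(x)
        return x[:, :, 1:, 1:]     # drop the artifact row/column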
Example 3
    def __init__(self,
                 input_dim,
                 output_dim,
                 kernel_size,
                 stride,
                 padding=0,
                 norm='none',
                 activation='relu',
                 pad_type='zero'):
        super(Conv2dBlock, self).__init__()
        self.use_bias = True
        # initialize padding
        if pad_type == 'reflect':
            self.pad = nn.ReflectionPad2d(padding)
        elif pad_type == 'replicate':
            self.pad = nn.ReplicationPad2d(padding)
        elif pad_type == 'zero':
            self.pad = nn.ZeroPad2d(padding)
        else:
            assert 0, "Unsupported padding type: {}".format(pad_type)

        # initialize normalization
        norm_dim = output_dim
        if norm == 'bn':
            self.norm = nn.BatchNorm2d(norm_dim)
        elif norm == 'in':
            #self.norm = nn.InstanceNorm2d(norm_dim, track_running_stats=True)
            self.norm = nn.InstanceNorm2d(norm_dim)
        elif norm == 'ln':
            self.norm = LayerNorm(norm_dim)
        elif norm == 'adain':
            self.norm = AdaptiveInstanceNorm2d(norm_dim)
        elif norm == 'none' or norm == 'sn':
            self.norm = None
        else:
            assert 0, "Unsupported normalization: {}".format(norm)

        # initialize activation
        if activation == 'relu':
            self.activation = nn.ReLU(inplace=True)
        elif activation == 'lrelu':
            self.activation = nn.LeakyReLU(0.2, inplace=True)
        elif activation == 'prelu':
            self.activation = nn.PReLU()
        elif activation == 'selu':
            self.activation = nn.SELU(inplace=True)
        elif activation == 'tanh':
            self.activation = nn.Tanh()
        elif activation == 'none':
            self.activation = None
        else:
            assert 0, "Unsupported activation: {}".format(activation)

        # initialize convolution
        if norm == 'sn':
            self.conv = SpectralNorm(
                nn.Conv2d(input_dim,
                          output_dim,
                          kernel_size,
                          stride,
                          bias=self.use_bias))
        else:
            self.conv = nn.Conv2d(input_dim,
                                  output_dim,
                                  kernel_size,
                                  stride,
                                  bias=self.use_bias)
Example 4

def down_shift(x):
    x = x[:, :, :-1, :]
    pad = nn.ZeroPad2d((0, 0, 1, 0))
    return pad(x)
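down_shift drops the last row and zero-pads one row at the top, shifting the feature map down by one pixel while preserving its shape (the causal shift used in PixelCNN-style models). A quick check:

import torch
import torch.nn as nn

x = torch.arange(12.).view(1, 1, 4, 3)
y = down_shift(x)
print(y.shape)       # torch.Size([1, 1, 4, 3]) -- shape preserved
print(y[0, 0, 0])    # tensor([0., 0., 0.])     -- fresh zero row on top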
Example 5

    def __init__(self, stem_filters, num_filters):
        super(CellStem1, self).__init__()
        self.num_filters = num_filters
        self.stem_filters = stem_filters
        self.conv_1x1 = nn.Sequential()
        self.conv_1x1.add_module('relu', nn.ReLU())
        self.conv_1x1.add_module(
            'conv',
            nn.Conv2d(2 * self.num_filters,
                      self.num_filters,
                      1,
                      stride=1,
                      bias=False))
        self.conv_1x1.add_module(
            'bn',
            nn.BatchNorm2d(self.num_filters,
                           eps=0.001,
                           momentum=0.1,
                           affine=True))

        self.relu = nn.ReLU()
        self.path_1 = nn.Sequential()
        self.path_1.add_module(
            'avgpool', nn.AvgPool2d(1, stride=2, count_include_pad=False))
        self.path_1.add_module(
            'conv',
            nn.Conv2d(self.stem_filters,
                      self.num_filters // 2,
                      1,
                      stride=1,
                      bias=False))
        self.path_2 = nn.Sequential()  # submodules are attached by name below, as in path_1
        self.path_2.add_module('pad', nn.ZeroPad2d((0, 1, 0, 1)))
        self.path_2.add_module(
            'avgpool', nn.AvgPool2d(1, stride=2, count_include_pad=False))
        self.path_2.add_module(
            'conv',
            nn.Conv2d(self.stem_filters,
                      self.num_filters // 2,
                      1,
                      stride=1,
                      bias=False))

        self.final_path_bn = nn.BatchNorm2d(self.num_filters,
                                            eps=0.001,
                                            momentum=0.1,
                                            affine=True)

        self.comb_iter_0_left = BranchSeparables(self.num_filters,
                                                 self.num_filters,
                                                 5,
                                                 2,
                                                 2,
                                                 name='specific',
                                                 bias=False)
        self.comb_iter_0_right = BranchSeparables(self.num_filters,
                                                  self.num_filters,
                                                  7,
                                                  2,
                                                  3,
                                                  name='specific',
                                                  bias=False)

        # self.comb_iter_1_left = nn.MaxPool2d(3, stride=2, padding=1)
        self.comb_iter_1_left = MaxPoolPad()
        self.comb_iter_1_right = BranchSeparables(self.num_filters,
                                                  self.num_filters,
                                                  7,
                                                  2,
                                                  3,
                                                  name='specific',
                                                  bias=False)

        # self.comb_iter_2_left = nn.AvgPool2d(3, stride=2, padding=1, count_include_pad=False)
        self.comb_iter_2_left = AvgPoolPad()
        self.comb_iter_2_right = BranchSeparables(self.num_filters,
                                                  self.num_filters,
                                                  5,
                                                  2,
                                                  2,
                                                  name='specific',
                                                  bias=False)

        self.comb_iter_3_right = nn.AvgPool2d(3,
                                              stride=1,
                                              padding=1,
                                              count_include_pad=False)

        self.comb_iter_4_left = BranchSeparables(self.num_filters,
                                                 self.num_filters,
                                                 3,
                                                 1,
                                                 1,
                                                 name='specific',
                                                 bias=False)
        # self.comb_iter_4_right = nn.MaxPool2d(3, stride=2, padding=1)
        self.comb_iter_4_right = MaxPoolPad()
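path_2's ZeroPad2d((0, 1, 0, 1)) is the factorized-reduction trick: in the NASNet forward (not shown here) the padded map is cropped with [:, :, 1:, 1:] before the stride-2 pool, so path_2 samples the pixel phase that path_1's pool skips. A sketch of the effect:

import torch
import torch.nn as nn

x = torch.arange(16.).view(1, 1, 4, 4)
p1 = nn.AvgPool2d(1, stride=2)(x)                         # samples rows/cols 0, 2
shifted = nn.ZeroPad2d((0, 1, 0, 1))(x)[:, :, 1:, 1:]     # shift content up-left
p2 = nn.AvgPool2d(1, stride=2)(shifted)                   # samples rows/cols 1, 3
print(p1[0, 0], p2[0, 0], sep="\n")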
Example 6
    def layer_pad(self, net, args, options):
        options = hc.Config(options)

        return nn.ZeroPad2d((args[0], args[1], args[2], args[3]))
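The four args map one-to-one onto nn.ZeroPad2d's (left, right, top, bottom) tuple, so the width grows by args[0] + args[1] and the height by args[2] + args[3]:

import torch
import torch.nn as nn

pad = nn.ZeroPad2d((1, 2, 3, 4))   # left=1, right=2, top=3, bottom=4
x = torch.randn(1, 8, 10, 10)
print(pad(x).shape)                # torch.Size([1, 8, 17, 13])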
Example 7
    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, z_padding=1, bias=False):
        BranchSeparables.__init__(self, in_channels, out_channels, kernel_size, stride, padding, bias)
        self.padding = nn.ZeroPad2d((z_padding, 0, z_padding, 0))
Example 8
def create_modules(module_defs):
    """
    Constructs module list of layer blocks from module configuration in module_defs
    """
    hyperparams = module_defs.pop(0)
    hyperparams.update(
        {
            "batch": int(hyperparams["batch"]),
            "subdivisions": int(hyperparams["subdivisions"]),
            "width": int(hyperparams["width"]),
            "height": int(hyperparams["height"]),
            "channels": int(hyperparams["channels"]),
            "optimizer": hyperparams.get("optimizer"),
            "momentum": float(hyperparams["momentum"]),
            "decay": float(hyperparams["decay"]),
            "learning_rate": float(hyperparams["learning_rate"]),
            "burn_in": int(hyperparams["burn_in"]),
            "max_batches": int(hyperparams["max_batches"]),
            "policy": hyperparams["policy"],
            "lr_steps": list(
                zip(map(int, hyperparams["steps"].split(",")), map(float, hyperparams["scales"].split(",")))
            ),
        }
    )
    assert (
        hyperparams["height"] == hyperparams["width"]
    ), "Height and width should be equal! Non square images are padded with zeros."
    output_filters = [hyperparams["channels"]]
    module_list = nn.ModuleList()
    for module_i, module_def in enumerate(module_defs):
        modules = nn.Sequential()

        if module_def["type"] == "convolutional":
            bn = int(module_def["batch_normalize"])
            filters = int(module_def["filters"])
            kernel_size = int(module_def["size"])
            pad = (kernel_size - 1) // 2
            modules.add_module(
                f"conv_{module_i}",
                nn.Conv2d(
                    in_channels=output_filters[-1],
                    out_channels=filters,
                    kernel_size=kernel_size,
                    stride=int(module_def["stride"]),
                    padding=pad,
                    bias=not bn,
                ),
            )
            if bn:
                modules.add_module(f"batch_norm_{module_i}", nn.BatchNorm2d(filters, momentum=0.9, eps=1e-5))
            if module_def["activation"] == "leaky":
                modules.add_module(f"leaky_{module_i}", nn.LeakyReLU(0.1))

        elif module_def["type"] == "maxpool":
            kernel_size = int(module_def["size"])
            stride = int(module_def["stride"])
            if kernel_size == 2 and stride == 1:
                modules.add_module(f"_debug_padding_{module_i}", nn.ZeroPad2d((0, 1, 0, 1)))
            maxpool = nn.MaxPool2d(kernel_size=kernel_size, stride=stride, padding=int((kernel_size - 1) // 2))
            modules.add_module(f"maxpool_{module_i}", maxpool)

        elif module_def["type"] == "upsample":
            upsample = Upsample(scale_factor=int(module_def["stride"]), mode="nearest")
            modules.add_module(f"upsample_{module_i}", upsample)

        elif module_def["type"] == "route":
            layers = [int(x) for x in module_def["layers"].split(",")]
            filters = sum([output_filters[1:][i] for i in layers])
            modules.add_module(f"route_{module_i}", nn.Sequential())

        elif module_def["type"] == "shortcut":
            filters = output_filters[1:][int(module_def["from"])]
            modules.add_module(f"shortcut_{module_i}", nn.Sequential())

        elif module_def["type"] == "yolo":
            anchor_idxs = [int(x) for x in module_def["mask"].split(",")]
            # Extract anchors
            anchors = [int(x) for x in module_def["anchors"].split(",")]
            anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
            anchors = [anchors[i] for i in anchor_idxs]
            num_classes = int(module_def["classes"])
            # Define detection layer
            yolo_layer = YOLOLayer(anchors, num_classes)
            modules.add_module(f"yolo_{module_i}", yolo_layer)
        # Register module list and number of output filters
        module_list.append(modules)
        output_filters.append(filters)

    return hyperparams, module_list
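The ZeroPad2d((0, 1, 0, 1)) in the maxpool branch exists because a size-2, stride-1 pool (the yolov3-tiny case) would otherwise shrink the map by one pixel; one zero column/row on the right and bottom keeps the resolution:

import torch
import torch.nn as nn

x = torch.randn(1, 3, 13, 13)
plain = nn.MaxPool2d(2, stride=1)(x)
padded = nn.MaxPool2d(2, stride=1)(nn.ZeroPad2d((0, 1, 0, 1))(x))
print(plain.shape, padded.shape)   # 12x12 vs. 13x13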
Example 9
    def __init__(self, padding):
        super(IRevInjectivePad, self).__init__()
        self.padding = padding
        self.pad = nn.ZeroPad2d(padding=(0, 0, 0, padding))
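ZeroPad2d((0, 0, 0, padding)) pads the bottom of the second-to-last axis, which only pads channels because i-RevNet's forward first permutes the tensor so the channel axis lands there. The forward is not part of this excerpt; a sketch of the usual implementation:

    # Sketch only -- assumes the standard i-RevNet injective-pad forward.
    def forward(self, x):
        x = x.permute(0, 2, 1, 3)    # (N, C, H, W) -> (N, H, C, W)
        x = self.pad(x)              # appends `padding` zero channels
        return x.permute(0, 2, 1, 3)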
Example 10
    def __init__(self,
                 use_norm=True,
                 num_class=2,
                 layer_nums=[3, 5, 5],
                 layer_strides=[2, 2, 2],
                 num_filters=[128, 128, 256],
                 upsample_strides=[1, 2, 4],
                 num_upsample_filters=[256, 256, 256],
                 num_input_features=128,
                 num_anchor_per_loc=2,
                 encode_background_as_zeros=True,
                 use_direction_classifier=True,
                 use_groupnorm=False,
                 num_groups=32,
                 use_bev=False,
                 box_code_size=7,
                 use_rc_net=False,
                 name='rpn'):
        super(RPN, self).__init__()
        self._num_anchor_per_loc = num_anchor_per_loc
        self._use_direction_classifier = use_direction_classifier
        self._use_bev = use_bev
        self._use_rc_net = use_rc_net
        assert len(layer_nums) == 3
        assert len(layer_strides) == len(layer_nums)
        assert len(num_filters) == len(layer_nums)
        assert len(upsample_strides) == len(layer_nums)
        assert len(num_upsample_filters) == len(layer_nums)
        factors = []
        for i in range(len(layer_nums)):
            assert int(np.prod(layer_strides[:i + 1])) % upsample_strides[i] == 0
            factors.append(np.prod(layer_strides[:i + 1]) // upsample_strides[i])
        assert all([x == factors[0] for x in factors])
        if use_norm:
            if use_groupnorm:
                BatchNorm2d = change_default_args(
                    num_groups=num_groups, eps=1e-3)(GroupNorm)
            else:
                BatchNorm2d = change_default_args(
                    eps=1e-3, momentum=0.01)(nn.BatchNorm2d)
            Conv2d = change_default_args(bias=False)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=False)(
                nn.ConvTranspose2d)
        else:
            BatchNorm2d = Empty
            Conv2d = change_default_args(bias=True)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=True)(
                nn.ConvTranspose2d)

        # note that when stride > 1, conv2d with same padding isn't
        # equal to pad-conv2d. we should use pad-conv2d.
        block2_input_filters = num_filters[0]
        if use_bev:
            self.bev_extractor = Sequential(
                Conv2d(6, 32, 3, padding=1),
                BatchNorm2d(32),
                nn.ReLU(),
                # nn.MaxPool2d(2, 2),
                Conv2d(32, 64, 3, padding=1),
                BatchNorm2d(64),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            )
            block2_input_filters += 64

        self.block1 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(
                num_input_features, num_filters[0], 3, stride=layer_strides[0]),
            BatchNorm2d(num_filters[0]),
            nn.ReLU(),
        )
        for i in range(layer_nums[0]):
            self.block1.add(
                Conv2d(num_filters[0], num_filters[0], 3, padding=1))
            self.block1.add(BatchNorm2d(num_filters[0]))
            self.block1.add(nn.ReLU())
        self.deconv1 = Sequential(
            ConvTranspose2d(
                num_filters[0],
                num_upsample_filters[0],
                upsample_strides[0],
                stride=upsample_strides[0]),
            BatchNorm2d(num_upsample_filters[0]),
            nn.ReLU(),
        )
        self.block2 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(
                block2_input_filters,
                num_filters[1],
                3,
                stride=layer_strides[1]),
            BatchNorm2d(num_filters[1]),
            nn.ReLU(),
        )
        for i in range(layer_nums[1]):
            self.block2.add(
                Conv2d(num_filters[1], num_filters[1], 3, padding=1))
            self.block2.add(BatchNorm2d(num_filters[1]))
            self.block2.add(nn.ReLU())
        self.deconv2 = Sequential(
            ConvTranspose2d(
                num_filters[1],
                num_upsample_filters[1],
                upsample_strides[1],
                stride=upsample_strides[1]),
            BatchNorm2d(num_upsample_filters[1]),
            nn.ReLU(),
        )
        self.block3 = Sequential(
            nn.ZeroPad2d(1),
            Conv2d(num_filters[1], num_filters[2], 3, stride=layer_strides[2]),
            BatchNorm2d(num_filters[2]),
            nn.ReLU(),
        )
        for i in range(layer_nums[2]):
            self.block3.add(
                Conv2d(num_filters[2], num_filters[2], 3, padding=1))
            self.block3.add(BatchNorm2d(num_filters[2]))
            self.block3.add(nn.ReLU())
        self.deconv3 = Sequential(
            ConvTranspose2d(
                num_filters[2],
                num_upsample_filters[2],
                upsample_strides[2],
                stride=upsample_strides[2]),
            BatchNorm2d(num_upsample_filters[2]),
            nn.ReLU(),
        )
        if encode_background_as_zeros:
            num_cls = num_anchor_per_loc * num_class
        else:
            num_cls = num_anchor_per_loc * (num_class + 1)
        self.conv_cls = nn.Conv2d(sum(num_upsample_filters), num_cls, 1)
        self.conv_box = nn.Conv2d(
            sum(num_upsample_filters), num_anchor_per_loc * box_code_size, 1)
        if use_direction_classifier:
            self.conv_dir_cls = nn.Conv2d(
                sum(num_upsample_filters), num_anchor_per_loc * 2, 1)

        if self._use_rc_net:
            self.conv_rc = nn.Conv2d(
                sum(num_upsample_filters), num_anchor_per_loc * box_code_size,
                1)
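The "pad-conv2d" comment above is about alignment: with stride > 1, TensorFlow-style "same" padding pads asymmetrically on even inputs, so it is not the same operation as a symmetric explicit pad followed by a padding=0 conv, even though the output sizes match. A small illustration:

import torch
import torch.nn as nn

x = torch.randn(1, 1, 4, 4)
conv = nn.Conv2d(1, 1, 3, stride=2, padding=0, bias=False)
tf_same = conv(nn.ZeroPad2d((0, 1, 0, 1))(x))   # pad right/bottom only
symmetric = conv(nn.ZeroPad2d(1)(x))            # pad all four sides
print(tf_same.shape, symmetric.shape)           # both (1, 1, 2, 2)
print(torch.allclose(tf_same, symmetric))       # False: different alignment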
Example 11
    def __init__(self,
                 use_norm=True,
                 num_class=2,
                 layer_nums=[3, 5, 5],
                 layer_strides=[2, 2, 2],
                 num_filters=[128, 128, 256],
                 upsample_strides=[1, 2, 4],
                 num_upsample_filters=[256, 256, 256],
                 num_input_features=128,
                 num_anchor_per_loc=2,
                 encode_background_as_zeros=True,
                 use_direction_classifier=True,
                 use_groupnorm=False,
                 num_groups=32,
                 use_bev=False,
                 box_code_size=7,
                 use_rc_net=False,
                 name='rpn'):
        super(RPNV2, self).__init__()
        self._num_anchor_per_loc = num_anchor_per_loc
        self._use_direction_classifier = use_direction_classifier
        self._use_bev = use_bev
        self._use_rc_net = use_rc_net
        # assert len(layer_nums) == 3
        assert len(layer_strides) == len(layer_nums)
        assert len(num_filters) == len(layer_nums)
        assert len(upsample_strides) == len(layer_nums)
        assert len(num_upsample_filters) == len(layer_nums)
        """
        factors = []
        for i in range(len(layer_nums)):
            assert int(np.prod(layer_strides[:i + 1])) % upsample_strides[i] == 0
            factors.append(np.prod(layer_strides[:i + 1]) // upsample_strides[i])
        assert all([x == factors[0] for x in factors])
        """
        if use_norm:
            if use_groupnorm:
                BatchNorm2d = change_default_args(
                    num_groups=num_groups, eps=1e-3)(GroupNorm)
            else:
                BatchNorm2d = change_default_args(
                    eps=1e-3, momentum=0.01)(nn.BatchNorm2d)
            Conv2d = change_default_args(bias=False)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=False)(
                nn.ConvTranspose2d)
        else:
            BatchNorm2d = Empty
            Conv2d = change_default_args(bias=True)(nn.Conv2d)
            ConvTranspose2d = change_default_args(bias=True)(
                nn.ConvTranspose2d)

        in_filters = [num_input_features, *num_filters[:-1]]
        # note that when stride > 1, conv2d with same padding isn't
        # equal to pad-conv2d. we should use pad-conv2d.
        blocks = []
        deblocks = []
        
        for i, layer_num in enumerate(layer_nums):
            block = Sequential(
                nn.ZeroPad2d(1),
                Conv2d(
                    in_filters[i], num_filters[i], 3, stride=layer_strides[i]),
                BatchNorm2d(num_filters[i]),
                nn.ReLU(),
            )
            for j in range(layer_num):
                block.add(
                    Conv2d(num_filters[i], num_filters[i], 3, padding=1))
                block.add(BatchNorm2d(num_filters[i]))
                block.add(nn.ReLU())
            blocks.append(block)
            deblock = Sequential(
                ConvTranspose2d(
                    num_filters[i],
                    num_upsample_filters[i],
                    upsample_strides[i],
                    stride=upsample_strides[i]),
                BatchNorm2d(num_upsample_filters[i]),
                nn.ReLU(),
            )
            deblocks.append(deblock)
        self.blocks = nn.ModuleList(blocks)
        self.deblocks = nn.ModuleList(deblocks)
        if encode_background_as_zeros:
            num_cls = num_anchor_per_loc * num_class
        else:
            num_cls = num_anchor_per_loc * (num_class + 1)
        self.conv_cls = nn.Conv2d(sum(num_upsample_filters), num_cls, 1)
        self.conv_box = nn.Conv2d(
            sum(num_upsample_filters), num_anchor_per_loc * box_code_size, 1)
        if use_direction_classifier:
            self.conv_dir_cls = nn.Conv2d(
                sum(num_upsample_filters), num_anchor_per_loc * 2, 1)

        if self._use_rc_net:
            self.conv_rc = nn.Conv2d(
                sum(num_upsample_filters), num_anchor_per_loc * box_code_size,
                1)
Example 12
def create_modules(module_defs, img_size, arc):
    # Constructs module list of layer blocks from module configuration in module_defs

    hyperparams = module_defs.pop(0)
    output_filters = [int(hyperparams['channels'])]
    module_list = nn.ModuleList()
    routs = []  # list of layers which route to deeper layers
    yolo_index = -1

    for i, mdef in enumerate(module_defs):
        modules = nn.Sequential()

        if mdef['type'] == 'convolutional':
            bn = int(mdef['batch_normalize'])
            filters = int(mdef['filters'])
            kernel_size = int(mdef['size'])
            pad = (kernel_size - 1) // 2 if int(mdef['pad']) else 0
            modules.add_module(
                'Conv2d',
                nn.Conv2d(in_channels=output_filters[-1],
                          out_channels=filters,
                          kernel_size=kernel_size,
                          stride=int(mdef['stride']),
                          padding=pad,
                          bias=not bn))
            if bn:
                modules.add_module('BatchNorm2d',
                                   nn.BatchNorm2d(filters, momentum=0.1))
            if mdef['activation'] == 'leaky':  # TODO: activation study https://github.com/ultralytics/yolov3/issues/441
                modules.add_module('activation', nn.LeakyReLU(0.1,
                                                              inplace=True))
                # modules.add_module('activation', nn.PReLU(num_parameters=1, init=0.10))
                # modules.add_module('activation', Swish())
            elif mdef['activation'] == 'mish':
                modules.add_module('activation', Mish())
        elif mdef['type'] == 'convolutional_nobias':
            filters = int(mdef['filters'])
            kernel_size = int(mdef['size'])
            modules.add_module(
                'Conv2d',
                nn.Conv2d(in_channels=output_filters[-1],
                          out_channels=filters,
                          kernel_size=kernel_size,
                          stride=int(mdef['stride']),
                          bias=False))
        elif mdef['type'] == 'convolutional_noconv':
            filters = int(mdef['filters'])
            modules.add_module('BatchNorm2d',
                               nn.BatchNorm2d(filters, momentum=0.1))
            modules.add_module('activation', nn.LeakyReLU(0.1, inplace=True))

        elif mdef['type'] == 'maxpool':
            kernel_size = int(mdef['size'])
            stride = int(mdef['stride'])
            maxpool = nn.MaxPool2d(kernel_size=kernel_size,
                                   stride=stride,
                                   padding=int((kernel_size - 1) // 2))
            if kernel_size == 2 and stride == 1:  # yolov3-tiny
                modules.add_module('ZeroPad2d', nn.ZeroPad2d((0, 1, 0, 1)))
                modules.add_module('MaxPool2d', maxpool)
            else:
                modules = maxpool

        elif mdef['type'] == 'upsample':
            modules = nn.Upsample(scale_factor=int(mdef['stride']),
                                  mode='nearest')

        elif mdef['type'] == 'route':  # nn.Sequential() placeholder for 'route' layer
            layers = [int(x) for x in mdef['layers'].split(',')]
            filters = sum(
                [output_filters[i + 1 if i > 0 else i] for i in layers])
            if 'groups' in mdef:
                filters = filters // 2
            routs.extend([l if l > 0 else l + i for l in layers])
            # if mdef[i+1]['type'] == 'reorg3d':
            #     modules = nn.Upsample(scale_factor=1/float(mdef[i+1]['stride']), mode='nearest')  # reorg3d

        elif mdef['type'] == 'shortcut':  # nn.Sequential() placeholder for 'shortcut' layer
            filters = output_filters[int(mdef['from'])]
            layer = int(mdef['from'])
            routs.extend([i + layer if layer < 0 else layer])

        elif mdef['type'] == 'reorg3d':  # yolov3-spp-pan-scale
            # torch.Size([16, 128, 104, 104])
            # torch.Size([16, 64, 208, 208]) <-- # stride 2 interpolate dimensions 2 and 3 to cat with prior layer
            pass

        elif mdef['type'] == 'yolo':
            yolo_index += 1
            mask = [int(x) for x in mdef['mask'].split(',')]  # anchor mask
            modules = YOLOLayer(
                anchors=mdef['anchors'][mask],  # anchor list
                nc=int(mdef['classes']),  # number of classes
                img_size=img_size,  # (416, 416)
                yolo_index=yolo_index,  # 0, 1 or 2
                arc=arc)  # yolo architecture

            # Initialize preceding Conv2d() bias (https://arxiv.org/pdf/1708.02002.pdf section 3.3)
            try:
                if arc == 'defaultpw' or arc == 'Fdefaultpw':  # default with positive weights
                    b = [-4, -3.6]  # obj, cls
                elif arc == 'default':  # default no pw (40 cls, 80 obj)
                    b = [-5.5, -4.0]
                elif arc == 'uBCE':  # unified BCE (80 classes)
                    b = [0, -8.5]
                elif arc == 'uCE':  # unified CE (1 background + 80 classes)
                    b = [10, -0.1]
                elif arc == 'Fdefault':  # Focal default no pw (28 cls, 21 obj, no pw)
                    b = [-2.1, -1.8]
                elif arc == 'uFBCE' or arc == 'uFBCEpw':  # unified FocalBCE (5120 obj, 80 classes)
                    b = [0, -6.5]
                elif arc == 'uFCE':  # unified FocalCE (64 cls, 1 background + 80 classes)
                    b = [7.7, -1.1]

                bias = module_list[-1][0].bias.view(len(mask),
                                                    -1)  # 255 to 3x85
                bias[:, 4] += b[0] - bias[:, 4].mean()  # obj
                bias[:, 5:] += b[1] - bias[:, 5:].mean()  # cls
                # bias = torch.load('weights/yolov3-spp.bias.pt')[yolo_index]  # list of tensors [3x85, 3x85, 3x85]
                module_list[-1][0].bias = torch.nn.Parameter(bias.view(-1))
                # utils.print_model_biases(model)
            except Exception:
                print('WARNING: smart bias initialization failure.')
        elif mdef['type'] == 'focus':
            filters = int(mdef['filters'])
        else:
            print('Warning: Unrecognized Layer Type: ' + mdef['type'])

        # Register module list and number of output filters
        module_list.append(modules)
        output_filters.append(filters)

    return module_list, routs, hyperparams
Example 13
    def __init__(self, model_cfg, input_channels):
        super().__init__()
        self.model_cfg = model_cfg

        if self.model_cfg.get('LAYER_NUMS', None) is not None:
            assert len(self.model_cfg.LAYER_NUMS) == len(
                self.model_cfg.LAYER_STRIDES) == len(
                    self.model_cfg.NUM_FILTERS)
            layer_nums = self.model_cfg.LAYER_NUMS
            layer_strides = self.model_cfg.LAYER_STRIDES
            num_filters = self.model_cfg.NUM_FILTERS
        else:
            layer_nums = layer_strides = num_filters = []

        if self.model_cfg.get('UPSAMPLE_STRIDES', None) is not None:
            assert len(self.model_cfg.UPSAMPLE_STRIDES) == len(
                self.model_cfg.NUM_UPSAMPLE_FILTERS)
            num_upsample_filters = self.model_cfg.NUM_UPSAMPLE_FILTERS
            upsample_strides = self.model_cfg.UPSAMPLE_STRIDES
        else:
            upsample_strides = num_upsample_filters = []

        num_levels = len(layer_nums)
        c_in_list = [input_channels, *num_filters[:-1]]
        self.blocks = nn.ModuleList()
        self.deblocks = nn.ModuleList()
        for idx in range(num_levels):
            cur_layers = [
                nn.ZeroPad2d(1),
                nn.Conv2d(c_in_list[idx],
                          num_filters[idx],
                          kernel_size=3,
                          stride=layer_strides[idx],
                          padding=0,
                          bias=False),
                nn.BatchNorm2d(num_filters[idx], eps=1e-3, momentum=0.01),
                nn.ReLU()
            ]
            for k in range(layer_nums[idx]):
                cur_layers.extend([
                    nn.Conv2d(num_filters[idx],
                              num_filters[idx],
                              kernel_size=3,
                              padding=1,
                              bias=False),
                    nn.BatchNorm2d(num_filters[idx], eps=1e-3, momentum=0.01),
                    nn.ReLU()
                ])
            self.blocks.append(nn.Sequential(*cur_layers))
            if len(upsample_strides) > 0:
                stride = upsample_strides[idx]
                if stride > 1:
                    self.deblocks.append(
                        nn.Sequential(
                            nn.ConvTranspose2d(num_filters[idx],
                                               num_upsample_filters[idx],
                                               upsample_strides[idx],
                                               stride=upsample_strides[idx],
                                               bias=False),
                            nn.BatchNorm2d(num_upsample_filters[idx],
                                           eps=1e-3,
                                           momentum=0.01), nn.ReLU()))
                else:
                    stride = int(np.round(1 / stride))  # plain int; np.int is gone from modern NumPy
                    self.deblocks.append(
                        nn.Sequential(
                            nn.Conv2d(num_filters[idx],
                                      num_upsample_filters[idx],
                                      stride,
                                      stride=stride,
                                      bias=False),
                            nn.BatchNorm2d(num_upsample_filters[idx],
                                           eps=1e-3,
                                           momentum=0.01), nn.ReLU()))

        c_in = sum(num_upsample_filters)
        if len(upsample_strides) > num_levels:
            self.deblocks.append(
                nn.Sequential(
                    nn.ConvTranspose2d(c_in,
                                       c_in,
                                       upsample_strides[-1],
                                       stride=upsample_strides[-1],
                                       bias=False),
                    nn.BatchNorm2d(c_in, eps=1e-3, momentum=0.01),
                    nn.ReLU(),
                ))

        self.num_bev_features = c_in
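In the stride <= 1 branch above, a nominal upsample stride below 1 is inverted into an integer conv stride, i.e. the "deblock" actually downsamples:

import numpy as np

stride = 0.5                           # a fractional UPSAMPLE_STRIDES entry
stride = int(np.round(1 / stride))     # -> 2: a stride-2 conv, halving H and W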
Example 14
def create_modules(module_defs):
    """
    Constructs module list of layer blocks from module configuration in module_defs
    """
    hyperparams = module_defs.pop(0)
    output_filters = [int(hyperparams['channels'])]
    module_list = nn.ModuleList()
    yolo_layer_count = 0
    for i, module_def in enumerate(module_defs):
        modules = nn.Sequential()

        if module_def['type'] == 'convolutional':
            bn = int(module_def['batch_normalize'])
            filters = int(module_def['filters'])
            kernel_size = int(module_def['size'])
            pad = (kernel_size - 1) // 2 if int(module_def['pad']) else 0
            modules.add_module(
                'conv_%d' % i,
                nn.Conv2d(in_channels=output_filters[-1],
                          out_channels=filters,
                          kernel_size=kernel_size,
                          stride=int(module_def['stride']),
                          padding=pad,
                          bias=not bn))
            if bn:
                modules.add_module('batch_norm_%d' % i,
                                   nn.BatchNorm2d(filters))
            if module_def['activation'] == 'leaky':
                modules.add_module('leaky_%d' % i, nn.LeakyReLU(0.1))

        elif module_def['type'] == 'maxpool':
            kernel_size = int(module_def['size'])
            stride = int(module_def['stride'])
            if kernel_size == 2 and stride == 1:
                modules.add_module('_debug_padding_%d' % i,
                                   nn.ZeroPad2d((0, 1, 0, 1)))
            maxpool = nn.MaxPool2d(kernel_size=kernel_size,
                                   stride=stride,
                                   padding=int((kernel_size - 1) // 2))
            modules.add_module('maxpool_%d' % i, maxpool)

        elif module_def['type'] == 'upsample':
            # upsample = nn.Upsample(scale_factor=int(module_def['stride']), mode='nearest')  # WARNING: deprecated
            upsample = Upsample(scale_factor=int(module_def['stride']))
            modules.add_module('upsample_%d' % i, upsample)

        elif module_def['type'] == 'route':
            layers = [int(x) for x in module_def['layers'].split(',')]
            filters = sum(
                [output_filters[i + 1 if i > 0 else i] for i in layers])
            modules.add_module('route_%d' % i, EmptyLayer())

        elif module_def['type'] == 'shortcut':
            filters = output_filters[int(module_def['from'])]
            modules.add_module('shortcut_%d' % i, EmptyLayer())

        elif module_def['type'] == 'yolo':
            anchor_idxs = [int(x) for x in module_def['mask'].split(',')]
            # Extract anchors
            anchors = [float(x) for x in module_def['anchors'].split(',')]
            anchors = [(anchors[i], anchors[i + 1])
                       for i in range(0, len(anchors), 2)]
            anchors = [anchors[i] for i in anchor_idxs]
            nC = int(module_def['classes'])  # number of classes
            img_size = int(hyperparams['height'])
            # Define detection layer
            yolo_layer = YOLOLayer(anchors,
                                   nC,
                                   img_size,
                                   yolo_layer_count,
                                   cfg=hyperparams['cfg'])
            modules.add_module('yolo_%d' % i, yolo_layer)
            yolo_layer_count += 1

        # Register module list and number of output filters
        module_list.append(modules)
        output_filters.append(filters)

    return hyperparams, module_list
Example 15
    def __init__(self):
        super(CAN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=True),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1, bias=True),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1, bias=True),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1, bias=True),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1, bias=True),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=True),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1, bias=True),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1, bias=True),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1, bias=True),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1, bias=True),
            nn.ReLU(inplace=True),
            nn.Conv2d(512,
                      512,
                      kernel_size=3,
                      stride=1,
                      padding=1,
                      bias=True,
                      dilation=2),
            nn.ReLU(inplace=True),
            nn.Conv2d(512,
                      512,
                      kernel_size=3,
                      stride=1,
                      padding=1,
                      bias=True,
                      dilation=2),
            nn.ReLU(inplace=True),
            nn.Conv2d(512,
                      512,
                      kernel_size=3,
                      stride=1,
                      padding=1,
                      bias=True,
                      dilation=2),
            nn.ReLU(inplace=True),
            nn.Conv2d(512,
                      4096,
                      kernel_size=7,
                      stride=1,
                      padding=3,
                      bias=True,
                      dilation=4),  #fc6 layer
            nn.ReLU(inplace=True),
            nn.Conv2d(4096,
                      4096,
                      kernel_size=1,
                      stride=1,
                      padding=0,
                      bias=True),  #fc7 layer
            nn.ReLU(inplace=True),
            nn.Conv2d(4096, 2, kernel_size=1, stride=1, padding=0,
                      bias=True),  #final layer
            nn.ReLU(inplace=True),
            nn.ZeroPad2d(1),

            # context module
            nn.Conv2d(2,
                      2,
                      kernel_size=3,
                      stride=1,
                      padding=(0, 1),
                      bias=True,
                      dilation=(1, 2)),  #ctx_conv
            nn.ReLU(inplace=True),
            nn.ZeroPad2d(1),
            nn.Conv2d(2,
                      2,
                      kernel_size=3,
                      stride=1,
                      padding=(0, 1),
                      bias=True,
                      dilation=(1, 2)),
            nn.ReLU(inplace=True),
            nn.ZeroPad2d(2),
            nn.Conv2d(2,
                      2,
                      kernel_size=3,
                      stride=1,
                      padding=(0, 2),
                      bias=True,
                      dilation=(2, 4)),
            nn.ReLU(inplace=True),
            nn.ZeroPad2d(4),
            nn.Conv2d(2,
                      2,
                      kernel_size=3,
                      stride=1,
                      padding=(0, 4),
                      bias=True,
                      dilation=(4, 8)),
            nn.ReLU(inplace=True),
            nn.ZeroPad2d(8),
            nn.Conv2d(2,
                      2,
                      kernel_size=3,
                      stride=1,
                      padding=(0, 8),
                      bias=True,
                      dilation=(8, 16)),
            nn.ReLU(inplace=True),
            nn.ZeroPad2d(16),
            nn.Conv2d(2,
                      2,
                      kernel_size=3,
                      stride=1,
                      padding=(0, 16),
                      bias=True,
                      dilation=(16, 32)),
            nn.ReLU(inplace=True),
            nn.ZeroPad2d(1),
            nn.Conv2d(2,
                      2,
                      kernel_size=3,
                      stride=1,
                      padding=0,
                      bias=True,
                      dilation=(1, 1)),
            nn.ReLU(inplace=True),

            #nn.ZeroPad2d(1),
            nn.Conv2d(2,
                      2,
                      kernel_size=1,
                      stride=1,
                      padding=0,
                      bias=False,
                      dilation=1),
            # nn.ZeroPad2d(32),
            # nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1, bias=True, dilation=32),
            # nn.ReLU(inplace=True),

            # nn.ZeroPad2d(64),
            # nn.Conv2d(19, 19, kernel_size=3, stride=1, padding=1, bias=True, dilation=64),
            # nn.ReLU(inplace=True),

            # nn.ZeroPad2d(1),
            # nn.Conv2d(19, 19, kernel_size=3, stride=1, padding=1, bias=True),
            # nn.LeakyReLU(inplace=True),
            # nn.Conv2d(19, 2, kernel_size=1, stride=1, padding=0, bias=True),

            # nn.Upsample(size=(CONFIG['tusimple']['output_shape'][0],CONFIG['tusimple']['output_shape'][1]), mode='bilinear'),

            # nn.Conv2d(19, 19, kernel_size=16, stride=1, padding=7, bias=False),
            # nn.ReLU(inplace=True),

            #nn.Softmax(dim=1)
        )
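Each context-module block pairs nn.ZeroPad2d(d) with a 3x3 conv using padding=(0, d) and dilation=(d, 2d), and the bookkeeping leaves H and W unchanged: height gains 2d from the pad while the dilated kernel reaches (3 - 1) * d = 2d; width gains 2d + 2d while the kernel reaches 4d. A quick check with d = 2:

import torch
import torch.nn as nn

d = 2
block = nn.Sequential(
    nn.ZeroPad2d(d),
    nn.Conv2d(2, 2, kernel_size=3, padding=(0, d), dilation=(d, 2 * d)),
)
x = torch.randn(1, 2, 32, 32)
print(block(x).shape)   # torch.Size([1, 2, 32, 32])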
Example 16
def train_single_scale(netD,
                       netG,
                       reals,
                       Gs,
                       Zs,
                       in_s,
                       NoiseAmp,
                       opt,
                       centers=None):

    real = reals[len(Gs)]
    opt.nzx = real.shape[2]  #+(opt.ker_size-1)*(opt.num_layer)
    opt.nzy = real.shape[3]  #+(opt.ker_size-1)*(opt.num_layer)
    opt.receptive_field = opt.ker_size + ((opt.ker_size - 1) *
                                          (opt.num_layer - 1)) * opt.stride
    pad_noise = int(((opt.ker_size - 1) * opt.num_layer) / 2)
    pad_image = int(((opt.ker_size - 1) * opt.num_layer) / 2)
    if opt.mode == 'animation_train':
        opt.nzx = real.shape[2] + (opt.ker_size - 1) * (opt.num_layer)
        opt.nzy = real.shape[3] + (opt.ker_size - 1) * (opt.num_layer)
        pad_noise = 0
    m_noise = nn.ZeroPad2d(int(pad_noise))
    m_image = nn.ZeroPad2d(int(pad_image))

    alpha = opt.alpha

    fixed_noise = functions.generate_noise([opt.nc_z, opt.nzx, opt.nzy])
    z_opt = torch.full(fixed_noise.shape, 0., device=opt.device)  # float fill; an int literal yields an int64 tensor in newer torch
    z_opt = m_noise(z_opt)

    # setup optimizer
    optimizerD = optim.Adam(netD.parameters(),
                            lr=opt.lr_d,
                            betas=(opt.beta1, 0.999))
    optimizerG = optim.Adam(netG.parameters(),
                            lr=opt.lr_g,
                            betas=(opt.beta1, 0.999))
    schedulerD = torch.optim.lr_scheduler.MultiStepLR(optimizer=optimizerD,
                                                      milestones=[1600],
                                                      gamma=opt.gamma)
    schedulerG = torch.optim.lr_scheduler.MultiStepLR(optimizer=optimizerG,
                                                      milestones=[1600],
                                                      gamma=opt.gamma)

    errD2plot = []
    errG2plot = []
    D_real2plot = []
    D_fake2plot = []
    z_opt2plot = []

    for epoch in range(opt.niter):
        schedulerD.step()
        schedulerG.step()
        if (Gs == []) & (opt.mode != 'SR_train'):
            z_opt = functions.generate_noise([1, opt.nzx, opt.nzy])
            z_opt = m_noise(z_opt.expand(1, 3, opt.nzx, opt.nzy))
            noise_ = functions.generate_noise([1, opt.nzx, opt.nzy])
            noise_ = m_noise(noise_.expand(1, 3, opt.nzx, opt.nzy))
        else:
            noise_ = functions.generate_noise([opt.nc_z, opt.nzx, opt.nzy])
            noise_ = m_noise(noise_)

        ############################
        # (1) Update D network: maximize D(x) + D(G(z))
        ###########################
        for j in range(opt.Dsteps):
            # train with real
            netD.zero_grad()

            output = netD(real).to(opt.device)
            #D_real_map = output.detach()
            errD_real = -output.mean()  #-a
            errD_real.backward(retain_graph=True)
            D_x = -errD_real.item()

            # train with fake
            if (j == 0) & (epoch == 0):
                if (Gs == []) & (opt.mode != 'SR_train'):
                    prev = torch.full([1, opt.nc_z, opt.nzx, opt.nzy],
                                      0.,
                                      device=opt.device)
                    in_s = prev
                    prev = m_image(prev)
                    z_prev = torch.full([1, opt.nc_z, opt.nzx, opt.nzy],
                                        0.,
                                        device=opt.device)
                    z_prev = m_noise(z_prev)
                    opt.noise_amp = 1
                elif opt.mode == 'SR_train':
                    z_prev = in_s
                    criterion = nn.MSELoss()
                    RMSE = torch.sqrt(criterion(real, z_prev))
                    opt.noise_amp = opt.noise_amp_init * RMSE
                    z_prev = m_image(z_prev)
                    prev = z_prev
                else:
                    prev = draw_concat(Gs, Zs, reals, NoiseAmp, in_s, 'rand',
                                       m_noise, m_image, opt)
                    prev = m_image(prev)
                    z_prev = draw_concat(Gs, Zs, reals, NoiseAmp, in_s, 'rec',
                                         m_noise, m_image, opt)
                    criterion = nn.MSELoss()
                    RMSE = torch.sqrt(criterion(real, z_prev))
                    opt.noise_amp = opt.noise_amp_init * RMSE
                    z_prev = m_image(z_prev)
            else:
                prev = draw_concat(Gs, Zs, reals, NoiseAmp, in_s, 'rand',
                                   m_noise, m_image, opt)
                prev = m_image(prev)

            if opt.mode == 'paint_train':
                prev = functions.quant2centers(prev, centers)
                plt.imsave('%s/prev.png' % (opt.outf),
                           functions.convert_image_np(prev),
                           vmin=0,
                           vmax=1)

            if (Gs == []) & (opt.mode != 'SR_train'):
                noise = noise_
            else:
                noise = opt.noise_amp * noise_ + prev

            fake = netG(noise.detach(), prev)
            output = netD(fake.detach())
            errD_fake = output.mean()
            errD_fake.backward(retain_graph=True)
            D_G_z = output.mean().item()

            gradient_penalty = functions.calc_gradient_penalty(
                netD, real, fake, opt.lambda_grad)
            gradient_penalty.backward()

            errD = errD_real + errD_fake + gradient_penalty
            optimizerD.step()

        errD2plot.append(errD.detach())

        ############################
        # (2) Update G network: maximize D(G(z))
        ###########################

        for j in range(opt.Gsteps):
            netG.zero_grad()
            output = netD(fake)
            #D_fake_map = output.detach()
            errG = -output.mean()
            errG.backward(retain_graph=True)
            if alpha != 0:
                loss = nn.MSELoss()
                if opt.mode == 'paint_train':
                    z_prev = functions.quant2centers(z_prev, centers)
                    plt.imsave('%s/z_prev.png' % (opt.outf),
                               functions.convert_image_np(z_prev),
                               vmin=0,
                               vmax=1)
                Z_opt = opt.noise_amp * z_opt + z_prev
                rec_loss = alpha * loss(netG(Z_opt.detach(), z_prev), real)
                rec_loss.backward(retain_graph=True)
                rec_loss = rec_loss.detach()
            else:
                Z_opt = z_opt
                rec_loss = 0

            optimizerG.step()

        errG2plot.append(errG.detach() + rec_loss)
        D_real2plot.append(D_x)
        D_fake2plot.append(D_G_z)
        z_opt2plot.append(rec_loss)

        if epoch % 25 == 0 or epoch == (opt.niter - 1):
            print('[%d/%d]' % (epoch, opt.niter))

        if epoch % 500 == 0 or epoch == (opt.niter - 1):
            plt.imsave('%s/fake_sample.png' % (opt.outf),
                       functions.convert_image_np(fake.detach()),
                       vmin=0,
                       vmax=1)
            plt.imsave('%s/G(z_opt).png' % (opt.outf),
                       functions.convert_image_np(
                           netG(Z_opt.detach(), z_prev).detach()),
                       vmin=0,
                       vmax=1)
            #plt.imsave('%s/D_fake.png'   % (opt.outf), functions.convert_image_np(D_fake_map))
            #plt.imsave('%s/D_real.png'   % (opt.outf), functions.convert_image_np(D_real_map))
            #plt.imsave('%s/z_opt.png'    % (opt.outf), functions.convert_image_np(z_opt.detach()), vmin=0, vmax=1)
            #plt.imsave('%s/prev.png'     %  (opt.outf), functions.convert_image_np(prev), vmin=0, vmax=1)
            #plt.imsave('%s/noise.png'    %  (opt.outf), functions.convert_image_np(noise), vmin=0, vmax=1)
            #plt.imsave('%s/z_prev.png'   % (opt.outf), functions.convert_image_np(z_prev), vmin=0, vmax=1)

            torch.save(z_opt, '%s/z_opt.pth' % (opt.outf))
    functions.save_networks(netG, netD, z_opt, opt)
    return z_opt, in_s, netG
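The padding arithmetic: m_noise and m_image grow each spatial side by ((ker_size - 1) * num_layer) / 2, which is exactly what the generator's num_layer unpadded ker_size x ker_size convs eat back, so the output lands on the original (nzx, nzy) resolution. With the usual SinGAN defaults (an assumption here):

ker_size, num_layer = 3, 5                      # SinGAN defaults
pad = int(((ker_size - 1) * num_layer) / 2)     # -> 5 zeros on every side
shrink_per_layer = ker_size - 1                 # each padding-free conv removes 2
print(2 * pad == num_layer * shrink_per_layer)  # True: the sizes cancel out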
Example 17
def create_modules(module_defs, device='cuda'):
    """
    Constructs module list of layer blocks from module configuration in module_defs
    """
    hyperparams = module_defs.pop(0)
    output_filters = [int(hyperparams['channels'])]
    module_list = nn.ModuleList()
    yolo_layer_count = 0
    for i, module_def in enumerate(module_defs):
        modules = nn.Sequential()

        if module_def['type'] == 'convolutional':
            bn = int(module_def['batch_normalize'])
            filters = int(module_def['filters'])
            kernel_size = int(module_def['size'])
            pad = (kernel_size - 1) // 2 if int(module_def['pad']) else 0
            modules.add_module(
                'conv_%d' % i,
                nn.Conv2d(in_channels=output_filters[-1],
                          out_channels=filters,
                          kernel_size=kernel_size,
                          stride=int(module_def['stride']),
                          padding=pad,
                          bias=not bn))
            if bn:
                after_bn = batch_norm(filters)
                modules.add_module('batch_norm_%d' % i, after_bn)
                # BN is uniformly initialized by default in pytorch 1.0.1.
                # In pytorch>1.2.0, BN weights are initialized with constant 1,
                # but we find with the uniform initialization the model converges faster.
                nn.init.uniform_(after_bn.weight)
                nn.init.zeros_(after_bn.bias)
            if module_def['activation'] == 'leaky':
                modules.add_module('leaky_%d' % i, nn.LeakyReLU(0.1))

        elif module_def['type'] == 'maxpool':
            kernel_size = int(module_def['size'])
            stride = int(module_def['stride'])
            if kernel_size == 2 and stride == 1:
                modules.add_module('_debug_padding_%d' % i,
                                   nn.ZeroPad2d((0, 1, 0, 1)))
            maxpool = nn.MaxPool2d(kernel_size=kernel_size,
                                   stride=stride,
                                   padding=int((kernel_size - 1) // 2))
            modules.add_module('maxpool_%d' % i, maxpool)

        elif module_def['type'] == 'upsample':
            upsample = Upsample(scale_factor=int(module_def['stride']))
            modules.add_module('upsample_%d' % i, upsample)

        elif module_def['type'] == 'route':
            layers = [int(x) for x in module_def['layers'].split(',')]
            filters = sum(
                [output_filters[i + 1 if i > 0 else i] for i in layers])
            modules.add_module('route_%d' % i, EmptyLayer())

        elif module_def['type'] == 'shortcut':
            filters = output_filters[int(module_def['from'])]
            modules.add_module('shortcut_%d' % i, EmptyLayer())

        elif module_def['type'] == 'yolo':
            anchor_idxs = [int(x) for x in module_def['mask'].split(',')]
            # Extract anchors
            anchors = [float(x) for x in module_def['anchors'].split(',')]
            anchors = [(anchors[i], anchors[i + 1])
                       for i in range(0, len(anchors), 2)]
            anchors = [anchors[i] for i in anchor_idxs]
            nC = int(module_def['classes'])  # number of classes
            img_size = (int(hyperparams['width']), int(hyperparams['height']))
            # Define detection layer
            yolo_layer = YOLOLayer(anchors, nC, int(hyperparams['nID']),
                                   int(hyperparams['embedding_dim']), img_size,
                                   yolo_layer_count, device)
            modules.add_module('yolo_%d' % i, yolo_layer)
            yolo_layer_count += 1

        # Register module list and number of output filters
        module_list.append(modules)
        output_filters.append(filters)

    return hyperparams, module_list
Example 18
    def forward(self, guidance, blur_depth, sparse_depth=None):

        # normalize features
        gate1_wb_cmb = torch.abs(guidance.narrow(1, 0, self.out_feature))
        gate2_wb_cmb = torch.abs(
            guidance.narrow(1, 1 * self.out_feature, self.out_feature))
        gate3_wb_cmb = torch.abs(
            guidance.narrow(1, 2 * self.out_feature, self.out_feature))
        gate4_wb_cmb = torch.abs(
            guidance.narrow(1, 3 * self.out_feature, self.out_feature))
        gate5_wb_cmb = torch.abs(
            guidance.narrow(1, 4 * self.out_feature, self.out_feature))
        gate6_wb_cmb = torch.abs(
            guidance.narrow(1, 5 * self.out_feature, self.out_feature))
        gate7_wb_cmb = torch.abs(
            guidance.narrow(1, 6 * self.out_feature, self.out_feature))
        gate8_wb_cmb = torch.abs(
            guidance.narrow(1, 7 * self.out_feature, self.out_feature))

        # gate1: left_top,    gate2: center_top,    gate3: right_top
        # gate4: left_center,                       gate5: right_center
        # gate6: left_bottom, gate7: center_bottom, gate8: right_bottom

        # top pad
        left_top_pad = nn.ZeroPad2d((0, 2, 0, 2))
        gate1_wb_cmb = left_top_pad(gate1_wb_cmb).unsqueeze(1)

        center_top_pad = nn.ZeroPad2d((1, 1, 0, 2))
        gate2_wb_cmb = center_top_pad(gate2_wb_cmb).unsqueeze(1)

        right_top_pad = nn.ZeroPad2d((2, 0, 0, 2))
        gate3_wb_cmb = right_top_pad(gate3_wb_cmb).unsqueeze(1)

        # center pad
        left_center_pad = nn.ZeroPad2d((0, 2, 1, 1))
        gate4_wb_cmb = left_center_pad(gate4_wb_cmb).unsqueeze(1)

        right_center_pad = nn.ZeroPad2d((2, 0, 1, 1))
        gate5_wb_cmb = right_center_pad(gate5_wb_cmb).unsqueeze(1)

        # bottom pad
        left_bottom_pad = nn.ZeroPad2d((0, 2, 2, 0))
        gate6_wb_cmb = left_bottom_pad(gate6_wb_cmb).unsqueeze(1)

        center_bottom_pad = nn.ZeroPad2d((1, 1, 2, 0))
        gate7_wb_cmb = center_bottom_pad(gate7_wb_cmb).unsqueeze(1)

        right_bottom_pad = nn.ZeroPad2d((2, 0, 2, 0))
        gate8_wb_cmb = right_bottom_pad(gate8_wb_cmb).unsqueeze(1)

        gate_wb = torch.cat(
            (gate1_wb_cmb, gate2_wb_cmb, gate3_wb_cmb, gate4_wb_cmb,
             gate5_wb_cmb, gate6_wb_cmb, gate7_wb_cmb, gate8_wb_cmb), 1)

        # pad the input and convert it to 8-channel 3D features
        raw_depth_input = blur_depth
        result_depth = blur_depth

        if sparse_depth is not None:
            sparse_mask = sparse_depth.sign()

        for i in range(self.prop_time):

            # one propagation
            spn_kernel = self.prop_kernel
            result_depth = self.pad_blur_depth(result_depth)
            neighbor_weighted_sum = self.eight_way_propagation(
                gate_wb, result_depth, spn_kernel)
            neighbor_weighted_sum = neighbor_weighted_sum.squeeze(1)
            neighbor_weighted_sum = neighbor_weighted_sum[:, :, 1:-1, 1:-1]
            result_depth = neighbor_weighted_sum
            if sparse_depth is not None:
                result_depth = ((1 - sparse_mask) * result_depth +
                                sparse_mask * raw_depth_input)

        return result_depth
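Each directional pad adds two rows and two columns in total, so the eight shifted gate maps all share the (H + 2, W + 2) shape that torch.cat needs. A minimal shape check, with tensor sizes chosen arbitrarily:

import torch
import torch.nn as nn

# The eight (left, right, top, bottom) pad tuples used above.
pads = [(0, 2, 0, 2), (1, 1, 0, 2), (2, 0, 0, 2), (0, 2, 1, 1),
        (2, 0, 1, 1), (0, 2, 2, 0), (1, 1, 2, 0), (2, 0, 2, 0)]
g = torch.randn(1, 1, 4, 4)
shifted = [nn.ZeroPad2d(p)(g).unsqueeze(1) for p in pads]
gate_wb = torch.cat(shifted, 1)
assert gate_wb.shape == (1, 8, 1, 6, 6)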
Example 19
    def __init__(self, stride=2, padding=1):
        super(AvgPoolPad, self).__init__()
        self.pad = nn.ZeroPad2d((1, 0, 1, 0))
        self.pool = nn.AvgPool2d(3, stride=stride, padding=padding,
                                 count_include_pad=False)
Example 20
    def __init__(self, n_class=1):
        super(FCN8s, self).__init__()

        padDim = 4
        nFilters = [16, 32, 32]
        filtsize = [5, 5, 5]
        poolsize = [2, 2, 2]
        stepSize = [2, 2, 2]

        ninputChannels = 5

        self.padding_1 = nn.ZeroPad2d(padDim)
        self.cnn_conv1 = nn.Conv2d(ninputChannels, nFilters[0],
                                   (filtsize[0], filtsize[0]), (1, 1))
        self.tanh1 = nn.Tanh()
        self.maxpool1 = nn.MaxPool2d((poolsize[0], poolsize[0]),
                                     (stepSize[0], stepSize[0]))

        ninputChannels = nFilters[0]
        self.padding_2 = nn.ZeroPad2d(padDim)
        self.cnn_conv2 = nn.Conv2d(ninputChannels, nFilters[1],
                                   (filtsize[1], filtsize[1]), (1, 1))
        self.tanh2 = nn.Tanh()
        self.maxpool2 = nn.MaxPool2d((poolsize[1], poolsize[1]),
                                     (stepSize[1], stepSize[1]))

        ninputChannels = nFilters[1]
        self.padding_3 = nn.ZeroPad2d(padDim)
        self.cnn_conv3 = nn.Conv2d(ninputChannels, nFilters[2],
                                   (filtsize[2], filtsize[2]), (1, 1))
        self.tanh3 = nn.Tanh()
        self.maxpool3 = nn.MaxPool2d((poolsize[2], poolsize[2]),
                                     (stepSize[2], stepSize[2]))

        nFullyConnected = nFilters[2] * 10 * 8

        self.cnn_drop = nn.Dropout2d(p=0.6)
        self.linear = nn.Linear(nFullyConnected, 128)

        # conv1
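        # note: the relu*_* attributes below hold nn.Tanh() modules; the
        # names follow the usual FCN layer naming, but Tanh is the
        # activation actually applied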
        self.conv1_1 = nn.Conv2d(128, 64, (1, 3), padding=(0, 100))
        self.bnorm1_1 = nn.BatchNorm2d(64)
        self.relu1_1 = nn.Tanh()
        self.conv1_2 = nn.Conv2d(64, 64, (1, 3), padding=(0, 1))
        self.bnorm1_2 = nn.BatchNorm2d(64)
        self.relu1_2 = nn.Tanh()
        self.pool1 = nn.MaxPool2d((1, 2), stride=(1, 2), ceil_mode=True)  # 1/2

        # conv2
        self.conv2_1 = nn.Conv2d(64, 128, (1, 3), padding=(0, 1))
        self.bnorm2_1 = nn.BatchNorm2d(128)
        self.relu2_1 = nn.Tanh()
        self.conv2_2 = nn.Conv2d(128, 128, (1, 3), padding=(0, 1))
        self.bnorm2_2 = nn.BatchNorm2d(128)
        self.relu2_2 = nn.Tanh()
        self.pool2 = nn.MaxPool2d((1, 2), stride=(1, 2), ceil_mode=True)  # 1/4

        # conv3
        self.conv3_1 = nn.Conv2d(128, 256, (1, 3), padding=(0, 1))
        self.bnorm3_1 = nn.BatchNorm2d(256)
        self.relu3_1 = nn.Tanh()
        self.conv3_2 = nn.Conv2d(256, 256, (1, 3), padding=(0, 1))
        self.bnorm3_2 = nn.BatchNorm2d(256)
        self.relu3_2 = nn.Tanh()
        self.conv3_3 = nn.Conv2d(256, 256, (1, 3), padding=(0, 1))
        self.bnorm3_3 = nn.BatchNorm2d(256)
        self.relu3_3 = nn.Tanh()
        self.pool3 = nn.MaxPool2d((1, 2), stride=(1, 2), ceil_mode=True)  # 1/8

        # conv4
        self.conv4_1 = nn.Conv2d(256, 512, (1, 3), padding=(0, 1))
        self.bnorm4_1 = nn.BatchNorm2d(512)
        self.relu4_1 = nn.Tanh()
        self.conv4_2 = nn.Conv2d(512, 512, (1, 3), padding=(0, 1))
        self.bnorm4_2 = nn.BatchNorm2d(512)
        self.relu4_2 = nn.Tanh()
        self.conv4_3 = nn.Conv2d(512, 512, (1, 3), padding=(0, 1))
        self.bnorm4_3 = nn.BatchNorm2d(512)
        self.relu4_3 = nn.Tanh()
        self.pool4 = nn.MaxPool2d((1, 2), stride=(1, 2),
                                  ceil_mode=True)  # 1/16

        # conv5
        self.conv5_1 = nn.Conv2d(512, 512, (1, 3), padding=(0, 1))
        self.bnorm5_1 = nn.BatchNorm2d(512)
        self.relu5_1 = nn.Tanh()
        self.conv5_2 = nn.Conv2d(512, 512, (1, 3), padding=(0, 1))
        self.bnorm5_2 = nn.BatchNorm2d(512)
        self.relu5_2 = nn.Tanh()
        self.conv5_3 = nn.Conv2d(512, 512, (1, 3), padding=(0, 1))
        self.bnorm5_3 = nn.BatchNorm2d(512)
        self.relu5_3 = nn.Tanh()
        self.pool5 = nn.MaxPool2d((1, 2), stride=(1, 2),
                                  ceil_mode=True)  # 1/32

        # fc6
        self.fc6 = nn.Conv2d(512, 4096, (1, 7))
        self.relu6 = nn.Tanh()
        self.drop6 = nn.Dropout2d()

        # fc7
        self.fc7 = nn.Conv2d(4096, 4096, (1, 1))
        self.relu7 = nn.Tanh()
        self.drop7 = nn.Dropout2d()

        self.score_fr = nn.Conv2d(4096, n_class, (1, 1))
        self.score_pool3 = nn.Conv2d(256, n_class, (1, 1))
        self.score_pool4 = nn.Conv2d(512, n_class, (1, 1))

        self.upscore2 = nn.ConvTranspose2d(n_class,
                                           n_class, (1, 4),
                                           stride=(1, 2),
                                           bias=False)
        self.upscore8 = nn.ConvTranspose2d(n_class,
                                           n_class, (1, 16),
                                           stride=(1, 8),
                                           bias=False)
        self.upscore_pool4 = nn.ConvTranspose2d(n_class,
                                                n_class, (1, 4),
                                                stride=(1, 2),
                                                bias=False)

        self.Sigmoid = nn.Sigmoid()

        self.classifierLayer = nn.Linear(128, 150)

        self.logsoftmax = nn.LogSoftmax(dim=1)
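The upscore heads grow width as out_w = (w - 1) * stride - 2 * pad + kernel, so each 2x head produces a small overhang that the canonical FCN forward crops (the forward pass is not part of this snippet). A quick size check:

import torch
import torch.nn as nn

# upscore2-style head: kernel (1, 4), stride (1, 2), no padding.
up = nn.ConvTranspose2d(1, 1, (1, 4), stride=(1, 2), bias=False)
y = up(torch.randn(1, 1, 1, 10))
assert y.shape[-1] == (10 - 1) * 2 + 4  # == 22, roughly 2x plus overhang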
Example 21
def create_modules(module_defs):
    """
    Constructs module list of layer blocks from module configuration in module_defs
    """
    hyperparams = module_defs.pop(0)
    output_filters = [int(hyperparams["channels"])]
    module_list = nn.ModuleList()
    for i, module_def in enumerate(module_defs):
        modules = nn.Sequential()

        if module_def["type"] == "convolutional":
            bn = int(module_def["batch_normalize"])
            filters = int(module_def["filters"])
            kernel_size = int(module_def["size"])
            pad = (kernel_size - 1) // 2 if int(module_def["pad"]) else 0
            modules.add_module(
                "conv_%d" % i,
                nn.Conv2d(
                    in_channels=output_filters[-1],
                    out_channels=filters,
                    kernel_size=kernel_size,
                    stride=int(module_def["stride"]),
                    padding=pad,
                    bias=not bn,
                ),
            )
            if bn:
                modules.add_module("batch_norm_%d" % i, nn.BatchNorm2d(filters))
            if module_def["activation"] == "leaky":
                modules.add_module("leaky_%d" % i, nn.LeakyReLU(0.1))

        elif module_def["type"] == "maxpool":
            kernel_size = int(module_def["size"])
            stride = int(module_def["stride"])
            if kernel_size == 2 and stride == 1:
                padding = nn.ZeroPad2d((0, 1, 0, 1))
                modules.add_module("_debug_padding_%d" % i, padding)
            maxpool = nn.MaxPool2d(
                kernel_size=kernel_size,
                stride=stride,
                padding=(kernel_size - 1) // 2,
            )
            modules.add_module("maxpool_%d" % i, maxpool)

        elif module_def["type"] == "upsample":
            upsample = nn.Upsample(scale_factor=int(module_def["stride"]), mode="nearest")
            modules.add_module("upsample_%d" % i, upsample)

        elif module_def["type"] == "route":
            layers = [int(x) for x in module_def["layers"].split(",")]
            filters = sum([output_filters[layer_i] for layer_i in layers])
            modules.add_module("route_%d" % i, EmptyLayer())

        elif module_def["type"] == "shortcut":
            filters = output_filters[int(module_def["from"])]
            modules.add_module("shortcut_%d" % i, EmptyLayer())

        elif module_def["type"] == "yolo":
            anchor_idxs = [int(x) for x in module_def["mask"].split(",")]
            # Extract anchors
            anchors = [int(x) for x in module_def["anchors"].split(",")]
            anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
            anchors = [anchors[i] for i in anchor_idxs]
            num_classes = int(module_def["classes"])
            img_height = int(hyperparams["height"])
            # Define detection layer
            yolo_layer = YOLOLayer(anchors, num_classes, img_height)
            modules.add_module("yolo_%d" % i, yolo_layer)
        # Register module list and number of output filters
        module_list.append(modules)
        output_filters.append(filters)

    return hyperparams, module_list
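The `_debug_padding` branch exists because Darknet's `maxpool size=2 stride=1` keeps the spatial size: padding the right and bottom by one grows the map by a row and a column, and the 2x2 stride-1 pool then shrinks it back. A shape check of that case:

import torch
import torch.nn as nn

x = torch.randn(1, 3, 13, 13)
y = nn.MaxPool2d(2, stride=1)(nn.ZeroPad2d((0, 1, 0, 1))(x))
assert y.shape == x.shape  # (13 + 1) - 2 + 1 == 13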
Example 22
    def __init__(self, padding):
        super(pad, self).__init__()
        self.padding = nn.ZeroPad2d(padding)
Example 23
def right_shift(x):
    x = x[:, :, :, :-1]
    pad = nn.ZeroPad2d((1, 0, 0, 0))
    return pad(x)
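For reference, the effect on a single row (a tiny usage sketch, assuming the snippet's imports):

import torch

x = torch.arange(1., 5.).view(1, 1, 1, 4)  # row [1, 2, 3, 4]
print(right_shift(x))                      # row [0, 1, 2, 3]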
Example 24
def save_layer_feature(style_feat, in_featuremap, out_featuremap, save_path,
                       file_name, mean, std):
    padding = nn.ZeroPad2d(5)
    in_featuremap = padding(in_featuremap).tanh() / 2 + 0.5
    out_featuremap = padding(out_featuremap).tanh() / 2 + 0.5
    style_feat = padding(style_feat).tanh() / 2 + 0.5

    b, c, h, w = in_featuremap.size()

    result = Image.new('RGB', (w * c, h * 3 + 60))

    mean = mean.view(3, b, c)
    std = std.view(3, b, c)
    content_mean = mean[0]
    content_std = std[0]
    style_mean = mean[1]
    style_std = std[1]
    adain_mean = mean[2]
    adain_std = std[2]

    fontsize = 10
    font = ImageFont.truetype(
        fm.findfont(fm.FontProperties(family='DejaVu Sans')), fontsize)

    for i in range(c):
        in_feature = style_feat[0:1, i:i + 1, :, :]
        in_feature_numpy = tensor2im(in_feature)
        in_image_pil = Image.fromarray(in_feature_numpy)
        result.paste(in_image_pil, box=(w * i, 0))

        in_feature = in_featuremap[0:1, i:i + 1, :, :]
        in_feature_numpy = tensor2im(in_feature)
        in_image_pil = Image.fromarray(in_feature_numpy)
        result.paste(in_image_pil, box=(w * i, h))

        out_feature = out_featuremap[0:1, i:i + 1, :, :]
        out_feature_numpy = tensor2im(out_feature)
        out_image_pil = Image.fromarray(out_feature_numpy)
        result.paste(out_image_pil, box=(w * i, h * 2))

        draw = ImageDraw.Draw(result)
        color = "#FF0000"
        string = str(round(content_mean[0, i].cpu().item(), 2)) + ', ' + str(
            round(content_std[0, i].cpu().item(), 2))
        draw.text((w * i, h * 3 + 4),
                  string,
                  font=font,
                  fill=color,
                  spacing=0,
                  align='left')
        color = "#00FF00"
        string = str(round(style_mean[0, i].cpu().item(), 2)) + ', ' + str(
            round(style_std[0, i].cpu().item(), 2))
        draw.text((w * i, h * 3 + 4 + 20),
                  string,
                  font=font,
                  fill=color,
                  spacing=0,
                  align='left')
        color = "#FFFFFF"
        string = str(round(adain_mean[0, i].cpu().item(), 2)) + ', ' + str(
            round(adain_std[0, i].cpu().item(), 2))
        draw.text((w * i, h * 3 + 4 + 40),
                  string,
                  font=font,
                  fill=color,
                  spacing=0,
                  align='left')

    save_name = os.path.join(save_path, file_name + '.jpg')
    result.save(save_name, quality=100)
Example 25
    def __init__(self, in_channels_left, out_channels_left, in_channels_right,
                 out_channels_right):
        super(FirstCell, self).__init__()
        self.conv_1x1 = nn.Sequential()
        self.conv_1x1.add_module('relu', nn.ReLU())
        self.conv_1x1.add_module(
            'conv',
            nn.Conv2d(in_channels_right,
                      out_channels_right,
                      1,
                      stride=1,
                      bias=False))
        self.conv_1x1.add_module(
            'bn',
            nn.BatchNorm2d(out_channels_right,
                           eps=0.001,
                           momentum=0.1,
                           affine=True))

        self.relu = nn.ReLU()
        self.path_1 = nn.Sequential()
        self.path_1.add_module(
            'avgpool', nn.AvgPool2d(1, stride=2, count_include_pad=False))
        self.path_1.add_module(
            'conv',
            nn.Conv2d(in_channels_left,
                      out_channels_left,
                      1,
                      stride=1,
                      bias=False))
        self.path_2 = nn.Sequential()
        self.path_2.add_module('pad', nn.ZeroPad2d((0, 1, 0, 1)))
        self.path_2.add_module(
            'avgpool', nn.AvgPool2d(1, stride=2, count_include_pad=False))
        self.path_2.add_module(
            'conv',
            nn.Conv2d(in_channels_left,
                      out_channels_left,
                      1,
                      stride=1,
                      bias=False))

        self.final_path_bn = nn.BatchNorm2d(out_channels_left * 2,
                                            eps=0.001,
                                            momentum=0.1,
                                            affine=True)

        self.comb_iter_0_left = BranchSeparables(out_channels_right,
                                                 out_channels_right,
                                                 5,
                                                 1,
                                                 2,
                                                 bias=False)
        self.comb_iter_0_right = BranchSeparables(out_channels_right,
                                                  out_channels_right,
                                                  3,
                                                  1,
                                                  1,
                                                  bias=False)

        self.comb_iter_1_left = BranchSeparables(out_channels_right,
                                                 out_channels_right,
                                                 5,
                                                 1,
                                                 2,
                                                 bias=False)
        self.comb_iter_1_right = BranchSeparables(out_channels_right,
                                                  out_channels_right,
                                                  3,
                                                  1,
                                                  1,
                                                  bias=False)

        self.comb_iter_2_left = nn.AvgPool2d(3,
                                             stride=1,
                                             padding=1,
                                             count_include_pad=False)

        self.comb_iter_3_left = nn.AvgPool2d(3,
                                             stride=1,
                                             padding=1,
                                             count_include_pad=False)
        self.comb_iter_3_right = nn.AvgPool2d(3,
                                              stride=1,
                                              padding=1,
                                              count_include_pad=False)

        self.comb_iter_4_left = BranchSeparables(out_channels_right,
                                                 out_channels_right,
                                                 3,
                                                 1,
                                                 1,
                                                 bias=False)
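path_1 and path_2 implement NASNet-style factorized reduction: two stride-2 average-pool paths that sample the grid at a (0, 0) offset and, via the (0, 1, 0, 1) pad plus a one-pixel crop taken in the canonical forward (not shown here), a (1, 1) offset, then concatenate channel-wise. A shape sketch with placeholder channel counts:

import torch
import torch.nn as nn

# Placeholder sizes: in_channels_left = 4, out_channels_left = 2.
x = torch.randn(1, 4, 8, 8)
pool = nn.AvgPool2d(1, stride=2, count_include_pad=False)
conv1 = nn.Conv2d(4, 2, 1, stride=1, bias=False)
conv2 = nn.Conv2d(4, 2, 1, stride=1, bias=False)
p1 = conv1(pool(x))
# pad right/bottom, then drop the first row/column: a (1, 1)-offset grid
p2 = conv2(pool(nn.ZeroPad2d((0, 1, 0, 1))(x)[:, :, 1:, 1:]))
out = torch.cat([p1, p2], 1)  # final_path_bn would normalize this
assert out.shape == (1, 4, 4, 4)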
Example 26
    def __init__(self):
        super(SimpleNet, self).__init__()
        self.conv_pad_1 = nn.ZeroPad2d((1, 1, 1, 1))
        self.conv_1 = nn.Conv2d(3, 32, (3, 3), (1, 1))
        self.pool_pad_1 = nn.ZeroPad2d((0, 0, 0, 0))
        self.pool_1 = nn.MaxPool2d(1)
        self.drop_1 = nn.Dropout(p=0.2)
        self.conv_pad_2 = nn.ZeroPad2d((1, 1, 1, 1))
        self.conv_2 = nn.Conv2d(32, 32, (3, 3), (1, 1))
        self.pool_pad_2 = nn.ZeroPad2d((0, 0, 0, 0))
        self.pool_2 = nn.MaxPool2d(2)
        self.drop_2 = nn.Dropout(p=0.2)
        self.conv_pad_3 = nn.ZeroPad2d((1, 1, 1, 1))
        self.conv_3 = nn.Conv2d(32, 64, (3, 3), (1, 1))
        self.pool_pad_3 = nn.ZeroPad2d((0, 0, 0, 0))
        self.pool_3 = nn.MaxPool2d(1)
        self.drop_3 = nn.Dropout(p=0.2)
        self.conv_pad_4 = nn.ZeroPad2d((1, 1, 1, 1))
        self.conv_4 = nn.Conv2d(64, 64, (3, 3), (1, 1))
        self.pool_pad_4 = nn.ZeroPad2d((0, 0, 0, 0))
        self.pool_4 = nn.MaxPool2d(2)
        self.drop_4 = nn.Dropout(p=0.2)
        self.conv_pad_5 = nn.ZeroPad2d((1, 1, 1, 1))
        self.conv_5 = nn.Conv2d(64, 128, (3, 3), (1, 1))
        self.pool_pad_5 = nn.ZeroPad2d((0, 0, 0, 0))
        self.pool_5 = nn.MaxPool2d(1)
        self.drop_5 = nn.Dropout(p=0.2)
        self.conv_pad_6 = nn.ZeroPad2d((1, 1, 1, 1))
        self.conv_6 = nn.Conv2d(128, 128, (3, 3), (1, 1))
        self.pool_pad_6 = nn.ZeroPad2d((0, 0, 0, 0))
        self.pool_6 = nn.MaxPool2d(2)
        self.drop_6 = nn.Dropout(p=0.2)
        self.fc = nn.Linear(128 * 4 * 4, 10)
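With a CIFAR-10-sized input (an assumption; the forward pass is not shown), only the three MaxPool2d(2) stages change the spatial size, 32 -> 16 -> 8 -> 4, which matches the 128 * 4 * 4 features self.fc expects:

import torch

net = SimpleNet()
x = torch.randn(1, 3, 32, 32)
# run the conv/pool stack in order (dropout layers omitted for brevity)
for m in [net.conv_pad_1, net.conv_1, net.pool_1,
          net.conv_pad_2, net.conv_2, net.pool_2,
          net.conv_pad_3, net.conv_3, net.pool_3,
          net.conv_pad_4, net.conv_4, net.pool_4,
          net.conv_pad_5, net.conv_5, net.pool_5,
          net.conv_pad_6, net.conv_6, net.pool_6]:
    x = m(x)
assert x.shape == (1, 128, 4, 4)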
Example 27
    def __init__(self, kernel_size, stride=1, padding=1, zero_pad=False):
        super(MaxPool, self).__init__()
        self.zero_pad = nn.ZeroPad2d((1, 0, 1, 0)) if zero_pad else None
        self.pool = nn.MaxPool2d(kernel_size, stride=stride, padding=padding)
Example 28
    def __init__(self):
        super(CAE, self).__init__()

        self.encoded = None

        # ENCODER

        # 64x64x64
        self.e_conv_1 = nn.Sequential(
            #1
            nn.ZeroPad2d((1, 2, 1, 2)),
            nn.Conv2d(in_channels=3,
                      out_channels=64,
                      kernel_size=(5, 5),
                      stride=(2, 2)),
            nn.LeakyReLU())

        # 128x32x32
        self.e_conv_2 = nn.Sequential(
            #2
            nn.ZeroPad2d((1, 2, 1, 2)),
            nn.Conv2d(in_channels=64,
                      out_channels=128,
                      kernel_size=(5, 5),
                      stride=(2, 2)),
            nn.LeakyReLU())

        # 128x32x32
        self.e_block_1 = nn.Sequential(
            #3
            nn.ZeroPad2d((1, 1, 1, 1)),
            nn.Conv2d(in_channels=128,
                      out_channels=128,
                      kernel_size=(3, 3),
                      stride=(1, 1)),
            nn.LeakyReLU(),

            #4
            nn.ZeroPad2d((1, 1, 1, 1)),
            nn.Conv2d(in_channels=128,
                      out_channels=128,
                      kernel_size=(3, 3),
                      stride=(1, 1)),
        )

        # 128x32x32
        self.e_block_2 = nn.Sequential(
            #5
            nn.ZeroPad2d((1, 1, 1, 1)),
            nn.Conv2d(in_channels=128,
                      out_channels=128,
                      kernel_size=(3, 3),
                      stride=(1, 1)),
            nn.LeakyReLU(),
        )

        # 128x32x32
        self.e_block_3 = nn.Sequential(
            #8
            nn.ZeroPad2d((1, 1, 1, 1)),
            nn.Conv2d(in_channels=128,
                      out_channels=128,
                      kernel_size=(3, 3),
                      stride=(1, 1)),
        )

        # 32x32x32
        self.e_conv_3 = nn.Sequential(
            #9
            nn.Conv2d(in_channels=128,
                      out_channels=32,
                      kernel_size=(5, 5),
                      stride=(1, 1),
                      padding=(2, 2)),
            nn.Tanh())

        # DECODER

        # 128x64x64
        self.d_up_conv_1 = nn.Sequential(
            #1
            nn.Conv2d(in_channels=32,
                      out_channels=64,
                      kernel_size=(3, 3),
                      stride=(1, 1)),
            nn.LeakyReLU(),

            #2
            nn.ZeroPad2d((1, 1, 1, 1)),
            nn.ConvTranspose2d(in_channels=64,
                               out_channels=128,
                               kernel_size=(2, 2),
                               stride=(2, 2)))

        # 128x64x64
        self.d_block_1 = nn.Sequential(
            #3
            nn.ZeroPad2d((1, 1, 1, 1)),
            nn.Conv2d(in_channels=128,
                      out_channels=128,
                      kernel_size=(3, 3),
                      stride=(1, 1)),
            nn.LeakyReLU(),
        )

        # 128x64x64
        self.d_block_2 = nn.Sequential(
            #5
            nn.ZeroPad2d((1, 1, 1, 1)),
            nn.Conv2d(in_channels=128,
                      out_channels=128,
                      kernel_size=(3, 3),
                      stride=(1, 1)),
            nn.LeakyReLU(),
        )

        # 128x64x64
        self.d_block_3 = nn.Sequential(
            #8
            nn.ZeroPad2d((1, 1, 1, 1)),
            nn.Conv2d(in_channels=128,
                      out_channels=128,
                      kernel_size=(3, 3),
                      stride=(1, 1)),
        )

        # 256x128x128
        self.d_up_conv_2 = nn.Sequential(
            #9
            nn.Conv2d(in_channels=128,
                      out_channels=32,
                      kernel_size=(3, 3),
                      stride=(1, 1)),
            nn.LeakyReLU(),

            #10
            nn.ZeroPad2d((1, 1, 1, 1)),
            nn.ConvTranspose2d(in_channels=32,
                               out_channels=256,
                               kernel_size=(2, 2),
                               stride=(2, 2)))

        # 3x128x128
        self.d_up_conv_3 = nn.Sequential(
            #11
            nn.Conv2d(in_channels=256,
                      out_channels=16,
                      kernel_size=(3, 3),
                      stride=(1, 1)),
            nn.LeakyReLU(),

            #12
            nn.ReflectionPad2d((2, 2, 2, 2)),
            nn.Conv2d(in_channels=16,
                      out_channels=3,
                      kernel_size=(3, 3),
                      stride=(1, 1)),
            nn.Tanh())
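The asymmetric ZeroPad2d((1, 2, 1, 2)) before each 5x5 stride-2 convolution reproduces TensorFlow-style 'SAME' downsampling for even input sizes, halving the resolution exactly:

import torch
import torch.nn as nn

x = torch.randn(1, 3, 128, 128)
y = nn.Conv2d(3, 64, (5, 5), stride=(2, 2))(nn.ZeroPad2d((1, 2, 1, 2))(x))
assert y.shape[-2:] == (64, 64)  # (128 + 3 - 5) // 2 + 1 == 64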
Example 29
def create_modules(module_defs):
    """
    Constructs module list of layer blocks from module configuration in module_defs
    """
    hyperparams = module_defs.pop(0)
    output_filters = [int(hyperparams["channels"])]
    module_list = nn.ModuleList()
    for module_i, module_def in enumerate(module_defs):
        modules = nn.Sequential()

        if module_def["type"] == "convolutional":
            bn = int(module_def["batch_normalize"])
            filters = int(module_def["filters"])
            kernel_size = int(module_def["size"])
            pad = (kernel_size - 1) // 2
            modules.add_module(
                f"conv_{module_i}",
                nn.Conv2d(
                    in_channels=output_filters[-1],
                    out_channels=filters,
                    kernel_size=kernel_size,
                    stride=int(module_def["stride"]),
                    padding=pad,
                    bias=not bn,
                ),
            )
            if bn:
                modules.add_module(
                    f"batch_norm_{module_i}",
                    nn.BatchNorm2d(filters, momentum=0.9, eps=1e-5))
            if module_def["activation"] == "leaky":
                modules.add_module(f"leaky_{module_i}",
                                   nn.LeakyReLU(0.1, inplace=True))

        elif module_def["type"] == "maxpool":
            kernel_size = int(module_def["size"])
            stride = int(module_def["stride"])
            if kernel_size == 2 and stride == 1:
                modules.add_module(f"_debug_padding_{module_i}",
                                   nn.ZeroPad2d((0, 1, 0, 1)))
            maxpool = nn.MaxPool2d(kernel_size=kernel_size,
                                   stride=stride,
                                   padding=int((kernel_size - 1) // 2))
            modules.add_module(f"maxpool_{module_i}", maxpool)

        elif module_def["type"] == "upsample":
            upsample = Upsample(scale_factor=int(module_def["stride"]),
                                mode="nearest")
            modules.add_module(f"upsample_{module_i}", upsample)

        elif module_def["type"] == "route":
            layers = [int(x) for x in module_def["layers"].split(",")]
            filters = sum([output_filters[1:][i] for i in layers])
            modules.add_module(f"route_{module_i}", EmptyLayer())

        elif module_def["type"] == "shortcut":
            filters = output_filters[1:][int(module_def["from"])]
            modules.add_module(f"shortcut_{module_i}", EmptyLayer())

        elif module_def["type"] == "yolo":
            anchor_idxs = [int(x) for x in module_def["mask"].split(",")]
            # Extract anchors
            anchors = [int(x) for x in module_def["anchors"].split(",")]
            anchors = [(anchors[i], anchors[i + 1])
                       for i in range(0, len(anchors), 2)]
            anchors = [anchors[i] for i in anchor_idxs]
            num_classes = int(module_def["classes"])
            img_size = int(hyperparams["height"])
            # Define detection layer
            yolo_layer = YOLOLayer(anchors, num_classes, img_size)
            modules.add_module(f"yolo_{module_i}", yolo_layer)
        elif module_def["type"] == "graylayer":
            modules.add_module(f"graylayer_{module_i}", grayLayer())
            filters = module_def["filters"]
        elif module_def["type"] == "expandlayer":
            modules.add_module(f"expandlayer_{module_i}", expandLayer())
            filters = module_def["filters"]
        # Register module list and number of output filters
        module_list.append(modules)
        output_filters.append(filters)

    return hyperparams, module_list
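Indexing route and shortcut offsets through output_filters[1:] keeps Darknet's convention intact: slot 0 of output_filters is the network input depth, so position -1 of the slice is always the block immediately before the current one. A bookkeeping sketch:

output_filters = [3, 32, 64, 128]  # input depth, then three blocks
layers = [-1, -3]                  # a typical Darknet route
filters = sum(output_filters[1:][i] for i in layers)
assert filters == 128 + 32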
Example 30
    def __init__(self, label_num):
        super(Autoencoder, self).__init__()
        self.label_num = label_num

        self.conv1 = nn.Sequential(nn.ZeroPad2d((1, 2, 1, 2)),
                                   nn.Conv2d(3, 3, kernel_size=3, padding=1),
                                   nn.LeakyReLU(),
                                   nn.Conv2d(3, 3, kernel_size=3, padding=1),
                                   nn.LeakyReLU(),
                                   nn.Conv2d(3, 32, kernel_size=5, stride=2),
                                   nn.LeakyReLU(),
                                   nn.Conv2d(32, 32, kernel_size=3, padding=1),
                                   nn.LeakyReLU(),
                                   nn.Conv2d(32, 32, kernel_size=3, padding=1),
                                   nn.LeakyReLU())
        self.conv2 = nn.Sequential(nn.ZeroPad2d((1, 2, 1, 2)),
                                   nn.Conv2d(32, 64, kernel_size=5, stride=2),
                                   nn.LeakyReLU(),
                                   nn.Conv2d(64, 64, kernel_size=3, padding=1),
                                   nn.LeakyReLU(),
                                   nn.Conv2d(64, 64, kernel_size=3, padding=1),
                                   nn.LeakyReLU())
        self.conv3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1),
            nn.LeakyReLU(), nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.LeakyReLU(), nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.LeakyReLU())
        self.conv4 = nn.Sequential(
            nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1),
            nn.LeakyReLU(), nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.LeakyReLU(), nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.LeakyReLU())
        self.fc1 = nn.Conv2d(256, 10, kernel_size=3, stride=2, padding=1)

        self.fc2 = nn.Linear(640, 2, bias=True)

        self.fc_mu = nn.Sequential(nn.Linear(640, 200, bias=True),
                                   nn.LeakyReLU())

        self.fc_var = nn.Sequential(nn.Linear(640, 200, bias=True), nn.ReLU())

        self.emb_label = nn.Sequential(
            nn.Linear(self.label_num, 200, bias=True), nn.LeakyReLU())

        self.fc2dec = nn.Linear(400, 640, bias=True)

        self.fc1dec = nn.Sequential(
            nn.ConvTranspose2d(10, 256, kernel_size=2, stride=2),
            nn.LeakyReLU())
        self.conv4dec = nn.Sequential(
            nn.Conv2d(256, 256, kernel_size=3, padding=1), nn.LeakyReLU(),
            nn.Conv2d(256, 256, kernel_size=3, padding=1), nn.LeakyReLU(),
            nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2),
            nn.LeakyReLU())
        self.conv3dec = nn.Sequential(
            nn.Conv2d(128, 128, kernel_size=3, padding=1), nn.LeakyReLU(),
            nn.Conv2d(128, 128, kernel_size=3, padding=1), nn.LeakyReLU(),
            nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2),
            nn.LeakyReLU())
        self.conv2dec = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=3, padding=1), nn.LeakyReLU(),
            nn.Conv2d(64, 64, kernel_size=3, padding=1), nn.LeakyReLU(),
            nn.ConvTranspose2d(64, 32, kernel_size=2, stride=2),
            nn.LeakyReLU())
        self.conv1dec = nn.Sequential(
            nn.Conv2d(32, 32, kernel_size=3, padding=1), nn.LeakyReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1), nn.LeakyReLU(),
            nn.ConvTranspose2d(32, 3, kernel_size=2, stride=2), nn.LeakyReLU(),
            nn.Conv2d(3, 3, kernel_size=3, padding=1), nn.LeakyReLU(),
            nn.Conv2d(3, 3, kernel_size=3, padding=1), nn.Sigmoid())
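The fc_mu / fc_var pair suggests a VAE-style latent head. A hedged sketch of the reparameterization step such a head usually feeds (the model's forward pass is not shown, so the convention below is an assumption, not the author's code):

import torch

def reparameterize(mu, var):
    # fc_var ends in ReLU, so treat its output as a non-negative variance
    std = (var + 1e-8).sqrt()
    eps = torch.randn_like(std)
    return mu + eps * std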