def __init__(self,
                 output_scale,
                 noise_size=120,
                 num_classes=0,
                 out_channels=3,
                 base_channels=96,
                 block_depth=2,
                 input_scale=4,
                 with_shared_embedding=True,
                 shared_dim=128,
                 sn_eps=1e-6,
                 init_type='ortho',
                 concat_noise=True,
                 act_cfg=dict(type='ReLU', inplace=False),
                 upsample_cfg=dict(type='nearest', scale_factor=2),
                 with_spectral_norm=True,
                 auto_sync_bn=True,
                 blocks_cfg=dict(type='BigGANDeepGenResBlock'),
                 arch_cfg=None,
                 out_norm_cfg=dict(type='BN'),
                 pretrained=None,
                 rgb2bgr=False):
        super().__init__()
        self.noise_size = noise_size
        self.num_classes = num_classes
        self.shared_dim = shared_dim
        self.with_shared_embedding = with_shared_embedding
        self.output_scale = output_scale
        self.arch = arch_cfg if arch_cfg else self._get_default_arch_cfg(
            self.output_scale, base_channels)
        self.input_scale = input_scale
        self.concat_noise = concat_noise
        self.blocks_cfg = deepcopy(blocks_cfg)
        self.upsample_cfg = deepcopy(upsample_cfg)
        self.block_depth = block_depth
        self.rgb2bgr = rgb2bgr

        # Validity Check
        # If 'num_classes' is zero, 'with_shared_embedding' must also be
        # False.
        if num_classes == 0:
            assert not self.with_shared_embedding
            assert not self.concat_noise
        elif not self.with_shared_embedding:
            # If not `with_shared_embedding`, we will use `nn.Embedding` to
            # replace the original `Linear` layer in conditional BN.
            # Meanwhile, we do not adopt split noises.
            assert not self.concat_noise

        # First linear layer
        if self.concat_noise:
            self.noise2feat = nn.Linear(
                self.noise_size + self.shared_dim,
                self.arch['in_channels'][0] * (self.input_scale**2))
        else:
            self.noise2feat = nn.Linear(
                self.noise_size,
                self.arch['in_channels'][0] * (self.input_scale**2))

        if with_spectral_norm:
            self.noise2feat = spectral_norm(self.noise2feat, eps=sn_eps)

        # If using 'shared_embedding', a single unified embedding of the
        # label is shared across all blocks. If not, the raw label is passed
        # to each block.
        if with_shared_embedding:
            self.shared_embedding = nn.Embedding(num_classes, shared_dim)
        else:
            self.shared_embedding = nn.Identity()

        if num_classes > 0:
            if self.concat_noise:
                self.dim_after_concat = (
                    self.shared_dim + self.noise_size
                    if self.with_shared_embedding else self.num_classes)
            else:
                self.dim_after_concat = (
                    self.shared_dim
                    if self.with_shared_embedding else self.num_classes)
        else:
            self.dim_after_concat = 0
        self.blocks_cfg.update(
            dict(
                dim_after_concat=self.dim_after_concat,
                act_cfg=act_cfg,
                sn_eps=sn_eps,
                input_is_label=(num_classes > 0)
                and (not with_shared_embedding),
                with_spectral_norm=with_spectral_norm,
                auto_sync_bn=auto_sync_bn))

        self.conv_blocks = nn.ModuleList()
        for index, out_ch in enumerate(self.arch['out_channels']):
            for depth in range(self.block_depth):
                # change args to adapt to current block
                block_cfg_ = deepcopy(self.blocks_cfg)
                block_cfg_.update(
                    dict(
                        in_channels=self.arch['in_channels'][index],
                        out_channels=out_ch if depth == (self.block_depth - 1)
                        else self.arch['in_channels'][index],
                        upsample_cfg=self.upsample_cfg
                        if self.arch['upsample'][index]
                        and depth == (self.block_depth - 1) else None))
                self.conv_blocks.append(build_module(block_cfg_))

            if self.arch['attention'][index]:
                self.conv_blocks.append(
                    SelfAttentionBlock(
                        out_ch,
                        with_spectral_norm=with_spectral_norm,
                        sn_eps=sn_eps))

        self.output_layer = SNConvModule(
            self.arch['out_channels'][-1],
            out_channels,
            kernel_size=3,
            padding=1,
            with_spectral_norm=with_spectral_norm,
            spectral_norm_cfg=dict(eps=sn_eps),
            act_cfg=act_cfg,
            norm_cfg=out_norm_cfg,
            bias=True,
            order=('norm', 'act', 'conv'))

        self.init_weights(pretrained=pretrained, init_type=init_type)
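A note on the nn.Identity() fallback above: when `with_shared_embedding` is False, the generator must hand the raw label straight to each block, and Identity is exactly that no-op. A minimal standalone sketch (the sizes are illustrative, not the generator's defaults):

import torch
import torch.nn as nn

num_classes, shared_dim = 10, 128
with_shared_embedding = False

shared_embedding = (nn.Embedding(num_classes, shared_dim)
                    if with_shared_embedding else nn.Identity())

label = torch.randint(0, num_classes, (4,))
print(shared_embedding(label).shape)  # torch.Size([4]): raw labels pass through;
                                      # with the embedding it would be torch.Size([4, 128])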
Example #2
    def reset_classifier(self, num_classes, global_pool=''):
        self.num_classes = num_classes
        self.head = nn.Linear(
            self.embed_dim, num_classes) if num_classes > 0 else nn.Identity()

Example #3
    def __init__(self,
                 in_channels=512,
                 out_channels=17,
                 num_stages=1,
                 num_deconv_layers=3,
                 num_deconv_filters=(256, 256, 256),
                 num_deconv_kernels=(4, 4, 4),
                 extra=None,
                 loss_keypoint=None,
                 train_cfg=None,
                 test_cfg=None):
        super().__init__()

        self.in_channels = in_channels
        self.num_stages = num_stages
        self.loss = build_loss(loss_keypoint)

        self.train_cfg = {} if train_cfg is None else train_cfg
        self.test_cfg = {} if test_cfg is None else test_cfg
        self.target_type = self.test_cfg.get('target_type', 'GaussianHeatmap')

        if extra is not None and not isinstance(extra, dict):
            raise TypeError('extra should be dict or None.')

        # build multi-stage deconv layers
        self.multi_deconv_layers = nn.ModuleList([])
        for _ in range(self.num_stages):
            if num_deconv_layers > 0:
                deconv_layers = self._make_deconv_layer(
                    num_deconv_layers,
                    num_deconv_filters,
                    num_deconv_kernels,
                )
            elif num_deconv_layers == 0:
                deconv_layers = nn.Identity()
            else:
                raise ValueError(
                    f'num_deconv_layers ({num_deconv_layers}) should >= 0.')
            self.multi_deconv_layers.append(deconv_layers)

        identity_final_layer = False
        if extra is not None and 'final_conv_kernel' in extra:
            assert extra['final_conv_kernel'] in [0, 1, 3]
            if extra['final_conv_kernel'] == 3:
                padding = 1
            elif extra['final_conv_kernel'] == 1:
                padding = 0
            else:
                # 0 for Identity mapping.
                identity_final_layer = True
            kernel_size = extra['final_conv_kernel']
        else:
            kernel_size = 1
            padding = 0

        # build multi-stage final layers
        self.multi_final_layers = nn.ModuleList([])
        for i in range(self.num_stages):
            if identity_final_layer:
                final_layer = nn.Identity()
            else:
                final_layer = build_conv_layer(
                    cfg=dict(type='Conv2d'),
                    in_channels=num_deconv_filters[-1]
                    if num_deconv_layers > 0 else in_channels,
                    out_channels=out_channels,
                    kernel_size=kernel_size,
                    stride=1,
                    padding=padding)
            self.multi_final_layers.append(final_layer)
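The `final_conv_kernel` handling above maps kernel size to padding (3 -> 1, 1 -> 0) and treats 0 as "skip the final conv entirely". A small hedged sketch of that rule in isolation (a plain nn.Conv2d stands in for build_conv_layer):

import torch.nn as nn

def make_final_layer(in_ch, out_ch, final_conv_kernel):
    if final_conv_kernel == 0:
        return nn.Identity()          # 0 means identity mapping, as in the head above
    padding = 1 if final_conv_kernel == 3 else 0
    return nn.Conv2d(in_ch, out_ch, final_conv_kernel, stride=1, padding=padding)

print(make_final_layer(256, 17, 0))   # Identity()
print(make_final_layer(256, 17, 3))   # Conv2d(256, 17, kernel_size=(3, 3), ...)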
Example #4
File: pnn.py Project: hengdashi/pnn
    def __init__(self, cid, nchannels, nactions):
        super(PNNColumn, self).__init__()
        nhidden = 256
        self.cid = cid
        # 6-layer neural network
        self.nlayers = 6

        # init normal nn, lateral connection, adapter layer and alpha
        self.w = nn.ModuleList()
        self.u = nn.ModuleList()
        self.v = nn.ModuleList()
        self.alpha = nn.ModuleList()

        # normal neural network
        self.w.append(
            nn.Conv2d(nchannels, 32, kernel_size=3, stride=2, padding=1))
        self.w.extend([
            nn.Conv2d(32, 32, kernel_size=3, stride=2, padding=1)
            for _ in range(self.nlayers - 3)
        ])
        conv_out_size = self._get_conv_out((nchannels, 84, 84))
        self.w.append(nn.Linear(conv_out_size, nhidden))
        # w[-2] is the critic layer and w[-1] is the actor layer
        self.w.append(
            nn.ModuleList(
                [nn.Linear(nhidden, 1),
                 nn.Linear(nhidden, nactions)]))

        # only add lateral connections and adapter layers if not first column
        # v[col][layer][(nnList on that layer)]
        for i in range(self.cid):
            self.v.append(nn.ModuleList())
            # adapter layer
            self.v[i].append(nn.Identity())
            self.v[i].extend([
                nn.Conv2d(32, 1, kernel_size=1)
                for _ in range(self.nlayers - 3)
            ])
            self.v[i].append(nn.Linear(conv_out_size, conv_out_size))
            self.v[i].append(
                nn.ModuleList(
                    [nn.Linear(nhidden, nhidden),
                     nn.Linear(nhidden, nhidden)]))

            # alpha
            self.alpha.append(nn.ParameterList())
            self.alpha[i].append(
                nn.Parameter(torch.Tensor(1), requires_grad=False))
            self.alpha[i].extend([
                nn.Parameter(
                    torch.Tensor(np.array(np.random.choice([1e0, 1e-1,
                                                            1e-2]))))
                for _ in range(self.nlayers)
            ])

            # lateral connection
            self.u.append(nn.ModuleList())
            self.u[i].append(nn.Identity())
            self.u[i].extend([
                nn.Conv2d(1, 32, kernel_size=3, stride=2, padding=1)
                for _ in range(self.nlayers - 3)
            ])
            self.u[i].append(nn.Linear(conv_out_size, nhidden))
            self.u[i].append(
                nn.ModuleList(
                    [nn.Linear(nhidden, 1),
                     nn.Linear(nhidden, nactions)]))

        # init weights
        self._reset_parameters()
        self.w[-1][0].weight.data = self._normalized(self.w[-1][0].weight.data)
        self.w[-1][1].weight.data = self._normalized(self.w[-1][1].weight.data,
                                                     1e-2)

        for i in range(self.cid):
            self.v[i][-1][0].weight.data = self._normalized(
                self.v[i][-1][0].weight.data)
            self.v[i][-1][1].weight.data = self._normalized(
                self.v[i][-1][1].weight.data, 1e-2)

            self.u[i][-1][0].weight.data = self._normalized(
                self.u[i][-1][0].weight.data)
            self.u[i][-1][1].weight.data = self._normalized(
                self.u[i][-1][1].weight.data, 1e-2)
Example #5
    def __init__(self,
                 img_size=224,
                 patch_size=16,
                 in_chans=3,
                 num_classes=1000,
                 embed_dim=768,
                 depth=12,
                 num_heads=12,
                 mlp_ratio=4.,
                 qkv_bias=True,
                 qk_scale=None,
                 drop_rate=0.,
                 attn_drop_rate=0.,
                 drop_path_rate=0.,
                 hybrid_backbone=None,
                 norm_layer=nn.LayerNorm):
        super().__init__()
        self.num_classes = num_classes
        self.num_features = self.embed_dim = embed_dim  # num_features for consistency with other models
        norm_layer = norm_layer or partial(nn.LayerNorm, eps=1e-6)

        if hybrid_backbone is not None:
            self.patch_embed = HybridEmbed(hybrid_backbone,
                                           img_size=img_size,
                                           in_chans=in_chans,
                                           embed_dim=embed_dim)
        else:
            self.patch_embed = PatchEmbed(img_size=img_size,
                                          patch_size=patch_size,
                                          in_chans=in_chans,
                                          embed_dim=embed_dim)
        num_patches = self.patch_embed.num_patches

        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
        self.pos_embed = nn.Parameter(
            torch.zeros(1, num_patches + 1, embed_dim))
        self.pos_drop = nn.Dropout(p=drop_rate)

        self.dpr = [
            x.item() for x in torch.linspace(0, drop_path_rate, depth)
        ]  # stochastic depth decay rule
        self.blocks = nn.ModuleList([
            Block(
                dim=embed_dim,
                num_heads=num_heads,
                mlp_ratio=mlp_ratio,
                qkv_bias=qkv_bias,
                qk_scale=qk_scale,
                drop=drop_rate,
                attn_drop=attn_drop_rate,
                drop_path=self.dpr[i],
                norm_layer=norm_layer,
            ) for i in range(depth)
        ])
        self.norm = norm_layer(embed_dim)

        # NOTE as per official impl, we could have a pre-logits representation dense layer + tanh here
        #self.repr = nn.Linear(embed_dim, representation_size)
        #self.repr_act = nn.Tanh()

        # Classifier head
        self.head = nn.Linear(
            embed_dim, num_classes) if num_classes > 0 else nn.Identity()

        trunc_normal_(self.pos_embed, std=.02)
        trunc_normal_(self.cls_token, std=.02)
        self.apply(self._init_weights)
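The `self.dpr` list above implements the stochastic depth decay rule: drop-path probabilities grow linearly from 0 at the first block to `drop_path_rate` at the last. A tiny standalone check with illustrative values:

import torch

depth, drop_path_rate = 4, 0.3
dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)]
print([round(r, 2) for r in dpr])  # [0.0, 0.1, 0.2, 0.3]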
Example #6
    def __init__(self, in_channels, out_channels, activation='relu'):
        super().__init__()
        self.in_channels, self.out_channels, self.activation = in_channels, out_channels, activation
        self.blocks = nn.Identity()
        self.activate = activation_func(activation)
        self.shortcut = nn.Identity()
Example #7
    def __init__(self, embedding_model, mixup_layer, n_class):
        super().__init__()
        self.mix_model = TMix(embedding_model, mixup_layer=mixup_layer)
        self.classifier = create_sentence_classifier(embedding_model.embed_dim,
                                                     n_class)
        self.sentence_h = nn.Identity()
Example #8
File: pvt.py Project: whai362/PVT
    def __init__(self,
                 img_size=224,
                 patch_size=16,
                 in_chans=3,
                 num_classes=1000,
                 embed_dims=[64, 128, 256, 512],
                 num_heads=[1, 2, 4, 8],
                 mlp_ratios=[4, 4, 4, 4],
                 qkv_bias=False,
                 qk_scale=None,
                 drop_rate=0.,
                 attn_drop_rate=0.,
                 drop_path_rate=0.,
                 norm_layer=nn.LayerNorm,
                 depths=[3, 4, 6, 3],
                 sr_ratios=[8, 4, 2, 1],
                 num_stages=4):
        super().__init__()
        self.num_classes = num_classes
        self.depths = depths
        self.num_stages = num_stages

        dpr = [
            x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))
        ]  # stochastic depth decay rule
        cur = 0

        for i in range(num_stages):
            patch_embed = PatchEmbed(
                img_size=img_size if i == 0 else img_size // (2**(i + 1)),
                patch_size=patch_size if i == 0 else 2,
                in_chans=in_chans if i == 0 else embed_dims[i - 1],
                embed_dim=embed_dims[i])
            num_patches = patch_embed.num_patches if i != num_stages - 1 else patch_embed.num_patches + 1
            pos_embed = nn.Parameter(torch.zeros(1, num_patches,
                                                 embed_dims[i]))
            pos_drop = nn.Dropout(p=drop_rate)

            block = nn.ModuleList([
                Block(dim=embed_dims[i],
                      num_heads=num_heads[i],
                      mlp_ratio=mlp_ratios[i],
                      qkv_bias=qkv_bias,
                      qk_scale=qk_scale,
                      drop=drop_rate,
                      attn_drop=attn_drop_rate,
                      drop_path=dpr[cur + j],
                      norm_layer=norm_layer,
                      sr_ratio=sr_ratios[i]) for j in range(depths[i])
            ])
            cur += depths[i]

            setattr(self, f"patch_embed{i + 1}", patch_embed)
            setattr(self, f"pos_embed{i + 1}", pos_embed)
            setattr(self, f"pos_drop{i + 1}", pos_drop)
            setattr(self, f"block{i + 1}", block)

        self.norm = norm_layer(embed_dims[3])

        # cls_token
        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dims[3]))

        # classification head
        self.head = nn.Linear(
            embed_dims[3], num_classes) if num_classes > 0 else nn.Identity()

        # init weights
        for i in range(num_stages):
            pos_embed = getattr(self, f"pos_embed{i + 1}")
            trunc_normal_(pos_embed, std=.02)
        trunc_normal_(self.cls_token, std=.02)
        self.apply(self._init_weights)
Example #9
            encoder_params: Params of encoder module.
            pooling_params: Params of the pooling layer.
            head_params: 'Head' module params.

        Returns:
            Model.

        """
        encoder: nn.Module = nn.Identity()
        if (encoder_params_ := copy.deepcopy(encoder_params)) is not None:
            encoder_fn = MODULE.get(encoder_params_.pop("module"))
            encoder = encoder_fn(**encoder_params_)

        pool: nn.Module = nn.Identity()
        if (pooling_params_ := copy.deepcopy(pooling_params)) is not None:
            pool_fn = MODULE.get(pooling_params_.pop("module"))
            pool = pool_fn(**pooling_params_)

        head: nn.Module = nn.Identity()
        if (head_params_ := copy.deepcopy(head_params)) is not None:
            head_fn = MODULE.get(head_params_.pop("module"))
            head = head_fn(**head_params_)

        net = cls(encoder=encoder, pool=pool, head=head)
        utils.net_init_(net)

        return net


__all__ = ["VGGConv"]
Example #10
    def __init__(self, config):
        super().__init__(config)

        self.num_labels = config.num_labels
        self.swin = SwinModel(config)

        # Classifier head
        self.classifier = (
            nn.Linear(self.swin.num_features, config.num_labels) if config.num_labels > 0 else nn.Identity()
        )

        # Initialize weights and apply final processing
        self.post_init()
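The classifier above uses the common head-or-Identity idiom: with `num_labels == 0` the model returns pooled features instead of logits, with no special casing in forward. A minimal standalone sketch:

import torch
import torch.nn as nn

num_features, num_labels = 768, 0
classifier = nn.Linear(num_features, num_labels) if num_labels > 0 else nn.Identity()

pooled = torch.randn(2, num_features)
print(classifier(pooled).shape)  # torch.Size([2, 768]): features pass straight through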
Example #11
    def __init__(self,
                 img_size=224,
                 patch_size=16,
                 in_chans=3,
                 num_classes=1000,
                 embed_dims=[64, 128, 256, 512],
                 num_heads=[1, 2, 4, 8],
                 mlp_ratios=[4, 4, 4, 4],
                 qkv_bias=False,
                 qk_scale=None,
                 drop_rate=0.,
                 attn_drop_rate=0.,
                 drop_path_rate=0.,
                 norm_layer=nn.LayerNorm,
                 depths=[3, 4, 6, 3],
                 sr_ratios=[8, 4, 2, 1]):
        super().__init__()
        self.num_classes = num_classes
        self.depths = depths

        # patch_embed
        self.patch_embed1 = PatchEmbed(img_size=img_size,
                                       patch_size=patch_size,
                                       in_chans=in_chans,
                                       embed_dim=embed_dims[0])
        self.patch_embed2 = GridDown(img_size=img_size // 4,
                                     patch_size=2,
                                     in_chans=embed_dims[0],
                                     embed_dim=embed_dims[1])
        self.patch_embed3 = GridDown(img_size=img_size // 8,
                                     patch_size=2,
                                     in_chans=embed_dims[1],
                                     embed_dim=embed_dims[2])
        self.patch_embed4 = GridDown(img_size=img_size // 16,
                                     patch_size=2,
                                     in_chans=embed_dims[2],
                                     embed_dim=embed_dims[3])

        # pos_embed
        self.pos_embed1 = nn.Parameter(
            torch.zeros(1, self.patch_embed1.num_patches, embed_dims[0]))
        self.pos_drop1 = nn.Dropout(p=drop_rate)
        self.pos_embed2 = nn.Parameter(
            torch.zeros(1, self.patch_embed2.num_patches, embed_dims[1]))
        self.pos_drop2 = nn.Dropout(p=drop_rate)
        self.pos_embed3 = nn.Parameter(
            torch.zeros(1, self.patch_embed3.num_patches, embed_dims[2]))
        self.pos_drop3 = nn.Dropout(p=drop_rate)
        self.pos_embed4 = nn.Parameter(
            torch.zeros(1, self.patch_embed4.num_patches + 1, embed_dims[3]))
        self.pos_drop4 = nn.Dropout(p=drop_rate)

        # transformer encoder
        dpr = [
            x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))
        ]  # stochastic depth decay rule
        cur = 0
        self.block1 = nn.ModuleList([
            Block(dim=embed_dims[0],
                  num_heads=num_heads[0],
                  mlp_ratio=mlp_ratios[0],
                  qkv_bias=qkv_bias,
                  qk_scale=qk_scale,
                  drop=drop_rate,
                  attn_drop=attn_drop_rate,
                  drop_path=dpr[cur + i],
                  norm_layer=norm_layer,
                  sr_ratio=sr_ratios[0]) for i in range(depths[0])
        ])

        cur += depths[0]
        self.block2 = nn.ModuleList([
            Block(dim=embed_dims[1],
                  num_heads=num_heads[1],
                  mlp_ratio=mlp_ratios[1],
                  qkv_bias=qkv_bias,
                  qk_scale=qk_scale,
                  drop=drop_rate,
                  attn_drop=attn_drop_rate,
                  drop_path=dpr[cur + i],
                  norm_layer=norm_layer,
                  sr_ratio=sr_ratios[1]) for i in range(depths[1])
        ])

        cur += depths[1]
        self.block3 = nn.ModuleList([
            Block(dim=embed_dims[2],
                  num_heads=num_heads[2],
                  mlp_ratio=mlp_ratios[2],
                  qkv_bias=qkv_bias,
                  qk_scale=qk_scale,
                  drop=drop_rate,
                  attn_drop=attn_drop_rate,
                  drop_path=dpr[cur + i],
                  norm_layer=norm_layer,
                  sr_ratio=sr_ratios[2]) for i in range(depths[2])
        ])

        cur += depths[2]
        self.block4 = nn.ModuleList([
            Block(dim=embed_dims[3],
                  num_heads=num_heads[3],
                  mlp_ratio=mlp_ratios[3],
                  qkv_bias=qkv_bias,
                  qk_scale=qk_scale,
                  drop=drop_rate,
                  attn_drop=attn_drop_rate,
                  drop_path=dpr[cur + i],
                  norm_layer=norm_layer,
                  sr_ratio=sr_ratios[3]) for i in range(depths[3])
        ])
        self.norm = norm_layer(embed_dims[3])

        # cls_token
        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dims[3]))

        # classification head
        self.head = nn.Linear(
            embed_dims[3], num_classes) if num_classes > 0 else nn.Identity()

        # init weights
        trunc_normal_(self.pos_embed1, std=.02)
        trunc_normal_(self.pos_embed2, std=.02)
        trunc_normal_(self.pos_embed3, std=.02)
        trunc_normal_(self.pos_embed4, std=.02)
        trunc_normal_(self.cls_token, std=.02)
        self.apply(self._init_weights)
Example #12
    def __init__(self, config, dim, input_resolution, num_heads, shift_size=0):
        super().__init__()
        self.chunk_size_feed_forward = config.chunk_size_feed_forward
        self.shift_size = shift_size
        self.window_size = config.window_size
        self.input_resolution = input_resolution

        if min(self.input_resolution) <= self.window_size:
            # if window size is larger than input resolution, we don't partition windows
            self.shift_size = 0
            self.window_size = min(self.input_resolution)

        self.layernorm_before = nn.LayerNorm(dim, eps=config.layer_norm_eps)
        self.attention = SwinAttention(config, dim, num_heads)
        self.drop_path = SwinDropPath(config.drop_path_rate) if config.drop_path_rate > 0.0 else nn.Identity()
        self.layernorm_after = nn.LayerNorm(dim, eps=config.layer_norm_eps)
        self.intermediate = SwinIntermediate(config, dim)
        self.output = SwinOutput(config, dim)

        if self.shift_size > 0:
            # calculate attention mask for SW-MSA
            height, width = self.input_resolution
            img_mask = torch.zeros((1, height, width, 1))
            height_slices = (
                slice(0, -self.window_size),
                slice(-self.window_size, -self.shift_size),
                slice(-self.shift_size, None),
            )
            width_slices = (
                slice(0, -self.window_size),
                slice(-self.window_size, -self.shift_size),
                slice(-self.shift_size, None),
            )
            count = 0
            for height_slice in height_slices:
                for width_slice in width_slices:
                    img_mask[:, height_slice, width_slice, :] = count
                    count += 1

            mask_windows = window_partition(img_mask, self.window_size)
            mask_windows = mask_windows.view(-1, self.window_size * self.window_size)
            attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)
            attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0))
        else:
            attn_mask = None

        self.attn_mask = attn_mask
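To see what the shifted-window mask above actually contains, here is a self-contained sketch with tiny illustrative sizes (window_size=2, shift_size=1, a 4x4 resolution); the local window_partition below is only a stand-in for the helper used by the block:

import torch

def window_partition(x, window_size):
    # (B, H, W, C) -> (num_windows * B, window_size, window_size, C)
    b, h, w, c = x.shape
    x = x.view(b, h // window_size, window_size, w // window_size, window_size, c)
    return x.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, c)

height = width = 4
window_size, shift_size = 2, 1
img_mask = torch.zeros((1, height, width, 1))
slices = (slice(0, -window_size), slice(-window_size, -shift_size), slice(-shift_size, None))
count = 0
for height_slice in slices:
    for width_slice in slices:
        img_mask[:, height_slice, width_slice, :] = count
        count += 1

mask_windows = window_partition(img_mask, window_size).view(-1, window_size * window_size)
attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)
attn_mask = attn_mask.masked_fill(attn_mask != 0, -100.0).masked_fill(attn_mask == 0, 0.0)
print(attn_mask.shape)  # torch.Size([4, 4, 4]): one additive mask per shifted window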
Example #13
import torch
import torch.nn as nn

from horch.models.modules import Conv2d, get_activation, get_norm_layer

OPS = {
    'none':
    lambda C, stride: Zero(stride),
    'avg_pool_3x3':
    lambda C, stride: nn.AvgPool2d(
        3, stride=stride, padding=1, count_include_pad=False),
    'max_pool_3x3':
    lambda C, stride: nn.MaxPool2d(3, stride=stride, padding=1),
    'skip_connect':
    lambda C, stride: nn.Identity() if stride == 1 else FactorizedReduce(C, C),
    'sep_conv_3x3':
    lambda C, stride: SepConv(C, C, 3, stride, 1),
    'sep_conv_5x5':
    lambda C, stride: SepConv(C, C, 5, stride, 2),
    'sep_conv_7x7':
    lambda C, stride: SepConv(C, C, 7, stride, 3),
    'nor_conv_1x1':
    lambda C, stride: ReLUConvBN(C, C, 1, stride),
    'nor_conv_3x3':
    lambda C, stride: ReLUConvBN(C, C, 3, stride),
    'dil_conv_3x3':
    lambda C, stride: DilConv(C, C, 3, stride, 2),
    'dil_conv_5x5':
    lambda C, stride: DilConv(C, C, 5, stride, 4),
    'conv_7x1_1x7':
    lambda C, stride: nn.Sequential(
Example #14
    def __init__(self,
                 problem_type,
                 num_net_outputs=None,
                 quantile_levels=None,
                 train_dataset=None,
                 architecture_desc=None,
                 device=None,
                 **kwargs):
        if (architecture_desc is None) and (train_dataset is None):
            raise ValueError(
                "train_dataset cannot = None if architecture_desc=None")
        super().__init__()
        self.problem_type = problem_type
        if self.problem_type == QUANTILE:
            self.register_buffer(
                'quantile_levels',
                torch.Tensor(quantile_levels).float().reshape(1, -1))
        self.device = torch.device('cpu') if device is None else device
        if architecture_desc is None:
            params = self._set_params(**kwargs)
            # adaptively specify network architecture based on training dataset
            self.from_logits = False
            self.has_vector_features = train_dataset.has_vector_features()
            self.has_embed_features = train_dataset.num_embed_features() > 0
            if self.has_embed_features:
                num_categs_per_feature = train_dataset.getNumCategoriesEmbeddings(
                )
                embed_dims = get_embed_sizes(train_dataset, params,
                                             num_categs_per_feature)
            if self.has_vector_features:
                vector_dims = train_dataset.data_list[
                    train_dataset.vectordata_index].shape[-1]
        else:
            # ignore train_dataset, params, etc. Recreate architecture based on description:
            self.architecture_desc = architecture_desc
            self.has_vector_features = architecture_desc['has_vector_features']
            self.has_embed_features = architecture_desc['has_embed_features']
            self.from_logits = architecture_desc['from_logits']
            params = architecture_desc['params']
            if self.has_embed_features:
                num_categs_per_feature = architecture_desc[
                    'num_categs_per_feature']
                embed_dims = architecture_desc['embed_dims']
            if self.has_vector_features:
                vector_dims = architecture_desc['vector_dims']
        # init input size
        input_size = 0

        # define embedding layer:
        if self.has_embed_features:
            self.embed_blocks = nn.ModuleList()
            for i in range(len(num_categs_per_feature)):
                self.embed_blocks.append(
                    nn.Embedding(num_embeddings=num_categs_per_feature[i],
                                 embedding_dim=embed_dims[i]))
                input_size += embed_dims[i]

        # update input size
        if self.has_vector_features:
            input_size += vector_dims

        # activation
        act_fn = nn.Identity()
        if params['activation'] == 'elu':
            act_fn = nn.ELU()
        elif params['activation'] == 'relu':
            act_fn = nn.ReLU()
        elif params['activation'] == 'tanh':
            act_fn = nn.Tanh()

        layers = []
        if params['use_batchnorm']:
            layers.append(nn.BatchNorm1d(input_size,
                                         track_running_stats=False))
        layers.append(nn.Linear(input_size, params['hidden_size']))
        layers.append(act_fn)
        for _ in range(params['num_layers'] - 1):
            if params['use_batchnorm']:
                layers.append(
                    nn.BatchNorm1d(params['hidden_size'],
                                   track_running_stats=False))
            layers.append(nn.Dropout(params['dropout_prob']))
            layers.append(
                nn.Linear(params['hidden_size'], params['hidden_size']))
            layers.append(act_fn)
        layers.append(nn.Linear(params['hidden_size'], num_net_outputs))
        self.main_block = nn.Sequential(*layers)

        if self.problem_type in [REGRESSION, QUANTILE]:  # set range for output
            y_range = params[
                'y_range']  # Used specifically for regression. = None for classification.
            self.y_constraint = None  # determines if Y-predictions should be constrained
            if y_range is not None:
                if y_range[0] == -np.inf and y_range[1] == np.inf:
                    self.y_constraint = None  # do not worry about Y-range in this case
                elif y_range[0] >= 0 and y_range[1] == np.inf:
                    self.y_constraint = 'nonnegative'
                elif y_range[0] == -np.inf and y_range[1] <= 0:
                    self.y_constraint = 'nonpositive'
                else:
                    self.y_constraint = 'bounded'
                self.y_lower = y_range[0]
                self.y_upper = y_range[1]
                self.y_span = self.y_upper - self.y_lower

        if self.problem_type == QUANTILE:
            self.alpha = params['alpha']  # for huber loss
        if self.problem_type == SOFTCLASS:
            self.log_softmax = torch.nn.LogSoftmax(dim=1)
        if self.problem_type in [BINARY, MULTICLASS, SOFTCLASS]:
            self.softmax = torch.nn.Softmax(dim=1)
        if architecture_desc is None:  # Save Architecture description
            self.architecture_desc = {
                'has_vector_features': self.has_vector_features,
                'has_embed_features': self.has_embed_features,
                'params': params,
                'num_net_outputs': num_net_outputs,
                'from_logits': self.from_logits
            }
            if self.has_embed_features:
                self.architecture_desc[
                    'num_categs_per_feature'] = num_categs_per_feature
                self.architecture_desc['embed_dims'] = embed_dims
            if self.has_vector_features:
                self.architecture_desc['vector_dims'] = vector_dims
Example #15
    def __init__(self,
                 img_size=224,
                 patch_size=16,
                 in_chans=3,
                 num_classes=1000,
                 embed_dim=768,
                 depth=12,
                 num_heads=12,
                 mlp_ratio=4.,
                 qkv_bias=False,
                 qk_scale=None,
                 drop_rate=0.,
                 attn_drop_rate=0.,
                 drop_path_rate=0.,
                 norm_layer=nn.LayerNorm,
                 global_pool=None,
                 block_layers=LayerScale_Block,
                 block_layers_token=LayerScale_Block_CA,
                 Patch_layer=PatchEmbed,
                 act_layer=nn.GELU,
                 Attention_block=Attention_talking_head,
                 Mlp_block=Mlp,
                 init_scale=1e-4,
                 Attention_block_token_only=Class_Attention,
                 Mlp_block_token_only=Mlp,
                 depth_token_only=2,
                 mlp_ratio_clstk=4.0):
        super().__init__()

        self.num_classes = num_classes
        self.num_features = self.embed_dim = embed_dim

        self.patch_embed = Patch_layer(img_size=img_size,
                                       patch_size=patch_size,
                                       in_chans=in_chans,
                                       embed_dim=embed_dim)

        num_patches = self.patch_embed.num_patches

        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
        self.pos_embed = nn.Parameter(torch.zeros(1, num_patches, embed_dim))
        self.pos_drop = nn.Dropout(p=drop_rate)

        dpr = [drop_path_rate for i in range(depth)]
        self.blocks = nn.ModuleList([
            block_layers(dim=embed_dim,
                         num_heads=num_heads,
                         mlp_ratio=mlp_ratio,
                         qkv_bias=qkv_bias,
                         qk_scale=qk_scale,
                         drop=drop_rate,
                         attn_drop=attn_drop_rate,
                         drop_path=dpr[i],
                         norm_layer=norm_layer,
                         act_layer=act_layer,
                         Attention_block=Attention_block,
                         Mlp_block=Mlp_block,
                         init_values=init_scale) for i in range(depth)
        ])

        self.blocks_token_only = nn.ModuleList([
            block_layers_token(dim=embed_dim,
                               num_heads=num_heads,
                               mlp_ratio=mlp_ratio_clstk,
                               qkv_bias=qkv_bias,
                               qk_scale=qk_scale,
                               drop=0.0,
                               attn_drop=0.0,
                               drop_path=0.0,
                               norm_layer=norm_layer,
                               act_layer=act_layer,
                               Attention_block=Attention_block_token_only,
                               Mlp_block=Mlp_block_token_only,
                               init_values=init_scale)
            for i in range(depth_token_only)
        ])

        self.norm = norm_layer(embed_dim)

        self.feature_info = [
            dict(num_chs=embed_dim, reduction=0, module='head')
        ]
        self.head = nn.Linear(
            embed_dim, num_classes) if num_classes > 0 else nn.Identity()

        trunc_normal_(self.pos_embed, std=.02)
        trunc_normal_(self.cls_token, std=.02)
        self.apply(self._init_weights)
Example #16
class VGGConv(nn.Module):
    """VGG-like neural network for image classification.

    Args:
        encoder: Image encoder module, usually used for the extraction
            of embeddings from input signals.
        pool: Pooling layer, used to reduce embeddings from the encoder.
        head: Classification head, usually consists of Fully Connected layers.

    """

    def __init__(
        self, encoder: nn.Module, pool: nn.Module, head: nn.Module,
    ) -> None:
        super().__init__()

        self.encoder = encoder
        self.pool = pool
        self.head = head

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Forward call.

        Args:
            x: Batch of images.

        Returns:
            Batch of logits.

        """
        x = self.pool(self.encoder(x))
        x = x.view(x.shape[0], -1)
        x = self.head(x)

        return x

    @classmethod
    def get_from_params(
        cls,
        encoder_params: Optional[dict] = None,
        pooling_params: Optional[dict] = None,
        head_params: Optional[dict] = None,
    ) -> "VGGConv":
        """Create model based on it config.

        Args:
            encoder_params: Params of encoder module.
            pooling_params: Params of the pooling layer.
            head_params: 'Head' module params.

        Returns:
            Model.

        """
        encoder: nn.Module = nn.Identity()
        if (encoder_params_ := copy.deepcopy(encoder_params)) is not None:
            encoder_fn = MODULE.get(encoder_params_.pop("module"))
            encoder = encoder_fn(**encoder_params_)

        pool: nn.Module = nn.Identity()
        if (pooling_params_ := copy.deepcopy(pooling_params)) is not None:
            pool_fn = MODULE.get(pooling_params_.pop("module"))
            pool = pool_fn(**pooling_params_)
Example #17
def activation_func(activation):
    return nn.ModuleDict(
        [['relu', nn.ReLU(inplace=True)],
         ['leaky_relu',
          nn.LeakyReLU(negative_slope=0.01, inplace=True)],
         ['selu', nn.SELU(inplace=True)], ['none', nn.Identity()]])[activation]
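Looking up activations through an nn.ModuleDict like the above keeps "no activation" uniform with the rest: 'none' returns nn.Identity(), so callers never need to branch on it. A minimal usage sketch (assumes `import torch` alongside the function above):

x = torch.randn(2, 8)
act = activation_func('none')          # -> nn.Identity()
assert torch.equal(act(x), x)          # the tensor passes through unchanged
print(activation_func('leaky_relu'))   # LeakyReLU(negative_slope=0.01, inplace=True)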
Example #18
    def __init__(
            self,
            out_features: int,
            model_config: ModelConfig,
            met_config: MetricLearningConfig,
            pooling_config: PoolingConfig,
            train_df: pd.DataFrame = pd.DataFrame(),
    ):
        super(ShopeeImgNet4, self).__init__()
        self.model_config = model_config
        self.pooling_config = pooling_config
        self.met_config = met_config
        channel_size = model_config.channel_size
        if "efficientnet-" in model_config.model_arch:
            self.backbone = (EfficientNet.from_pretrained(
                model_config.model_arch) if model_config.pretrained else
                             EfficientNet.from_name(model_config.model_arch))
        else:
            self.backbone = timm.create_model(
                model_config.model_arch, pretrained=model_config.pretrained)

        if ("resnext" in model_config.model_arch
                or "resnet" in model_config.model_arch
                or "xception" in model_config.model_arch
                or "resnest" in model_config.model_arch):
            final_in_features = self.backbone.fc.in_features
            self.backbone.fc = nn.Identity()
        elif "efficientnet-" in model_config.model_arch:
            final_in_features = self.backbone._fc.in_features
            self.backbone._dropout = nn.Identity()
            self.backbone._fc = nn.Identity()
            self.backbone._swish = nn.Identity()
        elif "vit" in model_config.model_arch:
            final_in_features = self.backbone.head.in_features
            self.backbone.head = nn.Identity()
        elif "nfnet" in model_config.model_arch:
            final_in_features = self.backbone.head.fc.in_features
            self.backbone.head.global_pool = nn.Identity()
            self.backbone.head.fc = nn.Identity()
        else:
            final_in_features = self.backbone.classifier.in_features
            self.backbone.classifier = nn.Identity()

        if ("efficientnet-" not in model_config.model_arch
                and "nfnet" not in model_config.model_arch):
            self.backbone.global_pool = nn.Identity()

        if pooling_config.name.lower() == "gem":
            self.pooling = GeM(**pooling_config.params)
        else:
            self.pooling = PoolingFactory.get_pooling(pooling_config)

        self.dropout = nn.Dropout(p=model_config.dropout)
        self.bn1 = nn.BatchNorm1d(final_in_features)
        self.fc = nn.Linear(final_in_features, channel_size)
        self.bn2 = nn.BatchNorm1d(channel_size)

        if met_config.name == "ArcAdaptiveMarginProduct":
            self.margin = MetricLearningFactory.get_metric_learning_product(
                met_config,
                in_features=channel_size,
                out_features=out_features,
                train_df=train_df,
            )
        else:
            self.margin = MetricLearningFactory.get_metric_learning_product(
                met_config,
                in_features=channel_size,
                out_features=out_features,
            )
        self._init_params()
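The branching above all serves one idea: replace whatever classification head the backbone ships with by nn.Identity() so the network emits embeddings. A minimal sketch of that idea using torchvision's resnet18 as a stand-in backbone (randomly initialised, so nothing is downloaded):

import torch
import torch.nn as nn
from torchvision.models import resnet18

backbone = resnet18()
feat_dim = backbone.fc.in_features    # 512
backbone.fc = nn.Identity()           # strip the classification head
features = backbone(torch.randn(1, 3, 224, 224))
print(feat_dim, features.shape)       # 512 torch.Size([1, 512])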
Example #19
    def __init__(self, path=None, features=256, non_negative=True, yolo_cfg='',
                 augment=False, image_size=None, device='cpu'):
        """Init.
        Args:
            path (str, optional): Path to saved model. Defaults to None.
            features (int, optional): Number of features. Defaults to 256.
            backbone (str, optional): Backbone network for encoder. Defaults to resnet50
        """
        print("Loading weights: ", path)

        super(MidasYoloNet, self).__init__()

        use_pretrained = False if path is None else True

        self.pretrained, self.scratch = blocks._make_encoder(backbone="resnext101_wsl", features=features, use_pretrained=use_pretrained)

        # Midas Decoder part
        self.scratch.refinenet4 = layers.FeatureFusionBlock(features)
        self.scratch.refinenet3 = layers.FeatureFusionBlock(features)
        self.scratch.refinenet2 = layers.FeatureFusionBlock(features)
        self.scratch.refinenet1 = layers.FeatureFusionBlock(features)

        self.scratch.output_conv = nn.Sequential(
            nn.Conv2d(features, 128, kernel_size=3, stride=1, padding=1),
            layers.Interpolate(scale_factor=2, mode="bilinear"),
            nn.Conv2d(128, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(True),
            nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0),
            nn.ReLU(True) if non_negative else nn.Identity(),
        )

        self.yolo_head = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1, bias=False),  # 208 x 208
            nn.BatchNorm2d(32, momentum=0.03, eps=0.0001),
            nn.LeakyReLU(negative_slope=0.1, inplace=True),

            nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1, bias=False), # 104 x 104
            nn.BatchNorm2d(64, momentum=0.03, eps=0.0001),
            nn.LeakyReLU(negative_slope=0.1, inplace=True),

            nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1, bias=False),  # 52 x 52
            nn.BatchNorm2d(128, momentum=0.03, eps=0.0001),
            nn.LeakyReLU(negative_slope=0.1, inplace=True),

            nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1, bias=False),  # 26 x 26
            nn.BatchNorm2d(256, momentum=0.03, eps=0.0001),
            nn.LeakyReLU(negative_slope=0.1, inplace=True),

            nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=1, bias=False),  # 13 x 13
            nn.BatchNorm2d(512, momentum=0.03, eps=0.0001),
            nn.LeakyReLU(negative_slope=0.1, inplace=True),

            nn.Conv2d(512, 1024, kernel_size=3, stride=1, padding=1, bias=False),  # 13 x 13
            nn.BatchNorm2d(1024, momentum=0.03, eps=0.0001),
            nn.LeakyReLU(negative_slope=0.1, inplace=True),

            nn.Conv2d(1024, 2048, kernel_size=3, stride=1, padding=1, bias=False),  # 13 x 13
            nn.BatchNorm2d(2048, momentum=0.03, eps=0.0001),
            nn.LeakyReLU(negative_slope=0.1, inplace=True)

        )
        # Concat midas output with yolo head output
        self.yolo_connect = nn.Sequential(
            nn.Conv2d(2048+256, 2048, kernel_size=3, stride=1, padding=1, bias=False),  # 13 x 13
            nn.BatchNorm2d(2048, momentum=0.03, eps=0.0001),
            nn.LeakyReLU(negative_slope=0.1, inplace=True)
        )
        self.yolo_decoder = yolo3_net.Darknet(cfg=yolo_cfg, img_size=image_size)
        # self.yolo_decoder = yolo3_net.YoloDecoder(img_size=image_size, features=features)

        self.yolo_layers = self.yolo_decoder.yolo_layers
        # self.module_list = self.yolo_decoder.module_list # ToDo Smita: fix this, its a hack

        # Add planercnn model. Can directly add, as its backbone is also Resnet101, same as midas
        planercnn_config = Config(None)
        self.planercnn_decoder = planercnn_net.MaskRCNN(planercnn_config, device)
Example #20
    def __init__(self,
                 in_chs,
                 out_chs=None,
                 stride=1,
                 dilation=1,
                 first_dilation=None,
                 alpha=1.0,
                 beta=1.0,
                 bottle_ratio=0.25,
                 group_size=None,
                 ch_div=1,
                 reg=True,
                 extra_conv=False,
                 skipinit=False,
                 attn_layer=None,
                 attn_gain=2.0,
                 act_layer=None,
                 conv_layer=None,
                 drop_path_rate=0.):
        super().__init__()
        first_dilation = first_dilation or dilation
        out_chs = out_chs or in_chs
        # RegNet variants scale bottleneck from in_chs, otherwise scale from out_chs like ResNet
        mid_chs = make_divisible(
            in_chs * bottle_ratio if reg else out_chs * bottle_ratio, ch_div)
        groups = 1 if not group_size else mid_chs // group_size
        if group_size and group_size % ch_div == 0:
            mid_chs = group_size * groups  # correct mid_chs if group_size divisible by ch_div, otherwise error
        self.alpha = alpha
        self.beta = beta
        self.attn_gain = attn_gain

        if in_chs != out_chs or stride != 1 or dilation != first_dilation:
            self.downsample = DownsampleAvg(in_chs,
                                            out_chs,
                                            stride=stride,
                                            dilation=dilation,
                                            first_dilation=first_dilation,
                                            conv_layer=conv_layer)
        else:
            self.downsample = None

        self.act1 = act_layer()
        self.conv1 = conv_layer(in_chs, mid_chs, 1)
        self.act2 = act_layer(inplace=True)
        self.conv2 = conv_layer(mid_chs,
                                mid_chs,
                                3,
                                stride=stride,
                                dilation=first_dilation,
                                groups=groups)
        if extra_conv:
            self.act2b = act_layer(inplace=True)
            self.conv2b = conv_layer(mid_chs,
                                     mid_chs,
                                     3,
                                     stride=1,
                                     dilation=dilation,
                                     groups=groups)
        else:
            self.act2b = None
            self.conv2b = None
        if reg and attn_layer is not None:
            self.attn = attn_layer(
                mid_chs)  # RegNet blocks apply attn btw conv2 & 3
        else:
            self.attn = None
        self.act3 = act_layer()
        self.conv3 = conv_layer(mid_chs, out_chs, 1)
        if not reg and attn_layer is not None:
            self.attn_last = attn_layer(
                out_chs)  # ResNet blocks apply attn after conv3
        else:
            self.attn_last = None
        self.drop_path = DropPath(
            drop_path_rate) if drop_path_rate > 0 else nn.Identity()
        self.skipinit_gain = nn.Parameter(
            torch.tensor(0.)) if skipinit else None
Example #21
    def __init__(self, in_channels, out_channels):
        super().__init__()
        should_skip = in_channels == out_channels
        self.convolution = nn.Conv2d(in_channels, out_channels, kernel_size=1) if not should_skip else nn.Identity()
        self.fusion = GLPNSelectiveFeatureFusion(out_channels)
        self.upsample = nn.Upsample(scale_factor=2, mode="bilinear", align_corners=False)
Example #22
    def __init__(self,
                 cfg: NfCfg,
                 num_classes=1000,
                 in_chans=3,
                 global_pool='avg',
                 output_stride=32,
                 drop_rate=0.,
                 drop_path_rate=0.):
        super().__init__()
        self.num_classes = num_classes
        self.drop_rate = drop_rate
        assert cfg.act_layer in _nonlin_gamma, f"Please add non-linearity constants for activation ({cfg.act_layer})."
        conv_layer = ScaledStdConv2dSame if cfg.same_padding else ScaledStdConv2d
        if cfg.gamma_in_act:
            act_layer = act_with_gamma(cfg.act_layer,
                                       gamma=_nonlin_gamma[cfg.act_layer])
            conv_layer = partial(conv_layer,
                                 eps=1e-4)  # DM weights better with higher eps
        else:
            act_layer = get_act_layer(cfg.act_layer)
            conv_layer = partial(conv_layer,
                                 gamma=_nonlin_gamma[cfg.act_layer])
        attn_layer = partial(get_attn(cfg.attn_layer), **
                             cfg.attn_kwargs) if cfg.attn_layer else None

        stem_chs = make_divisible(
            (cfg.stem_chs or cfg.channels[0]) * cfg.width_factor, cfg.ch_div)
        self.stem, stem_stride, stem_feat = create_stem(in_chans,
                                                        stem_chs,
                                                        cfg.stem_type,
                                                        conv_layer=conv_layer,
                                                        act_layer=act_layer)

        self.feature_info = [stem_feat] if stem_stride == 4 else []
        drop_path_rates = [
            x.tolist() for x in torch.linspace(
                0, drop_path_rate, sum(cfg.depths)).split(cfg.depths)
        ]
        prev_chs = stem_chs
        net_stride = stem_stride
        dilation = 1
        expected_var = 1.0
        stages = []
        for stage_idx, stage_depth in enumerate(cfg.depths):
            stride = 1 if stage_idx == 0 and stem_stride > 2 else 2
            if stride == 2:
                self.feature_info += [
                    dict(num_chs=prev_chs,
                         reduction=net_stride,
                         module=f'stages.{stage_idx}.0.act1')
                ]
            if net_stride >= output_stride and stride > 1:
                dilation *= stride
                stride = 1
            net_stride *= stride
            first_dilation = 1 if dilation in (1, 2) else 2

            blocks = []
            for block_idx in range(cfg.depths[stage_idx]):
                first_block = block_idx == 0 and stage_idx == 0
                out_chs = make_divisible(
                    cfg.channels[stage_idx] * cfg.width_factor, cfg.ch_div)
                blocks += [
                    NormFreeBlock(
                        in_chs=prev_chs,
                        out_chs=out_chs,
                        alpha=cfg.alpha,
                        beta=1. / expected_var**0.5,
                        stride=stride if block_idx == 0 else 1,
                        dilation=dilation,
                        first_dilation=first_dilation,
                        group_size=cfg.group_size,
                        bottle_ratio=1.
                        if cfg.reg and first_block else cfg.bottle_ratio,
                        ch_div=cfg.ch_div,
                        reg=cfg.reg,
                        extra_conv=cfg.extra_conv,
                        skipinit=cfg.skipinit,
                        attn_layer=attn_layer,
                        attn_gain=cfg.attn_gain,
                        act_layer=act_layer,
                        conv_layer=conv_layer,
                        drop_path_rate=drop_path_rates[stage_idx][block_idx],
                    )
                ]
                if block_idx == 0:
                    expected_var = 1.  # expected var is reset after first block of each stage
                expected_var += cfg.alpha**2  # Even if reset occurs, increment expected variance
                first_dilation = dilation
                prev_chs = out_chs
            stages += [nn.Sequential(*blocks)]
        self.stages = nn.Sequential(*stages)

        if cfg.num_features:
            # The paper NFRegNet models have an EfficientNet-like final head convolution.
            self.num_features = make_divisible(
                cfg.width_factor * cfg.num_features, cfg.ch_div)
            self.final_conv = conv_layer(prev_chs, self.num_features, 1)
        else:
            self.num_features = prev_chs
            self.final_conv = nn.Identity()
        self.final_act = act_layer(inplace=cfg.num_features > 0)
        self.feature_info += [
            dict(num_chs=self.num_features,
                 reduction=net_stride,
                 module='final_act')
        ]

        self.head = ClassifierHead(self.num_features,
                                   num_classes,
                                   pool_type=global_pool,
                                   drop_rate=self.drop_rate)

        for n, m in self.named_modules():
            if 'fc' in n and isinstance(m, nn.Linear):
                if cfg.zero_init_fc:
                    nn.init.zeros_(m.weight)
                else:
                    nn.init.normal_(m.weight, 0., .01)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight,
                                        mode='fan_in',
                                        nonlinearity='linear')
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
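A quick worked trace of the signal-propagation bookkeeping above: each block receives beta = 1 / sqrt(expected_var); expected_var is reset to 1 right after the first block of a stage and then grows by alpha**2 per block. Illustrative values (alpha=0.2, depths=[2, 2]):

alpha = 0.2
expected_var = 1.0
for stage_idx, depth in enumerate([2, 2]):
    for block_idx in range(depth):
        beta = 1.0 / expected_var ** 0.5
        print(f"stage {stage_idx} block {block_idx}: beta={beta:.3f}")
        if block_idx == 0:
            expected_var = 1.0       # reset after the first block of each stage
        expected_var += alpha ** 2   # accumulate variance added by the block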
Example #23
    def __init__(
        self,
        dimensions: int,
        in_channels: int,
        out_channels: int,
        strides: Union[Sequence[int], int] = 1,
        kernel_size: Union[Sequence[int], int] = 3,
        subunits: int = 2,
        act: Optional[Union[Tuple, str]] = Act.PRELU,
        norm: Union[Tuple, str] = Norm.INSTANCE,
        dropout: Optional[Union[Tuple, str, float]] = None,
        dropout_dim: int = 1,
        dilation: Union[Sequence[int], int] = 1,
        bias: bool = True,
        last_conv_only: bool = False,
        padding: Optional[Union[Sequence[int], int]] = None,
    ) -> None:
        super().__init__()
        self.dimensions = dimensions
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.conv = nn.Sequential()
        self.residual = nn.Identity()
        if not padding:
            padding = same_padding(kernel_size, dilation)
        schannels = in_channels
        sstrides = strides
        subunits = max(1, subunits)

        for su in range(subunits):
            conv_only = last_conv_only and su == (subunits - 1)
            unit = Convolution(
                dimensions,
                schannels,
                out_channels,
                strides=sstrides,
                kernel_size=kernel_size,
                act=act,
                norm=norm,
                dropout=dropout,
                dropout_dim=dropout_dim,
                dilation=dilation,
                bias=bias,
                conv_only=conv_only,
                padding=padding,
            )

            self.conv.add_module(f"unit{su:d}", unit)

            # after first loop set channels and strides to what they should be for subsequent units
            schannels = out_channels
            sstrides = 1

        # apply convolution to input to change number of output channels and size to match that coming from self.conv
        if np.prod(strides) != 1 or in_channels != out_channels:
            rkernel_size = kernel_size
            rpadding = padding

            if np.prod(strides) == 1:  # if only adapting number of channels a 1x1 kernel is used with no padding
                rkernel_size = 1
                rpadding = 0

            conv_type = Conv[Conv.CONV, dimensions]
            self.residual = conv_type(in_channels, out_channels, rkernel_size, strides, rpadding, bias=bias)
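A short, self-contained check of the shortcut logic above (illustrative 2D shapes, not from the source): the skip-branch convolution only exists so its output shape matches the main branch, which is what makes the residual add valid.

# Illustrative check: when strides != 1 or the channel count changes, the
# shortcut conv reshapes the input so the residual addition is well defined.
import torch
import torch.nn as nn

x = torch.randn(1, 8, 32, 32)
main_branch = nn.Conv2d(8, 16, kernel_size=3, stride=2, padding=1)   # stands in for self.conv
shortcut = nn.Conv2d(8, 16, kernel_size=3, stride=2, padding=1)      # stands in for self.residual
out = main_branch(x) + shortcut(x)   # both branches are (1, 16, 16, 16), so the add succeeds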
Example No. 24
    def __init__(self, in_channels, out_channels, kernel_size=3, upsampling=1):
        conv2d = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, padding=kernel_size // 2)
        upsampling = nn.UpsamplingBilinear2d(scale_factor=upsampling) if upsampling > 1 else nn.Identity()
        super().__init__(conv2d, upsampling)
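This snippet only makes sense if the class derives from nn.Sequential, so that the two layers passed to super().__init__ become the forward pass. A self-contained sketch under that assumption, with a hypothetical class name:

# Hypothetical wrapper class assumed to subclass nn.Sequential.
import torch
import torch.nn as nn

class SegmentationHead(nn.Sequential):
    def __init__(self, in_channels, out_channels, kernel_size=3, upsampling=1):
        conv2d = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, padding=kernel_size // 2)
        up = nn.UpsamplingBilinear2d(scale_factor=upsampling) if upsampling > 1 else nn.Identity()
        super().__init__(conv2d, up)

head = SegmentationHead(256, 2, upsampling=4)
logits = head(torch.randn(1, 256, 56, 56))   # -> (1, 2, 224, 224)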
Example No. 25
    def __init__(self,
                 args,
                 img_size=32,
                 patch_size=None,
                 in_chans=3,
                 num_classes=1,
                 embed_dim=None,
                 depth=7,
                 num_heads=4,
                 mlp_ratio=4.,
                 qkv_bias=False,
                 qk_scale=None,
                 drop_rate=0.,
                 attn_drop_rate=0.,
                 drop_path_rate=0.,
                 hybrid_backbone=None,
                 norm_layer=nn.LayerNorm):
        super().__init__()
        self.num_classes = num_classes
        self.num_features = embed_dim = self.embed_dim = args.df_dim  # num_features for consistency with other models
        depth = args.d_depth
        self.args = args
        patch_size = args.patch_size
        self.patch_embed = nn.Conv2d(3,
                                     embed_dim,
                                     kernel_size=patch_size,
                                     stride=patch_size,
                                     padding=0)
        num_patches = (args.img_size // patch_size)**2

        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
        self.pos_embed = nn.Parameter(
            torch.zeros(1, num_patches + 1, embed_dim))
        self.pos_drop = nn.Dropout(p=drop_rate)

        # stochastic depth decay rule
        dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)]
        self.blocks = nn.ModuleList([
            Block(dim=embed_dim,
                  num_heads=num_heads,
                  mlp_ratio=mlp_ratio,
                  qkv_bias=qkv_bias,
                  qk_scale=qk_scale,
                  drop=drop_rate,
                  attn_drop=attn_drop_rate,
                  drop_path=dpr[i],
                  norm_layer=norm_layer) for i in range(depth)
        ])
        self.norm = norm_layer(embed_dim)

        # NOTE as per official impl, we could have a pre-logits representation dense layer + tanh here
        #self.repr = nn.Linear(embed_dim, representation_size)
        #self.repr_act = nn.Tanh()

        # Classifier head
        self.head = nn.Linear(
            embed_dim, num_classes) if num_classes > 0 else nn.Identity()

        trunc_normal_(self.pos_embed, std=.02)
        trunc_normal_(self.cls_token, std=.02)
        self.apply(self._init_weights)
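The positional embedding is sized num_patches + 1 because a class token is prepended to the patch tokens. A quick shape check of the strided-conv patch embedding, using illustrative sizes rather than the args object above:

# Illustrative sizes only, not the snippet's args.
import torch
import torch.nn as nn

img_size, patch_size, embed_dim = 32, 4, 48
patch_embed = nn.Conv2d(3, embed_dim, kernel_size=patch_size, stride=patch_size)
x = torch.randn(2, 3, img_size, img_size)
tokens = patch_embed(x).flatten(2).transpose(1, 2)   # (2, 64, 48): (32 // 4) ** 2 = 64 patches
cls = torch.zeros(2, 1, embed_dim)
tokens = torch.cat([cls, tokens], dim=1)             # (2, 65, 48), matching num_patches + 1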
Example No. 26
def main(args: argparse.Namespace):
    logger = CompleteLogger(args.log, args.phase)
    print(args)

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    cudnn.benchmark = True

    # Data loading code
    train_transform = utils.get_train_transform(args.train_resizing,
                                                not args.no_hflip,
                                                args.color_jitter)
    val_transform = utils.get_val_transform(args.val_resizing)
    print("train_transform: ", train_transform)
    print("val_transform: ", val_transform)

    train_dataset, val_dataset, num_classes = utils.get_dataset(
        args.data, args.root, train_transform, val_transform, args.sample_rate,
        args.num_samples_per_classes)
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=False,
                              num_workers=args.workers,
                              drop_last=False)
    val_loader = DataLoader(val_dataset,
                            batch_size=args.batch_size,
                            shuffle=False,
                            num_workers=args.workers)
    print("training dataset size: {} test dataset size: {}".format(
        len(train_dataset), len(val_dataset)))

    # create model
    print("=> using pre-trained model '{}'".format(args.arch))
    backbone = utils.get_model(args.arch, args.pretrained)
    pool_layer = nn.Identity() if args.no_pool else None
    classifier = Classifier(backbone,
                            num_classes,
                            head_source=backbone.copy_head(),
                            pool_layer=pool_layer,
                            finetune=args.finetune).to(device)
    kd = KnowledgeDistillationLoss(args.T)

    source_classifier = nn.Sequential(classifier.backbone,
                                      classifier.pool_layer,
                                      classifier.head_source)
    pretrain_labels = collect_pretrain_labels(train_loader, source_classifier,
                                              device)
    train_dataset = CombineDataset(
        [train_dataset, TensorDataset(pretrain_labels)])
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=args.workers,
                              drop_last=True)
    train_iter = ForeverDataIterator(train_loader)

    # define optimizer and lr scheduler
    optimizer = SGD(classifier.get_parameters(args.lr),
                    momentum=args.momentum,
                    weight_decay=args.wd,
                    nesterov=True)
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                        args.lr_decay_epochs,
                                                        gamma=args.lr_gamma)

    # resume from the best checkpoint
    if args.phase == 'test':
        checkpoint = torch.load(logger.get_checkpoint_path('best'),
                                map_location='cpu')
        classifier.load_state_dict(checkpoint)
        acc1 = utils.validate(val_loader, classifier, args, device)
        print(acc1)
        return

    # start training
    best_acc1 = 0.0
    for epoch in range(args.epochs):
        # train for one epoch
        train(train_iter, classifier, kd, optimizer, epoch, args)
        lr_scheduler.step()
        # evaluate on validation set
        acc1 = utils.validate(val_loader, classifier, args, device)

        # remember best acc@1 and save checkpoint
        torch.save(classifier.state_dict(),
                   logger.get_checkpoint_path('latest'))
        if acc1 > best_acc1:
            shutil.copy(logger.get_checkpoint_path('latest'),
                        logger.get_checkpoint_path('best'))
        best_acc1 = max(acc1, best_acc1)

    print("best_acc1 = {:3.1f}".format(best_acc1))
    logger.close()
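KnowledgeDistillationLoss is not defined in this snippet. One common temperature-scaled formulation is sketched below purely as an assumption about what such a loss might look like; the actual class used here may differ.

# Hedged stand-in for KnowledgeDistillationLoss(T); not the original implementation.
import torch.nn as nn
import torch.nn.functional as F

class KnowledgeDistillationSketch(nn.Module):
    def __init__(self, T: float):
        super().__init__()
        self.T = T

    def forward(self, student_logits, teacher_logits):
        log_p = F.log_softmax(student_logits / self.T, dim=1)
        q = F.softmax(teacher_logits / self.T, dim=1)
        # scale by T**2 so gradient magnitudes stay comparable to the hard-label loss
        return F.kl_div(log_p, q, reduction='batchmean') * (self.T ** 2)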
Example No. 27
dataiter = iter(train_loader)
(images1, images2), labels = next(dataiter)  # use the iterator protocol; .next() was removed in recent PyTorch

imshow(torchvision.utils.make_grid(images1))
imshow(torchvision.utils.make_grid(images2))
print(' '.join('%5s' % train_loader.dataset.classes[labels[j]]
               for j in range(len(labels))))

import torch.nn as nn
from torchvision import models

resnet = getattr(models, backbone_name)()  # look the constructor up by name instead of eval-ing a string
resnet.output_dim = resnet.fc.in_features
resnet.fc = nn.Identity()

# if backbone_name == 'resnet18':
#   resnet = models.resnet18(pretrained=False)
# elif backbone_name == 'resnet50':
#   resnet = models.resnet50(pretrained=False)
# else:
#   raise NotImplementedError("Backbone is not implemented!")
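# With fc replaced by nn.Identity, the backbone now returns pooled feature
# vectors instead of class logits. Quick check (illustrative input size):
import torch

with torch.no_grad():
    feats = resnet(torch.randn(2, 3, 224, 224))
# feats.shape == (2, resnet.output_dim), e.g. (2, 512) for a resnet18 backbone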

import copy
import math
from torch.nn import functional


class MLP(nn.Module):
    def __init__(self, input_dim):
Example No. 28
    def __init__(self, config):
        super().__init__(config)

        self.num_labels = config.num_labels
        self.deit = DeiTModel(config, add_pooling_layer=False)

        # Classifier heads
        self.cls_classifier = (
            nn.Linear(config.hidden_size, config.num_labels) if config.num_labels > 0 else nn.Identity()
        )
        self.distillation_classifier = (
            nn.Linear(config.hidden_size, config.num_labels) if config.num_labels > 0 else nn.Identity()
        )

        self.init_weights()
Example No. 29
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super(Conv, self).__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.Hardswish() if act else nn.Identity()
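autopad is referenced but not shown here; below is a minimal 'same'-padding helper with the behavior this block appears to assume, offered as an assumption rather than the original implementation:

# Assumed 'same'-padding helper: pick padding so the spatial size is preserved
# for stride 1 (int kernels and per-dimension kernel lists both handled).
def autopad(k, p=None):
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]
    return p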
Example No. 30
    def __init__(self, dim, depth, heads = 8, dim_head = 64, mlp_mult = 4, local_patch_size = 7, global_k = 7, dropout = 0., has_local = True):
        super().__init__()
        self.layers = nn.ModuleList([])
        for _ in range(depth):
            self.layers.append(nn.ModuleList([
                Residual(PreNorm(dim, LocalAttention(dim, heads = heads, dim_head = dim_head, dropout = dropout, patch_size = local_patch_size))) if has_local else nn.Identity(),
                Residual(PreNorm(dim, FeedForward(dim, mlp_mult, dropout = dropout))) if has_local else nn.Identity(),
                Residual(PreNorm(dim, GlobalAttention(dim, heads = heads, dim_head = dim_head, dropout = dropout, k = global_k))),
                Residual(PreNorm(dim, FeedForward(dim, mlp_mult, dropout = dropout)))
            ]))
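Using nn.Identity() for the disabled local branches keeps the layer list uniform, because Identity simply returns its input when called in the forward loop; a tiny demonstration:

# Why nn.Identity works as a drop-in no-op branch.
import torch
import torch.nn as nn

branch = nn.Identity()
x = torch.randn(2, 16, 64)
assert torch.equal(branch(x), x)   # the tensor passes through unchanged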