Example #1
 def __init__(
         self,
         embed_dim,
         fixed_embed_dim,
         num_heads,
         attn_drop=0.,
         proj_drop=0.,
         rpe=False,
         qkv_bias=False,
         qk_scale=None,
         rpe_length=14) -> None:
     super().__init__()
     self.num_heads = num_heads
     head_dim = embed_dim // num_heads
     self.scale = qk_scale or head_dim ** -0.5
     self.qkv = nn.Linear(embed_dim, embed_dim * 3, bias=qkv_bias)
     self.attn_drop = nn.Dropout(attn_drop)
     self.proj = nn.Linear(embed_dim, embed_dim)
     self.proj_drop = nn.Dropout(proj_drop)
     self.rpe = rpe
     if rpe:
         self.rel_pos_embed_k = RelativePosition2D(
             fixed_embed_dim // num_heads, rpe_length)
         self.rel_pos_embed_v = RelativePosition2D(
             fixed_embed_dim // num_heads, rpe_length)
Example #2
 def __init__(self):
     super().__init__()
     self.conv1 = nn.Conv2d(1, 32, 3, 1)
     self.conv2 = nn.Conv2d(32, 64, 3, 1)
     self.dropout1 = nn.Dropout(0.25)
     self.dropout2 = nn.Dropout(0.5)
     self.fc1 = nn.Linear(9216, 128)
     self.fc2 = nn.Linear(128, 10)
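The 9216 input features of fc1 follow from 28x28 MNIST inputs: the two 3x3 convolutions shrink the map to 24x24, a 2x2 max pool halves it to 12x12, and 64 channels give 64 * 12 * 12 = 9216. A forward pass in the spirit of the standard PyTorch MNIST example (a sketch only; the original forward is not shown in this listing) would be:

 # Sketch of the companion forward pass (assumes `import torch` and
 # `import torch.nn.functional as F` at module level).
 def forward(self, x):
     # x: (batch, 1, 28, 28) MNIST images
     x = F.relu(self.conv1(x))      # -> (batch, 32, 26, 26)
     x = F.relu(self.conv2(x))      # -> (batch, 64, 24, 24)
     x = F.max_pool2d(x, 2)         # -> (batch, 64, 12, 12)
     x = self.dropout1(x)
     x = torch.flatten(x, 1)        # -> (batch, 9216) = 64 * 12 * 12
     x = F.relu(self.fc1(x))
     x = self.dropout2(x)
     x = self.fc2(x)
     return F.log_softmax(x, dim=1)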
Example #3
 def __init__(self):
     super().__init__()
     self.conv1 = nn.Conv2d(1, 32, 3, 1)
     self.conv2 = nn.LayerChoice(
         [nn.Conv2d(32, 64, 3, 1),
          DepthwiseSeparableConv(32, 64)])
     self.dropout1 = nn.Dropout(nn.ValueChoice([0.25, 0.5, 0.75]))
     self.dropout2 = nn.Dropout(0.5)
     feature = nn.ValueChoice([64, 128, 256])
     self.fc1 = nn.Linear(9216, feature)
     self.fc2 = nn.Linear(feature, 10)
Example #4
 def __init__(self):
     super().__init__()
     ch1 = ValueChoice([16, 32])
     kernel = ValueChoice([3, 5])
     self.conv1 = nn.Conv2d(1, ch1, kernel, padding=kernel // 2)
     self.batch_norm = nn.BatchNorm2d(ch1)
     self.conv2 = nn.Conv2d(ch1, 64, 3)
     self.dropout1 = LayerChoice(
         [nn.Dropout(.25), nn.Dropout(.5),
          nn.Dropout(.75)])
     self.fc = nn.Linear(64, 10)
Example #5
 def __init__(self):
     super().__init__()
     self.conv1 = nn.Conv2d(1, 32, 3, 1)
     # LayerChoice is used to select a layer between Conv2d and DwConv.
     self.conv2 = nn.LayerChoice(
         [nn.Conv2d(32, 64, 3, 1),
          DepthwiseSeparableConv(32, 64)])
     # ValueChoice is used to select a dropout rate.
     # ValueChoice can be used as a parameter of modules wrapped in `nni.retiarii.nn.pytorch`
     # or of customized modules wrapped with `@basic_unit`.
     self.dropout1 = nn.Dropout(nn.ValueChoice(
         [0.25, 0.5, 0.75]))  # choose dropout rate from 0.25, 0.5 and 0.75
     self.dropout2 = nn.Dropout(0.5)
     feature = nn.ValueChoice([64, 128, 256])
     self.fc1 = nn.Linear(9216, feature)
     self.fc2 = nn.Linear(feature, 10)
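Examples #3 and #5 differ only in comments; both are constructors of an NNI Retiarii model space. For orientation, a minimal self-contained sketch of such a space follows. It assumes the NNI 2.x Retiarii API (`nni.retiarii.nn.pytorch` imported as `nn`, plus `model_wrapper`); the class name `TinySpace` is made up for illustration.

import torch
import torch.nn.functional as F
import nni.retiarii.nn.pytorch as nn
from nni.retiarii import model_wrapper

@model_wrapper                       # marks the class as a searchable model space
class TinySpace(nn.Module):
    def __init__(self):
        super().__init__()
        feature = nn.ValueChoice([64, 128, 256])           # shared width choice
        self.fc1 = nn.Linear(28 * 28, feature)
        self.drop = nn.Dropout(nn.ValueChoice([0.25, 0.5, 0.75]))
        self.fc2 = nn.Linear(feature, 10)                  # reuses the same choice

    def forward(self, x):
        x = torch.flatten(x, 1)
        x = self.drop(F.relu(self.fc1(x)))
        return F.log_softmax(self.fc2(x), dim=1)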
Example #6
 def __init__(self, value_choice=True):
     super().__init__()
     self.conv1 = nn.Conv2d(1, 32, 3, 1)
     self.conv2 = LayerChoice(
         [nn.Conv2d(32, 64, 3, 1),
          DepthwiseSeparableConv(32, 64)])
     self.dropout1 = LayerChoice(
         [nn.Dropout(.25), nn.Dropout(.5),
          nn.Dropout(.75)])
     self.dropout2 = nn.Dropout(0.5)
     if value_choice:
         hidden = nn.ValueChoice([32, 64, 128])
     else:
         hidden = 64
     self.fc1 = nn.Linear(9216, hidden)
     self.fc2 = nn.Linear(hidden, 10)
     self.rpfc = nn.Linear(10, 10)
     self.input_ch = InputChoice(2, 1)
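The `InputChoice(2, 1)` at the end only makes sense together with the forward pass: it receives a list of two candidate tensors and emits the single selected one. A hedged sketch of how this constructor is typically paired, assuming NNI Retiarii's `InputChoice` semantics (the original forward is not shown in this listing):

 # Sketch of a matching forward pass (assumes `import torch` and
 # `import torch.nn.functional as F` at module level).
 def forward(self, x):
     x = F.relu(self.conv1(x))
     x = F.max_pool2d(self.conv2(x), 2)
     x = torch.flatten(self.dropout1(x), 1)
     x = F.relu(self.fc1(x))
     x1 = self.fc2(self.dropout2(x))      # plain classifier head
     x2 = self.rpfc(x1)                   # alternative head with an extra Linear
     x = self.input_ch([x1, x2])          # InputChoice picks one of the two branches
     return F.log_softmax(x, dim=1)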
Example #7
 def __init__(self, alpha, depths, convops, kernel_sizes, num_layers,
              skips, num_classes=1000, dropout=0.2):
     super().__init__()
     assert alpha > 0.0
     assert len(depths) == len(convops) == len(kernel_sizes) == len(num_layers) == len(skips) == 7
     self.alpha = alpha
     self.num_classes = num_classes
     depths = _get_depths([_FIRST_DEPTH] + depths, alpha)
     base_filter_sizes = [16, 24, 40, 80, 96, 192, 320]
     exp_ratios = [3, 3, 3, 6, 6, 6, 6]
     strides = [1, 2, 2, 2, 1, 2, 1]
     layers = [
         # First layer: regular conv.
         nn.Conv2d(3, depths[0], 3, padding=1, stride=2, bias=False),
         nn.BatchNorm2d(depths[0], momentum=_BN_MOMENTUM),
         nn.ReLU(inplace=True),
     ]
     count = 0
     # for conv, prev_depth, depth, ks, skip, stride, repeat, exp_ratio in \
     #        zip(convops, depths[:-1], depths[1:], kernel_sizes, skips, strides, num_layers, exp_ratios):
     for filter_size, exp_ratio, stride in zip(base_filter_sizes, exp_ratios, strides):
         # TODO: restrict that "choose" can only be used within mutator
         ph = nn.Placeholder(label=f'mutable_{count}', **{
             'kernel_size_options': [1, 3, 5],
             'n_layer_options': [1, 2, 3, 4],
             'op_type_options': ['__mutated__.base_mnasnet.RegularConv',
                                 '__mutated__.base_mnasnet.DepthwiseConv',
                                 '__mutated__.base_mnasnet.MobileConv'],
             # 'se_ratio_options': [0, 0.25],
             'skip_options': ['identity', 'no'],
             'n_filter_options': [int(filter_size*x) for x in [0.75, 1.0, 1.25]],
             'exp_ratio': exp_ratio,
             'stride': stride,
             'in_ch': depths[0] if count == 0 else None
         })
         layers.append(ph)
         '''if conv == "mconv":
             # MNASNet blocks: stacks of inverted residuals.
             layers.append(_stack_inverted_residual(prev_depth, depth, ks, skip,
                                                    stride, exp_ratio, repeat, _BN_MOMENTUM))
         else:
             # Normal conv and depth-separated conv
             layers += _stack_normal_conv(prev_depth, depth, ks, skip, conv == "dconv",
                                          stride, repeat, _BN_MOMENTUM)'''
         count += 1
         if count >= 2:
             break
     layers += [
         # Final mapping to classifier input.
         nn.Conv2d(depths[7], 1280, 1, padding=0, stride=1, bias=False),
         nn.BatchNorm2d(1280, momentum=_BN_MOMENTUM),
         nn.ReLU(inplace=True),
     ]
     self.layers = nn.Sequential(*layers)
     self.classifier = nn.Sequential(nn.Dropout(p=dropout, inplace=True),
                                     nn.Linear(1280, num_classes))
     self._initialize_weights()
Example #8
    def __init__(self,
                 in_features,
                 out_features,
                 bias=True,
                 use_bn=False,
                 act_func=None,
                 dropout_rate=0,
                 ops_order='weight_bn_act'):
        super(LinearLayer, self).__init__()

        self.in_features = in_features
        self.out_features = out_features
        self.bias = bias

        self.use_bn = use_bn
        self.act_func = act_func
        self.dropout_rate = dropout_rate
        self.ops_order = ops_order
        """ modules """
        modules = {}
        # batch norm
        if self.use_bn:
            if self.bn_before_weight:
                modules['bn'] = nn.BatchNorm1d(in_features)
            else:
                modules['bn'] = nn.BatchNorm1d(out_features)
        else:
            modules['bn'] = None
        # activation
        modules['act'] = build_activation(self.act_func,
                                          self.ops_list[0] != 'act')
        # dropout
        if self.dropout_rate > 0:
            modules['dropout'] = nn.Dropout(self.dropout_rate, inplace=True)
        else:
            modules['dropout'] = None
        # linear
        modules['weight'] = {
            'linear': nn.Linear(self.in_features, self.out_features, self.bias)
        }

        # add modules
        for op in self.ops_list:
            if modules[op] is None:
                continue
            elif op == 'weight':
                if modules['dropout'] is not None:
                    self.add_module('dropout', modules['dropout'])
                for key in modules['weight']:
                    self.add_module(key, modules['weight'][key])
            else:
                self.add_module(op, modules[op])
        self.sequence = nn.Sequential(self._modules)
Example #9
    def __init__(self,
                 d_model: int,
                 dropout: float = 0.1,
                 max_len: int = 5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(
            torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(position * div_term)
        pe[:, 0, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)
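The buffer is stored as (max_len, 1, d_model) so it broadcasts over the batch dimension of sequence-first inputs. The companion forward in the PyTorch transformer tutorial that this snippet mirrors is essentially:

    def forward(self, x):
        # x: (seq_len, batch, d_model); add the precomputed encodings, then apply dropout
        x = x + self.pe[:x.size(0)]
        return self.dropout(x)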
Example #10
    def __init__(self, num_labels: int = 1000,
                 base_widths: Tuple[int, ...] = (32, 16, 32, 40, 80, 96, 192, 320, 1280),
                 dropout_rate: float = 0.,
                 width_mult: float = 1.0,
                 bn_eps: float = 1e-3,
                 bn_momentum: float = 0.1):

        super().__init__()

        assert len(base_widths) == 9
        # include the last stage info widths here
        widths = [make_divisible(width * width_mult, 8) for width in base_widths]
        downsamples = [True, False, True, True, True, False, True, False]

        self.num_labels = num_labels
        self.dropout_rate = dropout_rate
        self.bn_eps = bn_eps
        self.bn_momentum = bn_momentum

        self.stem = ConvBNReLU(3, widths[0], stride=2, norm_layer=nn.BatchNorm2d)

        blocks: List[nn.Module] = [
            # first stage is fixed
            DepthwiseSeparableConv(widths[0], widths[1], kernel_size=3, stride=1)
        ]

        # https://github.com/ultmaster/AceNAS/blob/46c8895fd8a05ffbc61a6b44f1e813f64b4f66b7/searchspace/proxylessnas/__init__.py#L21
        for stage in range(2, 8):
            # Rather than returning a fixed module here,
            # we return a builder that dynamically creates module for different `repeat_idx`.
            builder = inverted_residual_choice_builder(
                [3, 6], [3, 5, 7], downsamples[stage], widths[stage - 1], widths[stage], f's{stage}')
            if stage < 7:
                blocks.append(nn.Repeat(builder, (1, 4), label=f's{stage}_depth'))
            else:
                # No mutation for depth in the last stage.
                # Directly call builder to initiate one block
                blocks.append(builder(0))

        self.blocks = nn.Sequential(*blocks)

        # final layers
        self.feature_mix_layer = ConvBNReLU(widths[7], widths[8], kernel_size=1, norm_layer=nn.BatchNorm2d)
        self.global_avg_pooling = nn.AdaptiveAvgPool2d(1)
        self.dropout_layer = nn.Dropout(dropout_rate)
        self.classifier = nn.Linear(widths[-1], num_labels)

        reset_parameters(self, bn_momentum=bn_momentum, bn_eps=bn_eps)
Example #11
    def __init__(self,
                 input_size=224,
                 first_conv_channels=16,
                 last_conv_channels=1024,
                 n_classes=1000,
                 affine=False):
        super().__init__()

        assert input_size % 32 == 0
        self.stage_blocks = [4, 4, 8, 4]
        self.stage_channels = [64, 160, 320, 640]
        self._input_size = input_size
        self._feature_map_size = input_size
        self._first_conv_channels = first_conv_channels
        self._last_conv_channels = last_conv_channels
        self._n_classes = n_classes
        self._affine = affine
        self._layerchoice_count = 0

        # building first layer
        self.first_conv = nn.Sequential(
            nn.Conv2d(3, first_conv_channels, 3, 2, 1, bias=False),
            nn.BatchNorm2d(first_conv_channels, affine=affine),
            nn.ReLU(inplace=True),
        )
        self._feature_map_size //= 2

        p_channels = first_conv_channels
        features = []
        for num_blocks, channels in zip(self.stage_blocks,
                                        self.stage_channels):
            features.extend(self._make_blocks(num_blocks, p_channels,
                                              channels))
            p_channels = channels
        self.features = nn.Sequential(*features)

        self.conv_last = nn.Sequential(
            nn.Conv2d(p_channels, last_conv_channels, 1, 1, 0, bias=False),
            nn.BatchNorm2d(last_conv_channels, affine=affine),
            nn.ReLU(inplace=True),
        )
        self.globalpool = nn.AvgPool2d(self._feature_map_size)
        self.dropout = nn.Dropout(0.1)
        self.classifier = nn.Sequential(
            nn.Linear(last_conv_channels, n_classes, bias=False), )

        self._initialize_weights()
Example #12
 def __init__(self, config):
     super(SNLIClassifier, self).__init__()
     self.config = config
     self.embed = nn.Embedding(config.n_embed, config.d_embed)
     self.projection = Linear(config.d_embed, config.d_proj)
     self.encoder = Encoder(config)
     self.dropout = nn.Dropout(p=config.dp_ratio)
     self.relu = nn.ReLU()
     seq_in_size = 2 * config.d_hidden
     if self.config.birnn:
         seq_in_size *= 2
     lin_config = [seq_in_size] * 2
     self.out = nn.Sequential(Linear(*lin_config), self.relu,
                              self.dropout, Linear(*lin_config),
                              self.relu, self.dropout,
                              Linear(*lin_config), self.relu,
                              self.dropout,
                              Linear(seq_in_size, config.d_out))
Example #13
 def __init__(self, config):
     super(SNLIClassifier, self).__init__()
     self.embed = nn.Embedding(config["n_embed"], config["d_embed"])
     self.projection = Linear(config["d_embed"], config["d_proj"])
     self.encoder = Encoder(config)
     self.dropout = nn.Dropout(p=config["dp_ratio"])
     self.relu = nn.ReLU()
     seq_in_size = 2 * config["d_hidden"]
     if config["birnn"]:
         seq_in_size *= 2
     lin_config = [seq_in_size] * 2
     self.out = nn.Sequential(Linear(*lin_config), self.relu,
                              self.dropout, Linear(*lin_config),
                              self.relu, self.dropout,
                              Linear(*lin_config), self.relu,
                              self.dropout,
                              Linear(seq_in_size, config["d_out"]))
     self.fix_emb = config["fix_emb"]
     self.project = config["projection"]
Example #14
 def __init__(self, foo, bar):
     super().__init__()
     self.foo = nn.Linear(foo, 3)
     self.bar = nn.Dropout(bar)
Example #15
    def __init__(self,
                 num_labels: int = 1000,
                 channel_search: bool = False,
                 affine: bool = False):
        super().__init__()

        self.num_labels = num_labels
        self.channel_search = channel_search
        self.affine = affine

        # the block number in each stage. 4 stages in total. 20 blocks in total.
        self.stage_repeats = [4, 4, 8, 4]

        # output channels for all stages, including the very first layer and the very last layer
        self.stage_out_channels = [-1, 16, 64, 160, 320, 640, 1024]

        # building first layer
        out_channels = self.stage_out_channels[1]
        self.first_conv = nn.Sequential(
            nn.Conv2d(3, out_channels, 3, 2, 1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        )

        self.features = []

        global_block_idx = 0
        for stage_idx, num_repeat in enumerate(self.stage_repeats):
            for block_idx in range(num_repeat):
                # count global index to give names to choices
                global_block_idx += 1

                # get ready for input and output
                in_channels = out_channels
                out_channels = self.stage_out_channels[stage_idx + 2]
                stride = 2 if block_idx == 0 else 1

                # mid channels can be searched
                base_mid_channels = out_channels // 2
                if self.channel_search:
                    k_choice_list = [
                        int(base_mid_channels * (.2 * k)) for k in range(1, 9)
                    ]
                    mid_channels = nn.ValueChoice(
                        k_choice_list, label=f'channel_{global_block_idx}')
                else:
                    mid_channels = int(base_mid_channels)

                choice_block = nn.LayerChoice(
                    [
                        ShuffleNetBlock(in_channels,
                                        out_channels,
                                        mid_channels=mid_channels,
                                        kernel_size=3,
                                        stride=stride,
                                        affine=affine),
                        ShuffleNetBlock(in_channels,
                                        out_channels,
                                        mid_channels=mid_channels,
                                        kernel_size=5,
                                        stride=stride,
                                        affine=affine),
                        ShuffleNetBlock(in_channels,
                                        out_channels,
                                        mid_channels=mid_channels,
                                        kernel_size=7,
                                        stride=stride,
                                        affine=affine),
                        ShuffleXceptionBlock(in_channels,
                                             out_channels,
                                             mid_channels=mid_channels,
                                             stride=stride,
                                             affine=affine)
                    ],
                    label=f'layer_{global_block_idx}')
                self.features.append(choice_block)

        self.features = nn.Sequential(*self.features)

        # final layers
        last_conv_channels = self.stage_out_channels[-1]
        self.conv_last = nn.Sequential(
            nn.Conv2d(out_channels, last_conv_channels, 1, 1, 0, bias=False),
            nn.BatchNorm2d(last_conv_channels, affine=affine),
            nn.ReLU(inplace=True),
        )
        self.globalpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(0.1)
        self.classifier = nn.Sequential(
            nn.Linear(last_conv_channels, num_labels, bias=False), )

        self._initialize_weights()
Example #16
    def __init__(self,
                 num_labels: int = 1000,
                 base_widths: Tuple[int, ...] = (16, 16, 32, 64, 128, 256, 512,
                                                 1024),
                 width_multipliers: Tuple[float, ...] = (0.5, 0.625, 0.75, 1.0,
                                                         1.25, 1.5, 2.0),
                 expand_ratios: Tuple[int, ...] = (1, 2, 3, 4, 5, 6),
                 dropout_rate: float = 0.2,
                 bn_eps: float = 1e-3,
                 bn_momentum: float = 0.1):
        super().__init__()

        self.widths = [
            nn.ValueChoice([
                make_divisible(base_width * mult, 8)
                for mult in width_multipliers
            ],
                           label=f'width_{i}')
            for i, base_width in enumerate(base_widths)
        ]
        self.expand_ratios = expand_ratios

        blocks = [
            # Stem
            ConvBNReLU(3,
                       self.widths[0],
                       nn.ValueChoice([3, 5], label='ks_0'),
                       stride=2,
                       activation_layer=h_swish),
            SeparableConv(self.widths[0],
                          self.widths[0],
                          activation_layer=nn.ReLU),
        ]

        # counting for kernel sizes and expand ratios
        self.layer_count = 2

        blocks += [
            # Body
            self._make_stage(1, self.widths[0], self.widths[1], False, 2,
                             nn.ReLU),
            self._make_stage(2, self.widths[1], self.widths[2], True, 2,
                             nn.ReLU),
            self._make_stage(1, self.widths[2], self.widths[3], False, 2,
                             h_swish),
            self._make_stage(1, self.widths[3], self.widths[4], True, 1,
                             h_swish),
            self._make_stage(1, self.widths[4], self.widths[5], True, 2,
                             h_swish),
        ]

        # Head
        blocks += [
            ConvBNReLU(self.widths[5],
                       self.widths[6],
                       1,
                       1,
                       activation_layer=h_swish),
            nn.AdaptiveAvgPool2d(1),
            ConvBNReLU(self.widths[6],
                       self.widths[7],
                       1,
                       1,
                       norm_layer=nn.Identity,
                       activation_layer=h_swish),
        ]

        self.blocks = nn.Sequential(*blocks)

        self.classifier = nn.Sequential(
            nn.Dropout(dropout_rate),
            nn.Linear(self.widths[7], num_labels),
        )

        reset_parameters(self, bn_momentum=bn_momentum, bn_eps=bn_eps)