def __init__(self, embed_dim, fixed_embed_dim, num_heads, attn_drop=0., proj_drop=0.,
             rpe=False, qkv_bias=False, qk_scale=None, rpe_length=14) -> None:
    super().__init__()
    self.num_heads = num_heads
    head_dim = embed_dim // num_heads
    self.scale = qk_scale or head_dim ** -0.5
    self.qkv = nn.Linear(embed_dim, embed_dim * 3, bias=qkv_bias)
    self.attn_drop = nn.Dropout(attn_drop)
    self.proj = nn.Linear(embed_dim, embed_dim)
    self.proj_drop = nn.Dropout(proj_drop)
    self.rpe = rpe
    if rpe:
        # Relative position embeddings for keys and values, sized per head.
        self.rel_pos_embed_k = RelativePosition2D(fixed_embed_dim // num_heads, rpe_length)
        self.rel_pos_embed_v = RelativePosition2D(fixed_embed_dim // num_heads, rpe_length)
def __init__(self):
    super().__init__()
    self.conv1 = nn.Conv2d(1, 32, 3, 1)
    self.conv2 = nn.Conv2d(32, 64, 3, 1)
    self.dropout1 = nn.Dropout(0.25)
    self.dropout2 = nn.Dropout(0.5)
    self.fc1 = nn.Linear(9216, 128)
    self.fc2 = nn.Linear(128, 10)
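# A minimal forward() sketch for the MNIST module above -- an illustrative
# assumption based on the layer shapes, not part of the original snippet
# (assumes `import torch` and `import torch.nn.functional as F`). With 28x28
# inputs, two 3x3 convs give 24x24 maps, a 2x2 max-pool halves that to 12x12,
# and 64 * 12 * 12 = 9216 matches fc1's input size.
def forward(self, x):
    x = F.relu(self.conv1(x))   # 1x28x28 -> 32x26x26
    x = F.relu(self.conv2(x))   # 32x26x26 -> 64x24x24
    x = F.max_pool2d(x, 2)      # 64x24x24 -> 64x12x12
    x = self.dropout1(x)
    x = torch.flatten(x, 1)     # -> 9216 features
    x = F.relu(self.fc1(x))
    x = self.dropout2(x)
    return F.log_softmax(self.fc2(x), dim=1)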
def __init__(self):
    super().__init__()
    self.conv1 = nn.Conv2d(1, 32, 3, 1)
    self.conv2 = nn.LayerChoice([
        nn.Conv2d(32, 64, 3, 1),
        DepthwiseSeparableConv(32, 64)
    ])
    self.dropout1 = nn.Dropout(nn.ValueChoice([0.25, 0.5, 0.75]))
    self.dropout2 = nn.Dropout(0.5)
    feature = nn.ValueChoice([64, 128, 256])
    self.fc1 = nn.Linear(9216, feature)
    self.fc2 = nn.Linear(feature, 10)
def __init__(self):
    super().__init__()
    ch1 = ValueChoice([16, 32])
    kernel = ValueChoice([3, 5])
    # Arithmetic on a ValueChoice yields a derived value, so `kernel // 2`
    # keeps "same" padding for either kernel size.
    self.conv1 = nn.Conv2d(1, ch1, kernel, padding=kernel // 2)
    self.batch_norm = nn.BatchNorm2d(ch1)
    self.conv2 = nn.Conv2d(ch1, 64, 3)
    self.dropout1 = LayerChoice([nn.Dropout(.25), nn.Dropout(.5), nn.Dropout(.75)])
    self.fc = nn.Linear(64, 10)
def __init__(self):
    super().__init__()
    self.conv1 = nn.Conv2d(1, 32, 3, 1)
    # LayerChoice is used to select a layer between Conv2d and DwConv.
    self.conv2 = nn.LayerChoice([
        nn.Conv2d(32, 64, 3, 1),
        DepthwiseSeparableConv(32, 64)
    ])
    # ValueChoice is used to select a dropout rate.
    # ValueChoice can be used as a parameter of modules wrapped in `nni.retiarii.nn.pytorch`
    # or of customized modules wrapped with `@basic_unit`.
    self.dropout1 = nn.Dropout(nn.ValueChoice([0.25, 0.5, 0.75]))  # choose a dropout rate from 0.25, 0.5 and 0.75
    self.dropout2 = nn.Dropout(0.5)
    feature = nn.ValueChoice([64, 128, 256])
    self.fc1 = nn.Linear(9216, feature)
    self.fc2 = nn.Linear(feature, 10)
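# A hedged usage sketch showing how a space like the one above is typically
# explored with NNI Retiarii. The class name, dataset, and evaluator body are
# illustrative assumptions; `model_wrapper`, `FunctionalEvaluator`,
# `RetiariiExperiment`, and the strategy module come from the Retiarii API.
from nni.retiarii import model_wrapper
from nni.retiarii.evaluator import FunctionalEvaluator
from nni.retiarii.experiment.pytorch import RetiariiExperiment, RetiariiExeConfig
import nni.retiarii.strategy as strategy

@model_wrapper            # required so Retiarii can recognize and mutate the space
class ModelSpace(nn.Module):
    ...                   # the __init__ above, plus a forward()

def evaluate_model(model_cls):
    model = model_cls()   # one concrete architecture sampled from the space
    ...                   # train briefly, then report accuracy via nni.report_final_result

exp = RetiariiExperiment(ModelSpace(), FunctionalEvaluator(evaluate_model),
                         [], strategy.Random())
config = RetiariiExeConfig('local')
config.trial_concurrency = 1
config.max_trial_number = 4
exp.run(config, 8081)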
def __init__(self, value_choice=True):
    super().__init__()
    self.conv1 = nn.Conv2d(1, 32, 3, 1)
    self.conv2 = LayerChoice([
        nn.Conv2d(32, 64, 3, 1),
        DepthwiseSeparableConv(32, 64)
    ])
    self.dropout1 = LayerChoice([nn.Dropout(.25), nn.Dropout(.5), nn.Dropout(.75)])
    self.dropout2 = nn.Dropout(0.5)
    if value_choice:
        hidden = nn.ValueChoice([32, 64, 128])
    else:
        hidden = 64
    self.fc1 = nn.Linear(9216, hidden)
    self.fc2 = nn.Linear(hidden, 10)
    self.rpfc = nn.Linear(10, 10)
    # InputChoice(2, 1) selects 1 of 2 candidate inputs in forward().
    self.input_ch = InputChoice(2, 1)
def __init__(self, alpha, depths, convops, kernel_sizes, num_layers, skips,
             num_classes=1000, dropout=0.2):
    super().__init__()
    assert alpha > 0.0
    assert len(depths) == len(convops) == len(kernel_sizes) == len(num_layers) == len(skips) == 7
    self.alpha = alpha
    self.num_classes = num_classes
    depths = _get_depths([_FIRST_DEPTH] + depths, alpha)
    base_filter_sizes = [16, 24, 40, 80, 96, 192, 320]
    exp_ratios = [3, 3, 3, 6, 6, 6, 6]
    strides = [1, 2, 2, 2, 1, 2, 1]
    layers = [
        # First layer: regular conv.
        nn.Conv2d(3, depths[0], 3, padding=1, stride=2, bias=False),
        nn.BatchNorm2d(depths[0], momentum=_BN_MOMENTUM),
        nn.ReLU(inplace=True),
    ]
    count = 0
    # for conv, prev_depth, depth, ks, skip, stride, repeat, exp_ratio in \
    #         zip(convops, depths[:-1], depths[1:], kernel_sizes, skips, strides, num_layers, exp_ratios):
    for filter_size, exp_ratio, stride in zip(base_filter_sizes, exp_ratios, strides):
        # TODO: restrict that "choose" can only be used within mutator
        ph = nn.Placeholder(label=f'mutable_{count}', **{
            'kernel_size_options': [1, 3, 5],
            'n_layer_options': [1, 2, 3, 4],
            'op_type_options': ['__mutated__.base_mnasnet.RegularConv',
                                '__mutated__.base_mnasnet.DepthwiseConv',
                                '__mutated__.base_mnasnet.MobileConv'],
            # 'se_ratio_options': [0, 0.25],
            'skip_options': ['identity', 'no'],
            'n_filter_options': [int(filter_size * x) for x in [0.75, 1.0, 1.25]],
            'exp_ratio': exp_ratio,
            'stride': stride,
            'in_ch': depths[0] if count == 0 else None
        })
        layers.append(ph)
        '''if conv == "mconv":
            # MNASNet blocks: stacks of inverted residuals.
            layers.append(_stack_inverted_residual(prev_depth, depth, ks, skip,
                                                   stride, exp_ratio, repeat, _BN_MOMENTUM))
        else:
            # Normal conv and depth-separated conv
            layers += _stack_normal_conv(prev_depth, depth, ks, skip, conv == "dconv",
                                         stride, repeat, _BN_MOMENTUM)'''
        count += 1
        if count >= 2:
            break
    layers += [
        # Final mapping to classifier input.
        nn.Conv2d(depths[7], 1280, 1, padding=0, stride=1, bias=False),
        nn.BatchNorm2d(1280, momentum=_BN_MOMENTUM),
        nn.ReLU(inplace=True),
    ]
    self.layers = nn.Sequential(*layers)
    self.classifier = nn.Sequential(nn.Dropout(p=dropout, inplace=True),
                                    nn.Linear(1280, num_classes))
    self._initialize_weights()
def __init__(self, in_features, out_features, bias=True, use_bn=False, act_func=None,
             dropout_rate=0, ops_order='weight_bn_act'):
    super(LinearLayer, self).__init__()
    self.in_features = in_features
    self.out_features = out_features
    self.bias = bias
    self.use_bn = use_bn
    self.act_func = act_func
    self.dropout_rate = dropout_rate
    self.ops_order = ops_order

    """ modules """
    modules = {}
    # batch norm
    if self.use_bn:
        if self.bn_before_weight:
            modules['bn'] = nn.BatchNorm1d(in_features)
        else:
            modules['bn'] = nn.BatchNorm1d(out_features)
    else:
        modules['bn'] = None
    # activation
    modules['act'] = build_activation(self.act_func, self.ops_list[0] != 'act')
    # dropout
    if self.dropout_rate > 0:
        modules['dropout'] = nn.Dropout(self.dropout_rate, inplace=True)
    else:
        modules['dropout'] = None
    # linear
    modules['weight'] = {'linear': nn.Linear(self.in_features, self.out_features, self.bias)}
    # register modules in the order given by `ops_order`
    for op in self.ops_list:
        if modules[op] is None:
            continue
        elif op == 'weight':
            # dropout (if any) is applied right before the linear weight
            if modules['dropout'] is not None:
                self.add_module('dropout', modules['dropout'])
            for key in modules['weight']:
                self.add_module(key, modules['weight'][key])
        else:
            self.add_module(op, modules[op])
    self.sequence = nn.Sequential(self._modules)
def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 5000):
    super().__init__()
    self.dropout = nn.Dropout(p=dropout)
    position = torch.arange(max_len).unsqueeze(1)
    div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
    pe = torch.zeros(max_len, 1, d_model)
    pe[:, 0, 0::2] = torch.sin(position * div_term)
    pe[:, 0, 1::2] = torch.cos(position * div_term)
    self.register_buffer('pe', pe)
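# A minimal forward() sketch for the positional encoding above -- an assumption
# following the standard PyTorch transformer tutorial, where x is shaped
# [seq_len, batch_size, d_model]: add the precomputed sin/cos table for the
# first seq_len positions, then apply dropout.
def forward(self, x):
    x = x + self.pe[:x.size(0)]
    return self.dropout(x)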
def __init__(self, num_labels: int = 1000,
             base_widths: Tuple[int, ...] = (32, 16, 32, 40, 80, 96, 192, 320, 1280),
             dropout_rate: float = 0.,
             width_mult: float = 1.0,
             bn_eps: float = 1e-3,
             bn_momentum: float = 0.1):
    super().__init__()

    assert len(base_widths) == 9
    # include the last stage info widths here
    widths = [make_divisible(width * width_mult, 8) for width in base_widths]
    downsamples = [True, False, True, True, True, False, True, False]

    self.num_labels = num_labels
    self.dropout_rate = dropout_rate
    self.bn_eps = bn_eps
    self.bn_momentum = bn_momentum

    self.stem = ConvBNReLU(3, widths[0], stride=2, norm_layer=nn.BatchNorm2d)

    blocks: List[nn.Module] = [
        # first stage is fixed
        DepthwiseSeparableConv(widths[0], widths[1], kernel_size=3, stride=1)
    ]

    # https://github.com/ultmaster/AceNAS/blob/46c8895fd8a05ffbc61a6b44f1e813f64b4f66b7/searchspace/proxylessnas/__init__.py#L21
    for stage in range(2, 8):
        # Rather than returning a fixed module here,
        # we return a builder that dynamically creates modules for different `repeat_idx`.
        builder = inverted_residual_choice_builder(
            [3, 6], [3, 5, 7], downsamples[stage], widths[stage - 1], widths[stage], f's{stage}')
        if stage < 7:
            blocks.append(nn.Repeat(builder, (1, 4), label=f's{stage}_depth'))
        else:
            # No mutation for depth in the last stage.
            # Directly call the builder to instantiate one block.
            blocks.append(builder(0))

    self.blocks = nn.Sequential(*blocks)

    # final layers
    self.feature_mix_layer = ConvBNReLU(widths[7], widths[8], kernel_size=1, norm_layer=nn.BatchNorm2d)
    self.global_avg_pooling = nn.AdaptiveAvgPool2d(1)
    self.dropout_layer = nn.Dropout(dropout_rate)
    self.classifier = nn.Linear(widths[-1], num_labels)

    reset_parameters(self, bn_momentum=bn_momentum, bn_eps=bn_eps)
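# A hedged micro-example of the builder pattern used above -- an illustrative
# assumption, not the original inverted_residual_choice_builder. nn.Repeat can
# take a callable that receives the repetition index and returns a fresh
# module, so each repetition may differ (e.g. only the first one downsamples),
# while the depth range (1, 4) lets the strategy search over 1 to 4 blocks.
def block_builder(repeat_idx: int) -> nn.Module:
    stride = 2 if repeat_idx == 0 else 1   # downsample only in the first block
    return nn.Conv2d(32, 32, 3, stride=stride, padding=1)

stage = nn.Repeat(block_builder, (1, 4), label='demo_depth')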
def __init__(self, input_size=224, first_conv_channels=16, last_conv_channels=1024,
             n_classes=1000, affine=False):
    super().__init__()
    assert input_size % 32 == 0
    self.stage_blocks = [4, 4, 8, 4]
    self.stage_channels = [64, 160, 320, 640]
    self._input_size = input_size
    self._feature_map_size = input_size
    self._first_conv_channels = first_conv_channels
    self._last_conv_channels = last_conv_channels
    self._n_classes = n_classes
    self._affine = affine
    self._layerchoice_count = 0

    # building first layer
    self.first_conv = nn.Sequential(
        nn.Conv2d(3, first_conv_channels, 3, 2, 1, bias=False),
        nn.BatchNorm2d(first_conv_channels, affine=affine),
        nn.ReLU(inplace=True),
    )
    self._feature_map_size //= 2

    p_channels = first_conv_channels
    features = []
    for num_blocks, channels in zip(self.stage_blocks, self.stage_channels):
        features.extend(self._make_blocks(num_blocks, p_channels, channels))
        p_channels = channels
    self.features = nn.Sequential(*features)

    self.conv_last = nn.Sequential(
        nn.Conv2d(p_channels, last_conv_channels, 1, 1, 0, bias=False),
        nn.BatchNorm2d(last_conv_channels, affine=affine),
        nn.ReLU(inplace=True),
    )
    self.globalpool = nn.AvgPool2d(self._feature_map_size)
    self.dropout = nn.Dropout(0.1)
    self.classifier = nn.Sequential(
        nn.Linear(last_conv_channels, n_classes, bias=False),
    )
    self._initialize_weights()
def __init__(self, config):
    super(SNLIClassifier, self).__init__()
    self.config = config
    self.embed = nn.Embedding(config.n_embed, config.d_embed)
    self.projection = Linear(config.d_embed, config.d_proj)
    self.encoder = Encoder(config)
    self.dropout = nn.Dropout(p=config.dp_ratio)
    self.relu = nn.ReLU()
    seq_in_size = 2 * config.d_hidden
    if self.config.birnn:
        seq_in_size *= 2
    lin_config = [seq_in_size] * 2
    self.out = nn.Sequential(
        Linear(*lin_config), self.relu, self.dropout,
        Linear(*lin_config), self.relu, self.dropout,
        Linear(*lin_config), self.relu, self.dropout,
        Linear(seq_in_size, config.d_out))
def __init__(self, config):
    super(SNLIClassifier, self).__init__()
    self.embed = nn.Embedding(config["n_embed"], config["d_embed"])
    self.projection = Linear(config["d_embed"], config["d_proj"])
    self.encoder = Encoder(config)
    self.dropout = nn.Dropout(p=config["dp_ratio"])
    self.relu = nn.ReLU()
    seq_in_size = 2 * config["d_hidden"]
    if config["birnn"]:
        seq_in_size *= 2
    lin_config = [seq_in_size] * 2
    self.out = nn.Sequential(
        Linear(*lin_config), self.relu, self.dropout,
        Linear(*lin_config), self.relu, self.dropout,
        Linear(*lin_config), self.relu, self.dropout,
        Linear(seq_in_size, config["d_out"]))
    self.fix_emb = config["fix_emb"]
    self.project = config["projection"]
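# A hedged instantiation sketch for the dict-based variant above. The key names
# come straight from the snippet; the values are illustrative assumptions, and
# Encoder/Linear are assumed to be defined elsewhere in the file.
config = {
    "n_embed": 100000,   # vocabulary size
    "d_embed": 300,      # word-embedding dimension
    "d_proj": 300,       # projection dimension
    "dp_ratio": 0.2,     # dropout ratio
    "d_hidden": 300,     # encoder hidden size
    "birnn": True,       # bidirectional encoder doubles seq_in_size
    "d_out": 3,          # e.g. entailment / contradiction / neutral for SNLI
    "fix_emb": True,     # keep pretrained embeddings frozen
    "projection": True,  # project embeddings before encoding
}
model = SNLIClassifier(config)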
def __init__(self, foo, bar):
    super().__init__()
    self.foo = nn.Linear(foo, 3)
    self.bar = nn.Dropout(bar)
def __init__(self, num_labels: int = 1000, channel_search: bool = False, affine: bool = False):
    super().__init__()
    self.num_labels = num_labels
    self.channel_search = channel_search
    self.affine = affine

    # the block number in each stage. 4 stages in total. 20 blocks in total.
    self.stage_repeats = [4, 4, 8, 4]

    # output channels for all stages, including the very first layer and the very last layer
    self.stage_out_channels = [-1, 16, 64, 160, 320, 640, 1024]

    # building first layer
    out_channels = self.stage_out_channels[1]
    self.first_conv = nn.Sequential(
        nn.Conv2d(3, out_channels, 3, 2, 1, bias=False),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True),
    )

    self.features = []
    global_block_idx = 0
    for stage_idx, num_repeat in enumerate(self.stage_repeats):
        for block_idx in range(num_repeat):
            # count global index to give names to choices
            global_block_idx += 1

            # get ready for input and output
            in_channels = out_channels
            out_channels = self.stage_out_channels[stage_idx + 2]
            stride = 2 if block_idx == 0 else 1

            # mid channels can be searched
            base_mid_channels = out_channels // 2
            if self.channel_search:
                k_choice_list = [int(base_mid_channels * (.2 * k)) for k in range(1, 9)]
                mid_channels = nn.ValueChoice(k_choice_list, label=f'channel_{global_block_idx}')
            else:
                mid_channels = int(base_mid_channels)

            choice_block = nn.LayerChoice([
                ShuffleNetBlock(in_channels, out_channels, mid_channels=mid_channels,
                                kernel_size=3, stride=stride, affine=affine),
                ShuffleNetBlock(in_channels, out_channels, mid_channels=mid_channels,
                                kernel_size=5, stride=stride, affine=affine),
                ShuffleNetBlock(in_channels, out_channels, mid_channels=mid_channels,
                                kernel_size=7, stride=stride, affine=affine),
                ShuffleXceptionBlock(in_channels, out_channels, mid_channels=mid_channels,
                                     stride=stride, affine=affine)
            ], label=f'layer_{global_block_idx}')
            self.features.append(choice_block)
    self.features = nn.Sequential(*self.features)

    # final layers
    last_conv_channels = self.stage_out_channels[-1]
    self.conv_last = nn.Sequential(
        nn.Conv2d(out_channels, last_conv_channels, 1, 1, 0, bias=False),
        nn.BatchNorm2d(last_conv_channels, affine=affine),
        nn.ReLU(inplace=True),
    )
    self.globalpool = nn.AdaptiveAvgPool2d((1, 1))
    self.dropout = nn.Dropout(0.1)
    self.classifier = nn.Sequential(
        nn.Linear(last_conv_channels, num_labels, bias=False),
    )
    self._initialize_weights()
def __init__(self, num_labels: int = 1000,
             base_widths: Tuple[int, ...] = (16, 16, 32, 64, 128, 256, 512, 1024),
             width_multipliers: Tuple[float, ...] = (0.5, 0.625, 0.75, 1.0, 1.25, 1.5, 2.0),
             expand_ratios: Tuple[int, ...] = (1, 2, 3, 4, 5, 6),
             dropout_rate: float = 0.2,
             bn_eps: float = 1e-3,
             bn_momentum: float = 0.1):
    super().__init__()

    self.widths = [
        nn.ValueChoice([make_divisible(base_width * mult, 8) for mult in width_multipliers],
                       label=f'width_{i}')
        for i, base_width in enumerate(base_widths)
    ]
    self.expand_ratios = expand_ratios

    blocks = [
        # Stem
        ConvBNReLU(3, self.widths[0], nn.ValueChoice([3, 5], label='ks_0'),
                   stride=2, activation_layer=h_swish),
        SeparableConv(self.widths[0], self.widths[0], activation_layer=nn.ReLU),
    ]

    # counting for kernel sizes and expand ratios
    self.layer_count = 2

    blocks += [
        # Body
        self._make_stage(1, self.widths[0], self.widths[1], False, 2, nn.ReLU),
        self._make_stage(2, self.widths[1], self.widths[2], True, 2, nn.ReLU),
        self._make_stage(1, self.widths[2], self.widths[3], False, 2, h_swish),
        self._make_stage(1, self.widths[3], self.widths[4], True, 1, h_swish),
        self._make_stage(1, self.widths[4], self.widths[5], True, 2, h_swish),
    ]

    # Head
    blocks += [
        ConvBNReLU(self.widths[5], self.widths[6], 1, 1, activation_layer=h_swish),
        nn.AdaptiveAvgPool2d(1),
        ConvBNReLU(self.widths[6], self.widths[7], 1, 1,
                   norm_layer=nn.Identity, activation_layer=h_swish),
    ]

    self.blocks = nn.Sequential(*blocks)

    self.classifier = nn.Sequential(
        nn.Dropout(dropout_rate),
        nn.Linear(self.widths[7], num_labels),
    )

    reset_parameters(self, bn_momentum=bn_momentum, bn_eps=bn_eps)