def __init__(self):
    super().__init__()
    self.linear = nn.LayerChoice([
        nn.Linear(3, nn.ValueChoice([10, 20])),
        nn.Linear(3, nn.ValueChoice([30, 40]))
    ])
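# Added usage sketch (an illustration, not part of the original snippet):
# with NNI Retiarii, a module like the one above declares a search space
# rather than a fixed network. The wrapper class `Net` below is hypothetical;
# it assumes `nni.retiarii.nn.pytorch` is aliased as `nn`, as in NNI 2.x.
# A search strategy samples one of the two Linear candidates plus one of its
# out_feature values.
import nni.retiarii.nn.pytorch as nn

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.LayerChoice([
            nn.Linear(3, nn.ValueChoice([10, 20])),
            nn.Linear(3, nn.ValueChoice([30, 40]))
        ])

    def forward(self, x):
        # x: [batch, 3]; output width depends on the sampled choices
        return self.linear(x)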
def __init__(self,
             num_labels: int = 1000,
             base_widths: Tuple[int, ...] = (32, 16, 32, 40, 80, 96, 192, 320, 1280),
             dropout_rate: float = 0.,
             width_mult: float = 1.0,
             bn_eps: float = 1e-3,
             bn_momentum: float = 0.1):
    super().__init__()

    assert len(base_widths) == 9  # include the last stage's width here
    widths = [make_divisible(width * width_mult, 8) for width in base_widths]
    downsamples = [True, False, True, True, True, False, True, False]

    self.num_labels = num_labels
    self.dropout_rate = dropout_rate
    self.bn_eps = bn_eps
    self.bn_momentum = bn_momentum

    self.first_conv = ConvBNReLU(3, widths[0], stride=2, norm_layer=nn.BatchNorm2d)

    blocks = [
        # first stage is fixed
        SeparableConv(widths[0], widths[1], kernel_size=3, stride=1)
    ]

    # https://github.com/ultmaster/AceNAS/blob/46c8895fd8a05ffbc61a6b44f1e813f64b4f66b7/searchspace/proxylessnas/__init__.py#L21
    for stage in range(2, 8):
        # Rather than returning a fixed module here,
        # we return a builder that dynamically creates modules for different `repeat_idx`.
        builder = inverted_residual_choice_builder(
            [3, 6], [3, 5, 7], downsamples[stage], widths[stage - 1], widths[stage], f's{stage}')
        if stage < 6:
            blocks.append(nn.Repeat(builder, (1, 4), label=f's{stage}_depth'))
        else:
            # No mutation for depth in the last stage.
            # Directly call the builder to instantiate one block.
            blocks.append(builder(0))

    self.blocks = nn.Sequential(*blocks)

    # final layers
    self.feature_mix_layer = ConvBNReLU(widths[7], widths[8], kernel_size=1, norm_layer=nn.BatchNorm2d)
    self.global_avg_pooling = nn.AdaptiveAvgPool2d(1)
    self.dropout_layer = nn.Dropout(dropout_rate)
    self.classifier = nn.Linear(widths[-1], num_labels)

    reset_parameters(self, bn_momentum=bn_momentum, bn_eps=bn_eps)
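# Added helper sketch (an assumption): the constructor above relies on a
# `make_divisible` helper that is not shown. A common implementation from the
# MobileNet family is sketched below; the project's version may differ in
# details such as the minimum value.
def make_divisible(value, divisor, min_value=None):
    if min_value is None:
        min_value = divisor
    # round to the nearest multiple of `divisor`
    new_value = max(min_value, int(value + divisor / 2) // divisor * divisor)
    # make sure rounding down does not lose more than 10%
    if new_value < 0.9 * value:
        new_value += divisor
    return new_value

# e.g. make_divisible(32 * 1.0, 8) == 32, make_divisible(40 * 0.75, 8) == 32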
def __init__(self, num_labels: int = 1000, channel_search: bool = False, affine: bool = False):
    super().__init__()

    self.num_labels = num_labels
    self.channel_search = channel_search
    self.affine = affine

    # the number of blocks in each stage: 4 stages, 20 blocks in total
    self.stage_repeats = [4, 4, 8, 4]

    # output channels for all stages, including the very first layer and the very last layer
    self.stage_out_channels = [-1, 16, 64, 160, 320, 640, 1024]

    # building first layer
    out_channels = self.stage_out_channels[1]
    self.first_conv = nn.Sequential(
        nn.Conv2d(3, out_channels, 3, 2, 1, bias=False),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True),
    )

    feature_blocks = []

    global_block_idx = 0
    for stage_idx, num_repeat in enumerate(self.stage_repeats):
        for block_idx in range(num_repeat):
            # count the global index to give names to choices
            global_block_idx += 1

            # get ready for input and output
            in_channels = out_channels
            out_channels = self.stage_out_channels[stage_idx + 2]
            stride = 2 if block_idx == 0 else 1

            # mid channels can be searched
            base_mid_channels = out_channels // 2
            if self.channel_search:
                k_choice_list = [int(base_mid_channels * (.2 * k)) for k in range(1, 9)]
                mid_channels = nn.ValueChoice(k_choice_list, label=f'channel_{global_block_idx}')
            else:
                mid_channels = int(base_mid_channels)

            mid_channels = cast(nn.MaybeChoice[int], mid_channels)

            choice_block = nn.LayerChoice([
                ShuffleNetBlock(in_channels, out_channels, mid_channels=mid_channels,
                                kernel_size=3, stride=stride, affine=affine),
                ShuffleNetBlock(in_channels, out_channels, mid_channels=mid_channels,
                                kernel_size=5, stride=stride, affine=affine),
                ShuffleNetBlock(in_channels, out_channels, mid_channels=mid_channels,
                                kernel_size=7, stride=stride, affine=affine),
                ShuffleXceptionBlock(in_channels, out_channels, mid_channels=mid_channels,
                                     stride=stride, affine=affine)
            ], label=f'layer_{global_block_idx}')
            feature_blocks.append(choice_block)

    self.features = nn.Sequential(*feature_blocks)

    # final layers
    last_conv_channels = self.stage_out_channels[-1]
    self.conv_last = nn.Sequential(
        nn.Conv2d(out_channels, last_conv_channels, 1, 1, 0, bias=False),
        nn.BatchNorm2d(last_conv_channels, affine=affine),
        nn.ReLU(inplace=True),
    )
    self.globalpool = nn.AdaptiveAvgPool2d((1, 1))
    self.dropout = nn.Dropout(0.1)
    self.classifier = nn.Sequential(
        nn.Linear(last_conv_channels, num_labels, bias=False),
    )

    self._initialize_weights()
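# Added worked example (illustrative, derived from the code above): in the
# first stage, out_channels = 64, so base_mid_channels = 32 and channel search
# spans 20% to 160% of that base:
base_mid_channels = 64 // 2
k_choice_list = [int(base_mid_channels * (.2 * k)) for k in range(1, 9)]
# k_choice_list == [6, 12, 19, 25, 32, 38, 44, 51]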
def __init__(self):
    super(Sequence, self).__init__()
    self.lstm1 = nn.LSTMCell(1, 51)
    self.lstm2 = nn.LSTMCell(51, 51)
    self.linear = nn.Linear(51, 1)
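# Added forward sketch (an assumption): the two LSTMCells are typically
# unrolled step by step, in the style of PyTorch's time_sequence_prediction
# example; `future` extra steps feed the last prediction back as input. This
# forward is an illustration, not necessarily the original one. Assumes
# `import torch`.
import torch

def forward(self, input, future=0):
    outputs = []
    h_t = torch.zeros(input.size(0), 51)
    c_t = torch.zeros(input.size(0), 51)
    h_t2 = torch.zeros(input.size(0), 51)
    c_t2 = torch.zeros(input.size(0), 51)

    for input_t in input.split(1, dim=1):  # one scalar per time step
        h_t, c_t = self.lstm1(input_t, (h_t, c_t))
        h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
        output = self.linear(h_t2)
        outputs.append(output)
    for _ in range(future):  # optionally predict beyond the given sequence
        h_t, c_t = self.lstm1(output, (h_t, c_t))
        h_t2, c_t2 = self.lstm2(h_t, (h_t2, c_t2))
        output = self.linear(h_t2)
        outputs.append(output)
    return torch.cat(outputs, dim=1)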
def __init__(self, d_embed, d_proj):
    super().__init__()
    self.linear = nn.Linear(d_embed, d_proj)
def __init__(self):
    super(MyModule, self).__init__()
    self.fc1 = nn.Linear(4, 5, bias=False)
    self.fc1.weight.data.fill_(2.)
    self.fc2 = nn.Linear(5, 6, bias=False)
    self.fc2.weight.data.fill_(3.)
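# Added sanity check (illustrative): with fc1 weights all 2.0 and fc2 weights
# all 3.0, an all-ones input propagates deterministically.
import torch

m = MyModule()
h = m.fc1(torch.ones(1, 4))  # each of the 5 hidden units: 2.0 * 4 = 8.0
y = m.fc2(h)                 # each of the 6 outputs: 3.0 * 8.0 * 5 = 120.0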
def __init__(self):
    super(Policy, self).__init__()
    self.affine1 = nn.Linear(4, 128)
    self.affine2 = nn.Linear(128, 2)
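# Added forward sketch (an assumption): in the style of PyTorch's REINFORCE
# example, this policy maps a 4-dim CartPole observation to a distribution
# over 2 actions; the original forward may differ.
import torch.nn.functional as F

def forward(self, x):
    x = F.relu(self.affine1(x))
    action_scores = self.affine2(x)
    return F.softmax(action_scores, dim=1)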
def __init__(self,
             num_labels: int = 1000,
             base_widths: Tuple[int, ...] = (16, 16, 32, 64, 128, 256, 512, 1024),
             width_multipliers: Tuple[float, ...] = (0.5, 0.625, 0.75, 1.0, 1.25, 1.5, 2.0),
             expand_ratios: Tuple[int, ...] = (1, 2, 3, 4, 5, 6),
             dropout_rate: float = 0.2,
             bn_eps: float = 1e-3,
             bn_momentum: float = 0.1):
    super().__init__()

    self.widths = [
        nn.ValueChoice([make_divisible(base_width * mult, 8) for mult in width_multipliers],
                       label=f'width_{i}')
        for i, base_width in enumerate(base_widths)
    ]
    self.expand_ratios = expand_ratios

    blocks = [
        # Stem
        ConvBNReLU(3, self.widths[0], nn.ValueChoice([3, 5], label='ks_0'),
                   stride=2, activation_layer=h_swish),
        SeparableConv(self.widths[0], self.widths[0], activation_layer=nn.ReLU),
    ]

    # counter for kernel sizes and expand ratios
    self.layer_count = 2

    blocks += [
        # Body
        self._make_stage(1, self.widths[0], self.widths[1], False, 2, nn.ReLU),
        self._make_stage(2, self.widths[1], self.widths[2], True, 2, nn.ReLU),
        self._make_stage(1, self.widths[2], self.widths[3], False, 2, h_swish),
        self._make_stage(1, self.widths[3], self.widths[4], True, 1, h_swish),
        self._make_stage(1, self.widths[4], self.widths[5], True, 2, h_swish),
    ]

    # Head
    blocks += [
        ConvBNReLU(self.widths[5], self.widths[6], 1, 1, activation_layer=h_swish),
        nn.AdaptiveAvgPool2d(1),
        ConvBNReLU(self.widths[6], self.widths[7], 1, 1,
                   norm_layer=nn.Identity, activation_layer=h_swish),
    ]

    self.blocks = nn.Sequential(*blocks)

    self.classifier = nn.Sequential(
        nn.Dropout(dropout_rate),
        nn.Linear(self.widths[7], num_labels),
    )

    reset_parameters(self, bn_momentum=bn_momentum, bn_eps=bn_eps)
def __init__(self):
    super().__init__()
    self.cell = nn.Cell([nn.Linear(16, 16), nn.Linear(16, 16, bias=False)],
                        num_nodes=4, num_ops_per_node=2, num_predecessors=2,
                        merge_op='all')
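# Added forward sketch (an assumption about how this cell is used): with
# num_predecessors=2 the cell's forward takes a list of two tensors, and
# merge_op='all' concatenates the outputs of all 4 nodes, so the hidden width
# grows from 16 to 16 * 4 = 64.
def forward(self, x, y):
    # x, y: [batch, 16] inputs for the two predecessors
    return self.cell([x, y])  # expected shape: [batch, 64]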
def __init__(self, num_nodes):
    super().__init__()
    self.ops = nn.ModuleList()
    self.num_nodes = num_nodes
    for _ in range(num_nodes):
        self.ops.append(nn.Linear(16, 16))
def __init__(self,
             op_candidates: List[str],
             merge_op: Literal['all', 'loose_end'] = 'all',
             num_nodes_per_cell: int = 4,
             width: Union[Tuple[int, ...], int] = 16,
             num_cells: Union[Tuple[int, ...], int] = 20,
             dataset: Literal['cifar', 'imagenet'] = 'imagenet',
             auxiliary_loss: bool = False):
    super().__init__()

    self.dataset = dataset
    self.num_labels = 10 if dataset == 'cifar' else 1000
    self.auxiliary_loss = auxiliary_loss

    # preprocess the specified width and depth
    if isinstance(width, Iterable):
        C = nn.ValueChoice(list(width), label='width')
    else:
        C = width

    self.num_cells: nn.MaybeChoice[int] = cast(int, num_cells)
    if isinstance(num_cells, Iterable):
        self.num_cells = nn.ValueChoice(list(num_cells), label='depth')
    num_cells_per_stage = [(i + 1) * self.num_cells // 3 - i * self.num_cells // 3
                           for i in range(3)]

    # the auxiliary head differs for networks targeted at different datasets
    if dataset == 'imagenet':
        self.stem0 = nn.Sequential(
            nn.Conv2d(3, cast(int, C // 2), kernel_size=3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(cast(int, C // 2)),
            nn.ReLU(inplace=True),
            nn.Conv2d(cast(int, C // 2), cast(int, C), 3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(C),
        )
        self.stem1 = nn.Sequential(
            nn.ReLU(inplace=True),
            nn.Conv2d(cast(int, C), cast(int, C), 3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(C),
        )
        C_pprev = C_prev = C_curr = C
        last_cell_reduce = True
    elif dataset == 'cifar':
        self.stem = nn.Sequential(
            nn.Conv2d(3, cast(int, 3 * C), 3, padding=1, bias=False),
            nn.BatchNorm2d(cast(int, 3 * C))
        )
        C_pprev = C_prev = 3 * C
        C_curr = C
        last_cell_reduce = False

    self.stages = nn.ModuleList()
    for stage_idx in range(3):
        if stage_idx > 0:
            C_curr *= 2
        # For a stage, we get C_in, C_curr, and C_out.
        # C_in is only used in the first cell.
        # C_curr is the number of channels for each operator in the current stage.
        # C_out is usually `C * num_nodes_per_cell` because of the concat operator.
        cell_builder = CellBuilder(op_candidates, C_pprev, C_prev, C_curr,
                                   num_nodes_per_cell, merge_op, stage_idx > 0, last_cell_reduce)
        stage = nn.Repeat(cell_builder, num_cells_per_stage[stage_idx])
        self.stages.append(stage)

        # C_pprev is the output channel count of the second-to-last cell among all cells already built.
        if len(stage) > 1:
            # contains more than one cell
            C_pprev = len(cast(nn.Cell, stage[-2]).output_node_indices) * C_curr
        else:
            # look up the out channels of the last stage
            C_pprev = C_prev

        # This was originally `C_prev = num_nodes_per_cell * C_curr`,
        # but due to loose ends it becomes:
        C_prev = len(cast(nn.Cell, stage[-1]).output_node_indices) * C_curr

        # useful for aligning the pprev and prev cells
        last_cell_reduce = cell_builder.last_cell_reduce

        if stage_idx == 2:
            C_to_auxiliary = C_prev

    if auxiliary_loss:
        assert isinstance(self.stages[2], nn.Sequential), \
            'Auxiliary loss can only be enabled in retrain mode.'
        self.stages[2] = SequentialBreakdown(cast(nn.Sequential, self.stages[2]))
        self.auxiliary_head = AuxiliaryHead(C_to_auxiliary, self.num_labels,
                                            dataset=self.dataset)  # type: ignore

    self.global_pooling = nn.AdaptiveAvgPool2d((1, 1))
    self.classifier = nn.Linear(cast(int, C_prev), self.num_labels)
def __init__(self):
    super().__init__()
    self.cell = nn.NasBench201Cell([
        lambda x, y: nn.Linear(x, y),
        lambda x, y: nn.Linear(x, y, bias=False)
    ], 10, 16)
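# Added forward sketch (an assumption): NasBench201Cell wires the candidate op
# factories (here, two Linear variants) onto every edge of the NAS-Bench-201
# DAG, and the two integers are the input and output feature dimensions, so a
# forward pass is expected to map 10 to 16 features.
def forward(self, x):
    # x: [batch, 10] -> [batch, 16]
    return self.cell(x)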
def __init__(
    self,
    search_embed_dim: Tuple[int, ...] = (192, 216, 240),
    search_mlp_ratio: Tuple[float, ...] = (3.5, 4.0),
    search_num_heads: Tuple[int, ...] = (3, 4),
    search_depth: Tuple[int, ...] = (12, 13, 14),
    img_size: int = 224,
    patch_size: int = 16,
    in_chans: int = 3,
    num_classes: int = 1000,
    qkv_bias: bool = False,
    drop_rate: float = 0.,
    attn_drop_rate: float = 0.,
    drop_path_rate: float = 0.,
    pre_norm: bool = True,
    global_pool: bool = False,
    abs_pos: bool = True,
    qk_scale: Optional[float] = None,
    rpe: bool = True,
):
    super().__init__()

    embed_dim = nn.ValueChoice(list(search_embed_dim), label="embed_dim")
    fixed_embed_dim = nn.ModelParameterChoice(list(search_embed_dim), label="embed_dim")
    depth = nn.ValueChoice(list(search_depth), label="depth")

    self.patch_embed = nn.Conv2d(in_chans, cast(int, embed_dim),
                                 kernel_size=patch_size, stride=patch_size)
    self.patches_num = int((img_size // patch_size) ** 2)
    self.global_pool = global_pool

    self.cls_token = nn.Parameter(torch.zeros(1, 1, cast(int, fixed_embed_dim)))
    trunc_normal_(self.cls_token, std=.02)

    # stochastic depth decay rule
    dpr = [x.item() for x in torch.linspace(0, drop_path_rate, max(search_depth))]

    self.abs_pos = abs_pos
    if self.abs_pos:
        self.pos_embed = nn.Parameter(torch.zeros(1, self.patches_num + 1,
                                                  cast(int, fixed_embed_dim)))
        trunc_normal_(self.pos_embed, std=.02)

    self.blocks = nn.Repeat(lambda index: nn.LayerChoice([
        TransformerEncoderLayer(
            embed_dim=embed_dim,
            fixed_embed_dim=fixed_embed_dim,
            num_heads=num_heads,
            mlp_ratio=mlp_ratio,
            qkv_bias=qkv_bias,
            drop_rate=drop_rate,
            attn_drop=attn_drop_rate,
            drop_path=dpr[index],
            rpe_length=img_size // patch_size,
            qk_scale=qk_scale,
            rpe=rpe,
            pre_norm=pre_norm,
        )
        for mlp_ratio, num_heads in itertools.product(search_mlp_ratio, search_num_heads)
    ], label=f'layer{index}'), depth)

    self.pre_norm = pre_norm
    if self.pre_norm:
        self.norm = nn.LayerNorm(cast(int, embed_dim))
    self.head = nn.Linear(cast(int, embed_dim), num_classes) if num_classes > 0 else nn.Identity()
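# Added worked example (illustrative): the stochastic depth decay rule above
# spreads drop-path rates linearly over the deepest possible network, e.g.
# with drop_path_rate=0.1 and max(search_depth)=14:
import torch

dpr = [x.item() for x in torch.linspace(0, 0.1, 14)]
# dpr[0] == 0.0, dpr[-1] == 0.1, evenly spaced in steps of 0.1 / 13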
def __init__(self, foo, bar):
    super().__init__()
    self.foo = nn.Linear(foo, 3)
    self.bar = nn.Dropout(bar)
def __init__(self, head_count):
    super().__init__()
    # assumes `ValueChoice` is imported directly,
    # e.g. `from nni.retiarii.nn.pytorch import ValueChoice`
    embed_dim = ValueChoice(candidates=[32, 64])
    self.linear1 = nn.Linear(128, embed_dim)
    self.mhatt = nn.MultiheadAttention(embed_dim, head_count)
    self.linear2 = nn.Linear(embed_dim, 1)
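# Added forward sketch (an assumption): the same `embed_dim` choice is shared
# by all three modules, so whichever value (32 or 64) is sampled, the
# dimensions always line up.
def forward(self, x):
    # x: [seq_len, batch, 128]
    x = self.linear1(x)                # -> [seq_len, batch, embed_dim]
    attn_out, _ = self.mhatt(x, x, x)  # -> [seq_len, batch, embed_dim]
    return self.linear2(attn_out)      # -> [seq_len, batch, 1]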
def __init__(self):
    super().__init__()
    self.cell = nn.Cell([nn.Linear(16, 16), nn.Linear(16, 16, bias=False)], num_nodes=4)
def __init__(self, alpha, depths, convops, kernel_sizes, num_layers, skips,
             num_classes=1000, dropout=0.2):
    super(MNASNet, self).__init__()
    assert alpha > 0.0
    assert len(depths) == len(convops) == len(kernel_sizes) == len(num_layers) == len(skips) == 7
    self.alpha = alpha
    self.num_classes = num_classes
    depths = _get_depths([_FIRST_DEPTH] + depths, alpha)
    base_filter_sizes = [16, 24, 40, 80, 96, 192, 320]
    exp_ratios = [3, 3, 3, 6, 6, 6, 6]
    strides = [1, 2, 2, 2, 1, 2, 1]
    layers = [
        # First layer: regular conv.
        nn.Conv2d(3, depths[0], 3, padding=1, stride=2, bias=False),
        nn.BatchNorm2d(depths[0], momentum=_BN_MOMENTUM),
        nn.ReLU(inplace=True),
    ]
    count = 0
    # for conv, prev_depth, depth, ks, skip, stride, repeat, exp_ratio in \
    #         zip(convops, depths[:-1], depths[1:], kernel_sizes, skips, strides, num_layers, exp_ratios):
    for filter_size, exp_ratio, stride in zip(base_filter_sizes, exp_ratios, strides):
        # TODO: restrict so that "choose" can only be used within a mutator
        ph = nn.Placeholder(
            label=f'mutable_{count}',
            related_info={
                'kernel_size_options': [1, 3, 5],
                'n_layer_options': [1, 2, 3, 4],
                'op_type_options': [
                    '__mutated__.base_mnasnet.RegularConv',
                    '__mutated__.base_mnasnet.DepthwiseConv',
                    '__mutated__.base_mnasnet.MobileConv'
                ],
                # 'se_ratio_options': [0, 0.25],
                'skip_options': ['identity', 'no'],
                'n_filter_options': [int(filter_size * x) for x in [0.75, 1.0, 1.25]],
                'exp_ratio': exp_ratio,
                'stride': stride,
                'in_ch': depths[0] if count == 0 else None
            })
        layers.append(ph)
        # if conv == "mconv":
        #     # MNASNet blocks: stacks of inverted residuals.
        #     layers.append(_stack_inverted_residual(prev_depth, depth, ks, skip,
        #                                            stride, exp_ratio, repeat, _BN_MOMENTUM))
        # else:
        #     # Normal conv and depth-separated conv.
        #     layers += _stack_normal_conv(prev_depth, depth, ks, skip, conv == "dconv",
        #                                  stride, repeat, _BN_MOMENTUM)
        count += 1
        if count >= 2:
            break
    layers += [
        # Final mapping to classifier input.
        nn.Conv2d(depths[7], 1280, 1, padding=0, stride=1, bias=False),
        nn.BatchNorm2d(1280, momentum=_BN_MOMENTUM),
        nn.ReLU(inplace=True),
    ]
    self.layers = nn.Sequential(*layers)
    self.classifier = nn.Sequential(
        nn.Dropout(p=dropout, inplace=True),
        nn.Linear(1280, num_classes))
    self._initialize_weights()
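# Added helper sketch (an assumption): `_get_depths` is not shown above. In
# torchvision's MNASNet the analogous helper scales each depth by `alpha` and
# rounds to a multiple of 8; this project's version may differ.
def _round_to_multiple_of(val, divisor, round_up_bias=0.9):
    assert 0.0 < round_up_bias < 1.0
    new_val = max(divisor, int(val + divisor / 2) // divisor * divisor)
    # round up whenever rounding down would lose more than `1 - round_up_bias`
    return new_val if new_val >= round_up_bias * val else new_val + divisor

def _get_depths(depths, alpha):
    return [_round_to_multiple_of(depth * alpha, 8) for depth in depths]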
def __init__(self):
    super().__init__()
    self.m = nn.Linear(4, 5, bias=True)