def __init__(self, num_nodes, num_ops_per_node):
    super().__init__()
    self.ops = nn.ModuleList()
    self.num_nodes = num_nodes
    self.num_ops_per_node = num_ops_per_node
    for _ in range(num_nodes):
        self.ops.append(nn.ModuleList([nn.Linear(16, 16) for __ in range(num_ops_per_node)]))
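# A minimal forward sketch for the toy search cell above, assuming each node
# aggregates its candidate ops by summation over a 16-dim input (the input
# shape and the summation rule are assumptions; the original defines only
# __init__):
def forward(self, x):
    for node_ops in self.ops:
        # Combine every candidate op's output; one-shot supernets often
        # aggregate all candidates before architecture selection.
        x = sum(op(x) for op in node_ops)
    return x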
def __init__(self, n_nodes, channels_pp, channels_p, channels, reduction_p, reduction):
    super().__init__()
    self.reduction = reduction
    self.n_nodes = n_nodes

    # If the previous cell is a reduction cell, the current input size does not match
    # the output size of cell[k-2], so output[k-2] is downsampled in preprocessing.
    if reduction_p:
        self.preproc0 = ops.FactorizedReduce(channels_pp, channels, affine=False)
    else:
        self.preproc0 = ops.StdConv(channels_pp, channels, 1, 1, 0, affine=False)
    self.preproc1 = ops.StdConv(channels_p, channels, 1, 1, 0, affine=False)

    # Generate the DAG of intermediate nodes.
    self.mutable_ops = nn.ModuleList()
    for depth in range(2, self.n_nodes + 2):
        self.mutable_ops.append(
            Node("{}_n{}".format("reduce" if reduction else "normal", depth),
                 depth, channels, 2 if reduction else 0))
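# A forward sketch for the cell above, assuming the DARTS convention: each node
# consumes all previous tensors, and the cell output concatenates the
# intermediate nodes along the channel dimension, which is why downstream code
# treats `channels * n_nodes` as the cell's output width (requires `import torch`):
def forward(self, s0, s1):
    s0 = self.preproc0(s0)
    s1 = self.preproc1(s1)
    tensors = [s0, s1]
    for node in self.mutable_ops:
        tensors.append(node(tensors))
    # Concatenate only the intermediate nodes, not the two inputs.
    return torch.cat(tensors[2:], dim=1)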
def __init__(self, input_size, in_channels, channels, n_classes, n_layers, n_nodes=4,
             stem_multiplier=3, auxiliary=False):
    super().__init__()
    self.in_channels = in_channels
    self.channels = channels
    self.n_classes = n_classes
    self.n_layers = n_layers
    self.aux_pos = 2 * n_layers // 3 if auxiliary else -1

    c_cur = stem_multiplier * self.channels
    self.stem = nn.Sequential(
        nn.Conv2d(in_channels, c_cur, 3, 1, 1, bias=False),
        nn.BatchNorm2d(c_cur)
    )

    # For the first cell, the stem is used for both s0 and s1.
    # [!] channels_pp and channels_p are output channel sizes, but c_cur is an input channel size.
    channels_pp, channels_p, c_cur = c_cur, c_cur, channels

    self.cells = nn.ModuleList()
    reduction_p, reduction = False, False
    for i in range(n_layers):
        reduction_p, reduction = reduction, False
        # Reduce feature map size and double channels at the 1/3 and 2/3 layers.
        if i in [n_layers // 3, 2 * n_layers // 3]:
            c_cur *= 2
            reduction = True

        cell = Cell(n_nodes, channels_pp, channels_p, c_cur, reduction_p, reduction)
        self.cells.append(cell)
        c_cur_out = c_cur * n_nodes
        channels_pp, channels_p = channels_p, c_cur_out

        # if i == self.aux_pos:
        #     self.aux_head = AuxiliaryHead(input_size // 4, channels_p, n_classes)

    self.gap = nn.AdaptiveAvgPool2d(1)
    self.linear = nn.Linear(channels_p, n_classes)
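# A forward sketch consistent with the network above (the auxiliary head is
# commented out in the constructor, so it is omitted here as well):
def forward(self, x):
    s0 = s1 = self.stem(x)
    for cell in self.cells:
        # Each cell takes the outputs of the two preceding cells.
        s0, s1 = s1, cell(s0, s1)
    out = self.gap(s1)
    out = out.view(out.size(0), -1)  # flatten before the linear classifier
    return self.linear(out)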
def __init__(self, node_id, num_prev_nodes, channels, num_downsample_connect):
    super().__init__()
    self.ops = nn.ModuleList()
    choice_keys = []
    for i in range(num_prev_nodes):
        stride = 2 if i < num_downsample_connect else 1
        choice_keys.append("{}_p{}".format(node_id, i))
        self.ops.append(
            nn.LayerChoice([
                ops.PoolBN('max', channels, 3, stride, 1, affine=False),
                ops.PoolBN('avg', channels, 3, stride, 1, affine=False),
                nn.Identity() if stride == 1 else ops.FactorizedReduce(channels, channels, affine=False),
                ops.SepConv(channels, channels, 3, stride, 1, affine=False),
                ops.SepConv(channels, channels, 5, stride, 2, affine=False),
                ops.DilConv(channels, channels, 3, stride, 2, 2, affine=False),
                ops.DilConv(channels, channels, 5, stride, 4, 2, affine=False)
            ]))
    self.drop_path = ops.DropPath()
    self.input_switch = nn.InputChoice(n_chosen=2)
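# A forward sketch for the node above, assuming the DARTS search convention:
# each candidate edge processes one predecessor tensor, drop-path regularizes
# each edge output, and InputChoice(n_chosen=2) selects and sums two of them:
def forward(self, prev_nodes):
    out = [op(node) for op, node in zip(self.ops, prev_nodes)]
    out = [self.drop_path(o) if o is not None else None for o in out]
    return self.input_switch(out)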
def __init__(self,
             op_candidates: List[str],
             merge_op: Literal['all', 'loose_end'] = 'all',
             num_nodes_per_cell: int = 4,
             width: Union[Tuple[int, ...], int] = 16,
             num_cells: Union[Tuple[int, ...], int] = 20,
             dataset: Literal['cifar', 'imagenet'] = 'imagenet',
             auxiliary_loss: bool = False):
    super().__init__()

    self.dataset = dataset
    self.num_labels = 10 if dataset == 'cifar' else 1000
    self.auxiliary_loss = auxiliary_loss

    # Preprocess the specified width and depth.
    if isinstance(width, Iterable):
        C = nn.ValueChoice(list(width), label='width')
    else:
        C = width

    self.num_cells: nn.MaybeChoice[int] = cast(int, num_cells)
    if isinstance(num_cells, Iterable):
        self.num_cells = nn.ValueChoice(list(num_cells), label='depth')
    num_cells_per_stage = [(i + 1) * self.num_cells // 3 - i * self.num_cells // 3 for i in range(3)]

    # The auxiliary head differs for networks targeted at different datasets.
    if dataset == 'imagenet':
        self.stem0 = nn.Sequential(
            nn.Conv2d(3, cast(int, C // 2), kernel_size=3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(cast(int, C // 2)),
            nn.ReLU(inplace=True),
            nn.Conv2d(cast(int, C // 2), cast(int, C), 3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(C),
        )
        self.stem1 = nn.Sequential(
            nn.ReLU(inplace=True),
            nn.Conv2d(cast(int, C), cast(int, C), 3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(C),
        )
        C_pprev = C_prev = C_curr = C
        last_cell_reduce = True
    elif dataset == 'cifar':
        self.stem = nn.Sequential(
            nn.Conv2d(3, cast(int, 3 * C), 3, padding=1, bias=False),
            nn.BatchNorm2d(cast(int, 3 * C))
        )
        C_pprev = C_prev = 3 * C
        C_curr = C
        last_cell_reduce = False
    else:
        raise ValueError(f'Unsupported dataset: {dataset}')

    self.stages = nn.ModuleList()
    for stage_idx in range(3):
        if stage_idx > 0:
            C_curr *= 2
        # For a stage, we get C_in, C_curr, and C_out.
        # C_in is only used in the first cell; C_curr is the number of channels for each
        # operator in the current stage; C_out is usually `C_curr * num_nodes_per_cell`
        # because of the concat operator.
        cell_builder = CellBuilder(op_candidates, C_pprev, C_prev, C_curr,
                                   num_nodes_per_cell, merge_op, stage_idx > 0, last_cell_reduce)
        stage: Union[NDSStage, nn.Sequential] = NDSStage(cell_builder, num_cells_per_stage[stage_idx])

        if isinstance(stage, NDSStage):
            stage.estimated_out_channels_prev = cast(int, C_prev)
            stage.estimated_out_channels = cast(int, C_curr * num_nodes_per_cell)
            stage.downsampling = stage_idx > 0

        self.stages.append(stage)

        # NOTE: output_node_indices will be computed on-the-fly in trial code.
        # When constructing the model space, it is simply all the nodes in the cell,
        # which happens to be the case for the one-shot supernet.

        # C_pprev is the output channel count of the second-to-last cell among all
        # cells already built.
        if len(stage) > 1:
            # The stage contains more than one cell.
            C_pprev = len(cast(nn.Cell, stage[-2]).output_node_indices) * C_curr
        else:
            # Look up the output channels of the last stage.
            C_pprev = C_prev

        # This was originally `C_prev = num_nodes_per_cell * C_curr`,
        # but with the `loose_end` merge op it becomes:
        C_prev = len(cast(nn.Cell, stage[-1]).output_node_indices) * C_curr

        # Useful in aligning the pprev and prev cells.
        last_cell_reduce = cell_builder.last_cell_reduce

        if stage_idx == 2:
            C_to_auxiliary = C_prev

    if auxiliary_loss:
        assert isinstance(self.stages[2], nn.Sequential), \
            'Auxiliary loss can only be enabled in retrain mode.'
        self.stages[2] = SequentialBreakdown(cast(nn.Sequential, self.stages[2]))
        self.auxiliary_head = AuxiliaryHead(C_to_auxiliary, self.num_labels, dataset=self.dataset)  # type: ignore

    self.global_pooling = nn.AdaptiveAvgPool2d((1, 1))
    self.classifier = nn.Linear(cast(int, C_prev), self.num_labels)
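# A hypothetical instantiation of the model space above (the class name
# `NDSModelSpace` is a placeholder, and the candidate op names follow the
# DARTS convention; both are assumptions, not taken from the original):
model_space = NDSModelSpace(
    op_candidates=['max_pool_3x3', 'avg_pool_3x3', 'skip_connect',
                   'sep_conv_3x3', 'sep_conv_5x5', 'dil_conv_3x3', 'dil_conv_5x5'],
    merge_op='all',
    width=(16, 24, 32),     # a tuple becomes nn.ValueChoice(label='width')
    num_cells=(8, 12, 16),  # a tuple becomes nn.ValueChoice(label='depth')
    dataset='cifar',
)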
def __init__(self, num_nodes):
    super().__init__()
    self.ops = nn.ModuleList()
    self.num_nodes = num_nodes
    for _ in range(num_nodes):
        self.ops.append(nn.Linear(16, 16))
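# A minimal forward sketch, assuming the node ops are chained sequentially over
# a 16-dim feature (an assumption; the original defines only __init__):
def forward(self, x):
    for op in self.ops:
        x = op(x)
    return x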
def __init__(self,
             op_candidates: List[str],
             merge_op: Literal['all', 'loose_end'] = 'all',
             num_nodes_per_cell: int = 4,
             width: Union[Tuple[int, ...], int] = 16,
             num_cells: Union[Tuple[int, ...], int] = 20,
             dataset: Literal['cifar', 'imagenet'] = 'imagenet',
             auxiliary_loss: bool = False):
    super().__init__()

    self.dataset = dataset
    self.num_labels = 10 if dataset == 'cifar' else 1000
    self.auxiliary_loss = auxiliary_loss

    # Preprocess the specified width and depth.
    if isinstance(width, Iterable):
        C = nn.ValueChoice(list(width), label='width')
    else:
        C = width
    if isinstance(num_cells, Iterable):
        num_cells = nn.ValueChoice(list(num_cells), label='depth')
    num_cells_per_stage = [(i + 1) * num_cells // 3 - i * num_cells // 3 for i in range(3)]

    # The auxiliary head differs for networks targeted at different datasets.
    if dataset == 'imagenet':
        self.stem0 = nn.Sequential(
            nn.Conv2d(3, C // 2, kernel_size=3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(C // 2),
            nn.ReLU(inplace=True),
            nn.Conv2d(C // 2, C, 3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(C),
        )
        self.stem1 = nn.Sequential(
            nn.ReLU(inplace=True),
            nn.Conv2d(C, C, 3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(C),
        )
        C_pprev = C_prev = C_curr = C
        last_cell_reduce = True
    elif dataset == 'cifar':
        self.stem = nn.Sequential(
            nn.Conv2d(3, 3 * C, 3, padding=1, bias=False),
            nn.BatchNorm2d(3 * C)
        )
        C_pprev = C_prev = 3 * C
        C_curr = C
        last_cell_reduce = False
    else:
        raise ValueError(f'Unsupported dataset: {dataset}')

    self.stages = nn.ModuleList()
    for stage_idx in range(3):
        if stage_idx > 0:
            C_curr *= 2
        # For a stage, we get C_in, C_curr, and C_out.
        # C_in is only used in the first cell; C_curr is the number of channels for each
        # operator in the current stage; C_out is usually `C_curr * num_nodes_per_cell`
        # because of the concat operator.
        cell_builder = CellBuilder(op_candidates, C_pprev, C_prev, C_curr,
                                   num_nodes_per_cell, merge_op, stage_idx > 0, last_cell_reduce)
        stage = nn.Repeat(cell_builder, num_cells_per_stage[stage_idx])
        self.stages.append(stage)

        # C_pprev is the output channel count of the second-to-last cell among all
        # cells already built.
        if len(stage) > 1:
            # The stage contains more than one cell.
            C_pprev = len(stage[-2].output_node_indices) * C_curr
        else:
            # Look up the output channels of the last stage.
            C_pprev = C_prev

        # This was originally `C_prev = num_nodes_per_cell * C_curr`,
        # but with the `loose_end` merge op it becomes:
        C_prev = len(stage[-1].output_node_indices) * C_curr

        # Useful in aligning the pprev and prev cells.
        last_cell_reduce = cell_builder.last_cell_reduce

        if stage_idx == 2:
            C_to_auxiliary = C_prev

    if auxiliary_loss:
        assert isinstance(self.stages[2], nn.Sequential), \
            'Auxiliary loss can only be enabled in retrain mode.'
        self.stages[2] = SequentialBreakdown(self.stages[2])
        self.auxiliary_head = AuxiliaryHead(C_to_auxiliary, self.num_labels, dataset=self.dataset)

    self.global_pooling = nn.AdaptiveAvgPool2d((1, 1))
    self.classifier = nn.Linear(C_prev, self.num_labels)
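# Worked example of the per-stage cell split used above: with num_cells=20,
# [(i + 1) * 20 // 3 - i * 20 // 3 for i in range(3)] == [6, 7, 7],
# so the three stages always sum to the requested depth even when it is not
# divisible by 3.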
def __init__(self, width_stages=[24, 40, 80, 96, 192, 320], n_cell_stages=[4, 4, 4, 4, 4, 1],
             stride_stages=[2, 2, 2, 1, 2, 1], width_mult=1, n_classes=1000,
             dropout_rate=0, bn_param=(0.1, 1e-3)):
    """
    Parameters
    ----------
    width_stages : list of int
        width (output channels) of each cell stage in the block
    n_cell_stages : list of int
        number of cells in each cell stage
    stride_stages : list of int
        stride of each cell stage in the block
    width_mult : int
        the scale factor of width
    n_classes : int
        number of classes for the classifier
    dropout_rate : float
        dropout rate of the classifier
    bn_param : tuple of (float, float)
        (momentum, eps) for batch normalization
    """
    super(SearchMobileNet, self).__init__()

    input_channel = putils.make_divisible(32 * width_mult, 8)
    first_cell_width = putils.make_divisible(16 * width_mult, 8)
    for i in range(len(width_stages)):
        width_stages[i] = putils.make_divisible(width_stages[i] * width_mult, 8)

    # first conv
    first_conv = ops.ConvLayer(3, input_channel, kernel_size=3, stride=2, use_bn=True,
                               act_func='relu6', ops_order='weight_bn_act')

    # first block
    first_block_conv = ops.OPS['3x3_MBConv1'](input_channel, first_cell_width, 1)
    first_block = first_block_conv
    input_channel = first_cell_width

    blocks = [first_block]

    stage_cnt = 0
    for width, n_cell, s in zip(width_stages, n_cell_stages, stride_stages):
        for i in range(n_cell):
            # Only the first cell of a stage applies the stage stride.
            stride = s if i == 0 else 1
            op_candidates = [ops.OPS['3x3_MBConv3'](input_channel, width, stride),
                             ops.OPS['3x3_MBConv6'](input_channel, width, stride),
                             ops.OPS['5x5_MBConv3'](input_channel, width, stride),
                             ops.OPS['5x5_MBConv6'](input_channel, width, stride),
                             ops.OPS['7x7_MBConv3'](input_channel, width, stride),
                             ops.OPS['7x7_MBConv6'](input_channel, width, stride)]
            if stride == 1 and input_channel == width:
                # Not the first cell of a stage: a Zero op and an identity shortcut are allowed.
                op_candidates += [ops.OPS['Zero'](input_channel, width, stride)]
                shortcut = ops.IdentityLayer(input_channel, input_channel)
            else:
                shortcut = None
            conv_op = LayerChoice(op_candidates, label="s{}_c{}".format(stage_cnt, i))
            inverted_residual_block = ops.MobileInvertedResidualBlock(conv_op, shortcut, op_candidates)
            blocks.append(inverted_residual_block)
            input_channel = width
        stage_cnt += 1

    # feature mix layer
    last_channel = putils.make_divisible(1280 * width_mult, 8) if width_mult > 1.0 else 1280
    feature_mix_layer = ops.ConvLayer(input_channel, last_channel, kernel_size=1, use_bn=True,
                                      act_func='relu6', ops_order='weight_bn_act')
    classifier = ops.LinearLayer(last_channel, n_classes, dropout_rate=dropout_rate)

    self.first_conv = first_conv
    self.blocks = nn.ModuleList(blocks)
    self.feature_mix_layer = feature_mix_layer
    self.global_avg_pooling = nn.AdaptiveAvgPool2d(1)
    self.classifier = classifier

    # set bn param
    self.set_bn_param(momentum=bn_param[0], eps=bn_param[1])
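# A forward sketch matching the layers registered above (the flatten before the
# classifier is an assumption implied by AdaptiveAvgPool2d(1)):
def forward(self, x):
    x = self.first_conv(x)
    for block in self.blocks:
        x = block(x)
    x = self.feature_mix_layer(x)
    x = self.global_avg_pooling(x)
    x = x.view(x.size(0), -1)  # flatten before the linear classifier
    return self.classifier(x)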