def __init__(self, kernel_size, in_ch, out_ch, skip, exp_ratio, stride):
    """Build a pointwise -> depthwise -> linear-pointwise convolution stack.

    Args:
        kernel_size: Kernel size of the depthwise convolution.
        in_ch: Number of input channels.
        out_ch: Number of output channels.
        skip: Stored on the instance; not consulted while building layers here.
        exp_ratio: Factor multiplied with ``in_ch`` to get the hidden width.
        stride: Stride of the depthwise convolution.
    """
    super().__init__()

    # Remember the constructor arguments as plain attributes.
    self.kernel_size = kernel_size
    self.in_ch = in_ch
    self.out_ch = out_ch
    self.skip = skip
    self.exp_ratio = exp_ratio
    self.stride = stride

    hidden_ch = in_ch * exp_ratio
    dw_padding = (kernel_size - 1) // 2

    # Pointwise
    expand = [
        nn.Conv2d(in_ch, hidden_ch, 1, bias=False),
        nn.BatchNorm2d(hidden_ch, momentum=BN_MOMENTUM),
        nn.ReLU(inplace=False),
    ]
    # Depthwise (one group per hidden channel)
    depthwise = [
        nn.Conv2d(hidden_ch, hidden_ch, kernel_size, padding=dw_padding,
                  stride=stride, groups=hidden_ch, bias=False),
        nn.BatchNorm2d(hidden_ch, momentum=BN_MOMENTUM),
        nn.ReLU(inplace=False),
    ]
    # Linear pointwise. Note that there's no activation.
    project = [
        nn.Conv2d(hidden_ch, out_ch, 1, bias=False),
        nn.BatchNorm2d(out_ch, momentum=BN_MOMENTUM),
    ]
    self.layers = nn.Sequential(*expand, *depthwise, *project)
def _stack_normal_conv(in_ch, out_ch, kernel_size, skip, dconv, stride, repeats, bn_momentum):
    """Create ``repeats`` consecutive conv units and return them as a flat list.

    Args:
        in_ch: Input channels of the first unit; later units take ``out_ch``.
        out_ch: Output channels of every unit.
        kernel_size: Spatial kernel size; padding is ``kernel_size // 2``.
        skip: When truthy, units that keep channels and resolution are
            wrapped in ``_ResidualBlock``.
        dconv: When truthy, each unit is a depthwise conv followed by a 1x1
            conv; otherwise a single regular conv.
        stride: Stride of the first unit (later units use stride 1).
        repeats: Number of units, must be at least 1.
        bn_momentum: Momentum given to every ``BatchNorm2d``.

    Returns:
        list: modules (and/or ``_ResidualBlock`` wrappers) in execution order.
    """
    assert repeats >= 1
    blocks = []
    pad = kernel_size // 2
    for idx in range(repeats):
        # Only the first repeat applies the requested stride.
        cur_stride = 1 if idx else stride
        if dconv:
            ops = [
                nn.Conv2d(in_ch, in_ch, kernel_size, padding=pad,
                          stride=cur_stride, groups=in_ch, bias=False),
                nn.BatchNorm2d(in_ch, momentum=bn_momentum),
                nn.ReLU(inplace=True),
                nn.Conv2d(in_ch, out_ch, 1, padding=0, stride=1, bias=False),
                nn.BatchNorm2d(out_ch, momentum=bn_momentum),
            ]
        else:
            ops = [
                nn.Conv2d(in_ch, out_ch, kernel_size, padding=pad,
                          stride=cur_stride, bias=False),
                nn.ReLU(inplace=True),
                nn.BatchNorm2d(out_ch, momentum=bn_momentum),
            ]

        wrap_residual = skip and in_ch == out_ch and cur_stride == 1
        if wrap_residual:
            # use different implementation for skip and noskip to align with pytorch
            blocks.append(_ResidualBlock(nn.Sequential(*ops)))
        else:
            blocks.extend(ops)
        in_ch = out_ch
    return blocks
def __init__(self, in_ch, out_ch, kernel_size, stride, expansion_factor, skip, bn_momentum=0.1):
    """Build the expand -> depthwise -> project convolution stack.

    Args:
        in_ch: Number of input channels.
        out_ch: Number of output channels.
        kernel_size: Depthwise kernel size; must be 3 or 5.
        stride: Depthwise stride; must be 1 or 2.
        expansion_factor: Multiplier applied to ``in_ch`` for the hidden width.
        skip: Requests a residual connection; it is only recorded as active
            when channels match and ``stride`` is 1.
        bn_momentum: Momentum for every ``BatchNorm2d``.
    """
    super(_InvertedResidual, self).__init__()
    assert stride in (1, 2)
    assert kernel_size in (3, 5)

    hidden_ch = in_ch * expansion_factor
    self.apply_residual = skip and in_ch == out_ch and stride == 1

    # Pointwise
    expand = [
        nn.Conv2d(in_ch, hidden_ch, 1, bias=False),
        nn.BatchNorm2d(hidden_ch, momentum=bn_momentum),
        nn.ReLU(inplace=False),
    ]
    # Depthwise
    depthwise = [
        nn.Conv2d(hidden_ch, hidden_ch, kernel_size, padding=kernel_size // 2,
                  stride=stride, groups=hidden_ch, bias=False),
        nn.BatchNorm2d(hidden_ch, momentum=bn_momentum),
        nn.ReLU(inplace=False),
    ]
    # Linear pointwise. Note that there's no activation.
    project = [
        nn.Conv2d(hidden_ch, out_ch, 1, bias=False),
        nn.BatchNorm2d(out_ch, momentum=bn_momentum),
    ]
    self.layers = nn.Sequential(*expand, *depthwise, *project)
def __init__(self, C_in, C_out, kernel_size, stride, padding, dilation, affine=True):
    """Initialise the parent with two ReLU / depthwise / 1x1 / BatchNorm groups.

    Args:
        C_in: Input channels (also the width of the first group).
        C_out: Output channels produced by the second group.
        kernel_size: Kernel size of both depthwise convolutions.
        stride: Stride of the first depthwise conv; the second uses stride 1.
        padding: Padding of both depthwise convolutions.
        dilation: Dilation of both depthwise convolutions.
        affine: Forwarded to both ``BatchNorm2d`` layers.
    """
    first_group = [
        nn.ReLU(inplace=False),
        nn.Conv2d(C_in, C_in, kernel_size=kernel_size, stride=stride,
                  padding=padding, dilation=dilation, groups=C_in, bias=False),
        nn.Conv2d(C_in, C_in, kernel_size=1, padding=0, bias=False),
        nn.BatchNorm2d(C_in, affine=affine),
    ]
    second_group = [
        nn.ReLU(inplace=False),
        nn.Conv2d(C_in, C_in, kernel_size=kernel_size, stride=1,
                  padding=padding, dilation=dilation, groups=C_in, bias=False),
        nn.Conv2d(C_in, C_out, kernel_size=1, padding=0, bias=False),
        nn.BatchNorm2d(C_out, affine=affine),
    ]
    super().__init__(*first_group, *second_group)
def __init__(self, alpha, depths, convops, kernel_sizes, num_layers, skips, num_classes=1000, dropout=0.2):
    """Assemble stem, mutable placeholders, final 1x1 conv and classifier.

    Args:
        alpha: Positive value forwarded to ``_get_depths``.
        depths: Seven per-stage depths; ``_FIRST_DEPTH`` is prepended before
            they are passed through ``_get_depths``.
        convops: Seven entries; only the length is checked here.
        kernel_sizes: Seven entries; only the length is checked here.
        num_layers: Seven entries; only the length is checked here.
        skips: Seven entries; only the length is checked here.
        num_classes: Output size of the final linear layer.
        dropout: Drop probability used in the classifier.
    """
    super().__init__()
    assert alpha > 0.0
    assert len(depths) == len(convops) == len(kernel_sizes) == len(num_layers) == len(skips) == 7
    self.alpha = alpha
    self.num_classes = num_classes
    depths = _get_depths([_FIRST_DEPTH] + depths, alpha)

    base_filter_sizes = [16, 24, 40, 80, 96, 192, 320]
    exp_ratios = [3, 3, 3, 6, 6, 6, 6]
    strides = [1, 2, 2, 2, 1, 2, 1]

    # First layer: regular conv.
    layers = [
        nn.Conv2d(3, depths[0], 3, padding=1, stride=2, bias=False),
        nn.BatchNorm2d(depths[0], momentum=_BN_MOMENTUM),
        nn.ReLU(inplace=True),
    ]

    # Only the first two stage specs become placeholders; the remaining
    # specs are not used. The earlier per-stage construction
    # (_stack_inverted_residual / _stack_normal_conv driven by convops,
    # kernel_sizes, skips and num_layers) is disabled in this version.
    stage_specs = list(zip(base_filter_sizes, exp_ratios, strides))[:2]
    for idx, (filter_size, exp_ratio, stride) in enumerate(stage_specs):
        # TODO: restrict that "choose" can only be used within mutator
        search_space = {
            'kernel_size_options': [1, 3, 5],
            'n_layer_options': [1, 2, 3, 4],
            'op_type_options': ['__mutated__.base_mnasnet.RegularConv',
                                '__mutated__.base_mnasnet.DepthwiseConv',
                                '__mutated__.base_mnasnet.MobileConv'],
            # 'se_ratio_options': [0, 0.25],
            'skip_options': ['identity', 'no'],
            'n_filter_options': [int(filter_size * x) for x in [0.75, 1.0, 1.25]],
            'exp_ratio': exp_ratio,
            'stride': stride,
            # Only the very first placeholder knows its input width.
            'in_ch': depths[0] if idx == 0 else None,
        }
        layers.append(nn.Placeholder(label=f'mutable_{idx}', **search_space))

    # Final mapping to classifier input.
    layers.extend([
        nn.Conv2d(depths[7], 1280, 1, padding=0, stride=1, bias=False),
        nn.BatchNorm2d(1280, momentum=_BN_MOMENTUM),
        nn.ReLU(inplace=True),
    ])
    self.layers = nn.Sequential(*layers)
    self.classifier = nn.Sequential(
        nn.Dropout(p=dropout, inplace=True),
        nn.Linear(1280, num_classes),
    )
    self._initialize_weights()
def __init__(self, input_size, C, n_classes):
    """Create the pooling/conv feature stack and the linear output layer.

    Assumes the input feature map is 7x7 or 8x8.

    Args:
        input_size: Spatial size of the input; must be 7 or 8.
        C: Number of input channels.
        n_classes: Output size of the linear layer.
    """
    assert input_size in (7, 8)
    super().__init__()

    # With a 5x5 window, stride ``input_size - 5`` gives a 2x2 output.
    pool_stride = input_size - 5
    stages = [
        nn.ReLU(inplace=True),
        nn.AvgPool2d(5, stride=pool_stride, padding=0, count_include_pad=False),  # 2x2 out
        nn.Conv2d(C, 128, kernel_size=1, bias=False),
        nn.BatchNorm2d(128),
        nn.ReLU(inplace=True),
        nn.Conv2d(128, 768, kernel_size=2, bias=False),  # 1x1 out
        nn.BatchNorm2d(768),
        nn.ReLU(inplace=True),
    ]
    self.net = nn.Sequential(*stages)
    self.linear = nn.Linear(768, n_classes)
def __init__(self, C: int, num_labels: int, dataset: Literal['imagenet', 'cifar']):
    """Create the pooling/conv feature extractor and the linear classifier.

    Args:
        C: Number of input channels.
        num_labels: Output size of the linear classifier.
        dataset: ``'imagenet'`` selects pooling stride 2, ``'cifar'`` selects
            pooling stride 3.

    Raises:
        ValueError: If ``dataset`` is neither ``'imagenet'`` nor ``'cifar'``.
    """
    # Resolve the stride before touching the module, so an unknown dataset
    # fails with a clear message instead of an unbound local later on.
    if dataset == 'imagenet':
        # assuming input size 14x14
        stride = 2
    elif dataset == 'cifar':
        stride = 3
    else:
        raise ValueError(f'Unsupported dataset: {dataset}')

    super().__init__()

    self.features = nn.Sequential(
        nn.ReLU(inplace=True),
        nn.AvgPool2d(5, stride=stride, padding=0, count_include_pad=False),
        nn.Conv2d(C, 128, 1, bias=False),
        nn.BatchNorm2d(128),
        nn.ReLU(inplace=True),
        nn.Conv2d(128, 768, 2, bias=False),
        nn.BatchNorm2d(768),
        nn.ReLU(inplace=True))
    self.classifier = nn.Linear(768, num_labels)
def __init__(self, C_in, C_out, kernel_size, stride, padding, affine=True):
    """Create ``self.net`` as ReLU -> Conv2d -> BatchNorm2d.

    Args:
        C_in: Input channels of the convolution.
        C_out: Output channels of the convolution and the batch norm.
        kernel_size: Convolution kernel size.
        stride: Convolution stride.
        padding: Convolution padding.
        affine: Forwarded to ``BatchNorm2d``.
    """
    super().__init__()
    activation = nn.ReLU()
    conv = nn.Conv2d(C_in, C_out, kernel_size, stride=stride, padding=padding, bias=False)
    norm = nn.BatchNorm2d(C_out, affine=affine)
    self.net = nn.Sequential(activation, conv, norm)
def __init__(self):
    """Create the conv, residual and upsampling layers of the network.

    Every conv layer except the last is paired with an affine
    ``InstanceNorm2d`` of matching width.
    """
    super(TransformerNet, self).__init__()

    # Initial convolution layers
    self.conv1 = ConvLayer(3, 32, kernel_size=9, stride=1)
    self.in1 = nn.InstanceNorm2d(32, affine=True)
    self.conv2 = ConvLayer(32, 64, kernel_size=3, stride=2)
    self.in2 = nn.InstanceNorm2d(64, affine=True)
    self.conv3 = ConvLayer(64, 128, kernel_size=3, stride=2)
    self.in3 = nn.InstanceNorm2d(128, affine=True)

    # Residual layers: res1 .. res5, all 128 channels wide
    for idx in range(1, 6):
        setattr(self, 'res%d' % idx, ResidualBlock(128))

    # Upsampling Layers
    self.deconv1 = UpsampleConvLayer(128, 64, kernel_size=3, stride=1, upsample=2)
    self.in4 = nn.InstanceNorm2d(64, affine=True)
    self.deconv2 = UpsampleConvLayer(64, 32, kernel_size=3, stride=1, upsample=2)
    self.in5 = nn.InstanceNorm2d(32, affine=True)
    self.deconv3 = ConvLayer(32, 3, kernel_size=9, stride=1)

    # Non-linearities
    self.relu = nn.ReLU()
def __init__(self, kernel_size, in_ch, out_ch, skip, exp_ratio, stride):
    """Create a depthwise conv followed by a 1x1 conv, each with BatchNorm.

    Args:
        kernel_size: Kernel size of the depthwise convolution.
        in_ch: Number of input channels.
        out_ch: Number of output channels of the 1x1 convolution.
        skip: Stored on the instance; not used while building layers here.
        exp_ratio: Stored on the instance; not used while building layers here.
        stride: Stride of the depthwise convolution.
    """
    super().__init__()

    # Keep the raw configuration on the instance.
    self.kernel_size = kernel_size
    self.in_ch = in_ch
    self.out_ch = out_ch
    self.skip = skip
    self.exp_ratio = exp_ratio
    self.stride = stride

    # Depthwise stage: one group per input channel.
    self.conv1 = nn.Conv2d(
        in_ch, in_ch, kernel_size,
        padding=kernel_size // 2, stride=stride, groups=in_ch, bias=False,
    )
    self.bn1 = nn.BatchNorm2d(in_ch, momentum=BN_MOMENTUM)
    self.relu = nn.ReLU(inplace=False)

    # Pointwise stage: 1x1 conv changing the channel count.
    self.conv2 = nn.Conv2d(in_ch, out_ch, 1, padding=0, stride=1, bias=False)
    self.bn2 = nn.BatchNorm2d(out_ch, momentum=BN_MOMENTUM)
def __init__(self, block, layers, num_classes=1000):
    """Build the stem, four residual stages, pooling and classifier.

    Args:
        block: Block class handed to ``_make_layer``; its ``expansion``
            attribute scales the classifier input width.
        layers: Sequence whose first four entries are passed to
            ``_make_layer`` for stages 1-4.
        num_classes: Output size of the final linear layer.
    """
    super(ResNet, self).__init__()
    self.inplanes = 64

    # Stem
    self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
    self.bn1 = nn.BatchNorm2d(64)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    # Stage 1 uses the default stride; stages 2-4 use stride 2.
    self.layer1 = self._make_layer(block, 64, layers[0])
    for stage_no, planes in enumerate((128, 256, 512), start=2):
        stage = self._make_layer(block, planes, layers[stage_no - 1], stride=2)
        setattr(self, 'layer%d' % stage_no, stage)

    self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
    self.fc = nn.Linear(512 * block.expansion, num_classes)

    # Weight initialisation for conv and batch-norm layers.
    for module in self.modules():
        if isinstance(module, nn.Conv2d):
            torch.nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
            continue
        if isinstance(module, nn.BatchNorm2d):
            torch.nn.init.constant_(module.weight, 1)
            torch.nn.init.constant_(module.bias, 0)
def __init__(self, input_size=224, first_conv_channels=16, last_conv_channels=1024, n_classes=1000, affine=False):
    """Build first conv, staged feature blocks, last conv, pool and classifier.

    Args:
        input_size: Input resolution; must be a multiple of 32.
        first_conv_channels: Output channels of the first convolution.
        last_conv_channels: Output channels of the last 1x1 convolution.
        n_classes: Output size of the bias-free linear classifier.
        affine: Forwarded to the ``BatchNorm2d`` layers created here.
    """
    super().__init__()
    assert input_size % 32 == 0

    self.stage_blocks = [4, 4, 8, 4]
    self.stage_channels = [64, 160, 320, 640]
    self._input_size = input_size
    self._feature_map_size = input_size
    self._first_conv_channels = first_conv_channels
    self._last_conv_channels = last_conv_channels
    self._n_classes = n_classes
    self._affine = affine
    self._layerchoice_count = 0

    # building first layer
    self.first_conv = nn.Sequential(
        nn.Conv2d(3, first_conv_channels, kernel_size=3, stride=2, padding=1, bias=False),
        nn.BatchNorm2d(first_conv_channels, affine=affine),
        nn.ReLU(inplace=True),
    )
    # The stride-2 first conv halves the tracked feature-map size.
    self._feature_map_size = self._feature_map_size // 2

    prev_channels = first_conv_channels
    stage_modules = []
    for block_count, stage_width in zip(self.stage_blocks, self.stage_channels):
        stage_modules += self._make_blocks(block_count, prev_channels, stage_width)
        prev_channels = stage_width
    self.features = nn.Sequential(*stage_modules)

    self.conv_last = nn.Sequential(
        nn.Conv2d(prev_channels, last_conv_channels, kernel_size=1, stride=1, padding=0, bias=False),
        nn.BatchNorm2d(last_conv_channels, affine=affine),
        nn.ReLU(inplace=True),
    )
    # Pool over whatever feature-map size is tracked at this point.
    self.globalpool = nn.AvgPool2d(self._feature_map_size)
    self.dropout = nn.Dropout(0.1)
    self.classifier = nn.Sequential(
        nn.Linear(last_conv_channels, n_classes, bias=False),
    )

    self._initialize_weights()
def __init__(self, C_in, C_out, kernel_size, stride, padding, affine=True):
    """Initialise the parent with ReLU -> Conv2d -> BatchNorm2d.

    Args:
        C_in: Input channels of the convolution.
        C_out: Output channels of the convolution and the batch norm.
        kernel_size: Convolution kernel size.
        stride: Convolution stride.
        padding: Convolution padding.
        affine: Forwarded to ``BatchNorm2d``.
    """
    ops = [
        nn.ReLU(inplace=False),
        nn.Conv2d(C_in, C_out, kernel_size, stride=stride, padding=padding, bias=False),
        nn.BatchNorm2d(C_out, affine=affine),
    ]
    super().__init__(*ops)
def __init__(self, C_in, C_out, kernel_size, stride, padding, dilation, affine=True):
    """Create ``self.net``: ReLU, dilated depthwise conv, 1x1 conv, BatchNorm.

    Args:
        C_in: Input channels (also the depthwise group count).
        C_out: Output channels of the 1x1 convolution.
        kernel_size: Kernel size of the depthwise convolution.
        stride: Stride of the depthwise convolution.
        padding: Padding of the depthwise convolution.
        dilation: Dilation of the depthwise convolution.
        affine: Forwarded to ``BatchNorm2d``.
    """
    super().__init__()
    activation = nn.ReLU()
    depthwise = nn.Conv2d(
        C_in, C_in, kernel_size,
        stride=stride, padding=padding, dilation=dilation, groups=C_in, bias=False,
    )
    pointwise = nn.Conv2d(C_in, C_out, 1, stride=1, padding=0, bias=False)
    norm = nn.BatchNorm2d(C_out, affine=affine)
    self.net = nn.Sequential(activation, depthwise, pointwise, norm)
def __init__(self, inplanes, planes, stride=1, downsample=None):
    """Create two 3x3 conv + BatchNorm pairs and record the shortcut config.

    Args:
        inplanes: Input channels of the first convolution.
        planes: Output channels of both convolutions.
        stride: Stride handed to the first ``conv3x3`` call.
        downsample: Stored as-is on the instance (default ``None``).
    """
    super(BasicBlock, self).__init__()

    # First conv carries the stride.
    self.conv1 = conv3x3(inplanes, planes, stride)
    self.bn1 = nn.BatchNorm2d(planes)
    self.relu = nn.ReLU(inplace=True)

    # Second conv keeps the channel count.
    self.conv2 = conv3x3(planes, planes)
    self.bn2 = nn.BatchNorm2d(planes)

    self.downsample = downsample
    self.stride = stride
def __init__(self, upscale_factor):
    """Create four convolutions followed by a pixel-shuffle layer.

    Args:
        upscale_factor: Passed to ``PixelShuffle``; the last convolution
            outputs ``upscale_factor ** 2`` channels.
    """
    super(Net, self).__init__()
    self.relu = nn.ReLU()

    # All convolutions use stride 1 and padding that preserves spatial size.
    self.conv1 = nn.Conv2d(1, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    self.conv2 = nn.Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    self.conv3 = nn.Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    shuffle_channels = upscale_factor ** 2
    self.conv4 = nn.Conv2d(32, shuffle_channels, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))

    self.pixel_shuffle = nn.PixelShuffle(upscale_factor)
def __init__(self, channels):
    """Create two 3x3 conv layers, each with an affine instance norm.

    Args:
        channels: Channel count used for both convs and both norms.
    """
    super(ResidualBlock, self).__init__()

    # First conv + norm pair.
    self.conv1 = ConvLayer(channels, channels, kernel_size=3, stride=1)
    self.in1 = nn.InstanceNorm2d(channels, affine=True)

    # Second conv + norm pair.
    self.conv2 = ConvLayer(channels, channels, kernel_size=3, stride=1)
    self.in2 = nn.InstanceNorm2d(channels, affine=True)

    self.relu = nn.ReLU()
def __init__(self, channels: int, reduction: int = 4, activation_layer: Optional[Callable[..., nn.Module]] = None):
    """Create a global average pool and a two-layer gating MLP.

    Args:
        channels: Input and output width of the MLP.
        reduction: Divisor applied to ``channels`` before the hidden width
            is passed through ``make_divisible(..., 8)``.
        activation_layer: Zero-argument factory for the final activation;
            ``nn.Sigmoid`` is used when ``None``.
    """
    super().__init__()
    gate_factory = nn.Sigmoid if activation_layer is None else activation_layer

    self.avg_pool = nn.AdaptiveAvgPool2d(1)

    squeeze = nn.Linear(channels, make_divisible(channels // reduction, 8))
    hidden_act = nn.ReLU(inplace=True)
    excite = nn.Linear(make_divisible(channels // reduction, 8), channels)
    gate = gate_factory()
    self.fc = nn.Sequential(squeeze, hidden_act, excite, gate)
def build_activation(act_func, inplace=True):
    """Return the activation module named by ``act_func``.

    Args:
        act_func: ``'relu'``, ``'relu6'``, ``'tanh'``, ``'sigmoid'`` or
            ``None``.
        inplace: Forwarded to ``ReLU`` / ``ReLU6``; ignored by the others.

    Returns:
        The matching ``nn`` module, or ``None`` when ``act_func`` is ``None``.

    Raises:
        ValueError: If ``act_func`` is not one of the supported values.
    """
    if act_func is None:
        return None
    if act_func == 'relu':
        return nn.ReLU(inplace=inplace)
    if act_func == 'relu6':
        return nn.ReLU6(inplace=inplace)
    if act_func == 'tanh':
        return nn.Tanh()
    if act_func == 'sigmoid':
        return nn.Sigmoid()
    raise ValueError('do not support: %s' % act_func)
def __init__(self, nz, ngf, nc):
    """Create ``self.main``: four ConvTranspose/BN/ReLU stages, then ConvTranspose + Tanh.

    Args:
        nz: Channels of the input Z.
        ngf: Base width; stages use ``ngf * 8``, ``ngf * 4``, ``ngf * 2``, ``ngf``.
        nc: Output channels of the final transposed convolution.
    """
    super(DCGANGenerator, self).__init__()

    # (in_channels, out_channels, stride, padding) per stage; kernel is 4.
    # State sizes after each stage:
    #   (ngf*8) x 4 x 4, (ngf*4) x 8 x 8, (ngf*2) x 16 x 16, (ngf) x 32 x 32
    stage_specs = [
        (nz, ngf * 8, 1, 0),  # input is Z, going into a convolution
        (ngf * 8, ngf * 4, 2, 1),
        (ngf * 4, ngf * 2, 2, 1),
        (ngf * 2, ngf, 2, 1),
    ]
    modules = []
    for c_in, c_out, stride, pad in stage_specs:
        modules.append(nn.ConvTranspose2d(c_in, c_out, 4, stride, pad, bias=False))
        modules.append(nn.BatchNorm2d(c_out))
        modules.append(nn.ReLU(True))

    # Output stage; state size. (nc) x 64 x 64
    modules.append(nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False))
    modules.append(nn.Tanh())

    self.main = nn.Sequential(*modules)
def __init__(self, C_in, C_out, affine=True):
    """Create a ReLU, two stride-2 1x1 convolutions and a BatchNorm.

    Args:
        C_in: Input channels of both convolutions.
        C_out: Width of the batch norm; each convolution outputs
            ``C_out // 2`` channels.
        affine: Forwarded to ``BatchNorm2d``.
    """
    super().__init__()
    self.relu = nn.ReLU()

    # Two parallel-shaped convolutions, each producing half of C_out.
    self.conv1 = nn.Conv2d(C_in, C_out // 2, kernel_size=1, stride=2, padding=0, bias=False)
    self.conv2 = nn.Conv2d(C_in, C_out // 2, kernel_size=1, stride=2, padding=0, bias=False)

    self.bn = nn.BatchNorm2d(C_out, affine=affine)
def __init__(self, config):
    """Create embedding, projection, encoder and the output MLP.

    Args:
        config: Object exposing ``n_embed``, ``d_embed``, ``d_proj``,
            ``dp_ratio``, ``d_hidden``, ``birnn`` and ``d_out``; it is also
            passed to ``Encoder`` and stored as ``self.config``.
    """
    super(SNLIClassifier, self).__init__()
    self.config = config

    self.embed = nn.Embedding(config.n_embed, config.d_embed)
    self.projection = Linear(config.d_embed, config.d_proj)
    self.encoder = Encoder(config)
    self.dropout = nn.Dropout(p=config.dp_ratio)
    self.relu = nn.ReLU()

    # Width doubles when the config asks for a bidirectional RNN.
    seq_in_size = 2 * config.d_hidden
    if self.config.birnn:
        seq_in_size = seq_in_size * 2

    # Three Linear -> ReLU -> Dropout groups sharing the same activation
    # and dropout modules, followed by the output projection.
    mlp = []
    for _ in range(3):
        mlp.extend([Linear(seq_in_size, seq_in_size), self.relu, self.dropout])
    mlp.append(Linear(seq_in_size, config.d_out))
    self.out = nn.Sequential(*mlp)
def __init__(self, in_channels: int, out_channels: int, mid_channels: int, *, kernel_size: int, stride: int, sequence: str = "pdp", affine: bool = True):
    """Create the main branch and, for stride 2, the projection branch.

    Args:
        in_channels: Input channels of the block.
        out_channels: Output channels of the block.
        mid_channels: Hidden width handed to the main-branch builder.
        kernel_size: Depthwise kernel size; must be 3, 5 or 7.
        stride: Must be 1 or 2. With stride 1 the branch width is
            ``in_channels // 2``, otherwise ``in_channels``.
        sequence: Token string given to ``_decode_point_depth_conv``.
        affine: Forwarded to the ``BatchNorm2d`` layers.
    """
    super().__init__()
    assert stride in (1, 2)
    assert kernel_size in (3, 5, 7)

    if stride == 1:
        self.channels = in_channels // 2
    else:
        self.channels = in_channels
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.mid_channels = mid_channels
    self.kernel_size = kernel_size
    self.stride = stride
    self.pad = kernel_size // 2
    # Width the main branch has to produce.
    self.oup_main = out_channels - self.channels
    self.affine = affine
    assert self.oup_main > 0

    main_layers = self._decode_point_depth_conv(sequence)
    self.branch_main = nn.Sequential(*main_layers)

    proj_layers = []
    if stride == 2:
        width = self.channels
        proj_layers = [
            # dw
            nn.Conv2d(width, width, kernel_size, stride, self.pad, groups=width, bias=False),
            nn.BatchNorm2d(width, affine=affine),
            # pw-linear
            nn.Conv2d(width, width, 1, 1, 0, bias=False),
            nn.BatchNorm2d(width, affine=affine),
            nn.ReLU(inplace=True),
        ]
    # For stride 1 this is an empty block, kept to be compatible with torchscript.
    self.branch_proj = nn.Sequential(*proj_layers)
def __init__(self, config):
    """Create embedding, projection, encoder and the output MLP.

    Args:
        config: Mapping with keys ``"n_embed"``, ``"d_embed"``, ``"d_proj"``,
            ``"dp_ratio"``, ``"d_hidden"``, ``"birnn"``, ``"d_out"``,
            ``"fix_emb"`` and ``"projection"``; it is also passed to
            ``Encoder``.
    """
    super(SNLIClassifier, self).__init__()

    self.embed = nn.Embedding(config["n_embed"], config["d_embed"])
    self.projection = Linear(config["d_embed"], config["d_proj"])
    self.encoder = Encoder(config)
    self.dropout = nn.Dropout(p=config["dp_ratio"])
    self.relu = nn.ReLU()

    # Width doubles when the config asks for a bidirectional RNN.
    seq_in_size = 2 * config["d_hidden"]
    if config["birnn"]:
        seq_in_size = seq_in_size * 2

    # Three Linear -> ReLU -> Dropout groups sharing the same activation
    # and dropout modules, followed by the output projection.
    mlp = []
    for _ in range(3):
        mlp.extend([Linear(seq_in_size, seq_in_size), self.relu, self.dropout])
    mlp.append(Linear(seq_in_size, config["d_out"]))
    self.out = nn.Sequential(*mlp)

    self.fix_emb = config["fix_emb"]
    self.project = config["projection"]
def __init__(self, C_in, C_out, kernel_length, stride, padding, affine=True):
    """Create ``self.net``: ReLU, a (k x 1) conv, a (1 x k) conv, BatchNorm.

    Args:
        C_in: Input channels; the first convolution keeps this width.
        C_out: Output channels of the second convolution.
        kernel_length: Length ``k`` of the two one-dimensional kernels.
        stride: Stride passed to both convolutions.
        padding: Padding passed to both convolutions.
        affine: Forwarded to ``BatchNorm2d``.
    """
    super(FacConv, self).__init__()
    activation = nn.ReLU()
    vertical = nn.Conv2d(C_in, C_in, (kernel_length, 1), stride=stride, padding=padding, bias=False)
    horizontal = nn.Conv2d(C_in, C_out, (1, kernel_length), stride=stride, padding=padding, bias=False)
    norm = nn.BatchNorm2d(C_out, affine=affine)
    self.net = nn.Sequential(activation, vertical, horizontal, norm)
def __init__(self, C_in, C_out, affine=True):
    """Create ReLU, two stride-2 1x1 convs, BatchNorm and a constant pad.

    Args:
        C_in: Input channels of both convolutions.
        C_out: Either an even ``int`` or an object whose ``all_options()``
            yields only even values; each convolution outputs ``C_out // 2``.
        affine: Forwarded to ``BatchNorm2d``.
    """
    super().__init__()

    # C_out must be splittable in two halves.
    if not isinstance(C_out, int):
        # is a value choice
        assert all(option % 2 == 0 for option in C_out.all_options())
    else:
        assert C_out % 2 == 0

    self.relu = nn.ReLU(inplace=False)
    self.conv_1 = nn.Conv2d(C_in, C_out // 2, kernel_size=1, stride=2, padding=0, bias=False)
    self.conv_2 = nn.Conv2d(C_in, C_out // 2, kernel_size=1, stride=2, padding=0, bias=False)
    self.bn = nn.BatchNorm2d(C_out, affine=affine)
    # Zero padding of one element on the right and bottom sides.
    self.pad = nn.ConstantPad2d((0, 1, 0, 1), 0)
def _decode_point_depth_conv(self, sequence):
    """Translate a token string into a flat list of conv/BN/ReLU modules.

    Each ``"p"`` yields a 1x1 conv, BatchNorm and ReLU; each ``"d"`` yields
    a depthwise conv and BatchNorm. The first ``"p"`` switches the width to
    ``self.mid_channels`` and the last token switches it to
    ``self.oup_main``. Only the first depthwise conv uses ``self.stride``.

    Args:
        sequence: Iterable of ``"p"`` / ``"d"`` tokens supporting ``len``.

    Returns:
        list: the created modules in execution order.

    Raises:
        AssertionError: If the last token is not ``"p"`` or a depthwise conv
            would change the (static) channel count.
        ValueError: If a token is neither ``"p"`` nor ``"d"``.
    """
    modules = []
    seen_depth = False
    seen_point = False
    prev_ch: int = self.channels
    cur_ch: int = self.channels
    last_idx = len(sequence) - 1

    for idx, token in enumerate(sequence):
        # compute output channels of this conv
        if idx == last_idx:
            assert token == "p", "Last conv must be point-wise conv."
            cur_ch = self.oup_main
        elif token == "p" and not seen_point:
            cur_ch = cast(int, self.mid_channels)

        if token == "d":
            # depth-wise conv
            if isinstance(prev_ch, int) and isinstance(cur_ch, int):
                # check can only be done for static channels
                assert prev_ch == cur_ch, "Depth-wise conv must not change channels."
            dw_stride = 1 if seen_depth else self.stride
            modules.append(
                nn.Conv2d(prev_ch, cur_ch, self.kernel_size, dw_stride,
                          self.pad, groups=cur_ch, bias=False))
            modules.append(nn.BatchNorm2d(cur_ch, affine=self.affine))
            seen_depth = True
        elif token == "p":
            # point-wise conv
            modules.append(nn.Conv2d(prev_ch, cur_ch, 1, 1, 0, bias=False))
            modules.append(nn.BatchNorm2d(cur_ch, affine=self.affine))
            modules.append(nn.ReLU(inplace=True))
            seen_point = True
        else:
            raise ValueError("Conv sequence must be d and p.")

        prev_ch = cur_ch

    return modules
def __init__(self,
             op_candidates: List[str],
             merge_op: Literal['all', 'loose_end'] = 'all',
             num_nodes_per_cell: int = 4,
             width: Union[Tuple[int, ...], int] = 16,
             num_cells: Union[Tuple[int, ...], int] = 20,
             dataset: Literal['cifar', 'imagenet'] = 'imagenet',
             auxiliary_loss: bool = False):
    """Build the stem(s), three stages of cells, pooling and classifier.

    Args:
        op_candidates: Operator names forwarded to ``CellBuilder``.
        merge_op: Merge strategy forwarded to ``CellBuilder``.
        num_nodes_per_cell: Node count forwarded to ``CellBuilder``; also used
            for the estimated stage output channels.
        width: Base channel count ``C``. An iterable becomes a
            ``nn.ValueChoice`` labelled ``'width'``.
        num_cells: Total number of cells, split over three stages. An iterable
            becomes a ``nn.ValueChoice`` labelled ``'depth'``.
        dataset: ``'cifar'`` gives 10 labels and a single stem; ``'imagenet'``
            gives 1000 labels and two stems.
        auxiliary_loss: When true, stage 2 is wrapped in
            ``SequentialBreakdown`` and an ``AuxiliaryHead`` is attached.

    Raises:
        ValueError: If ``dataset`` is neither ``'imagenet'`` nor ``'cifar'``.
    """
    super().__init__()

    self.dataset = dataset
    self.num_labels = 10 if dataset == 'cifar' else 1000
    self.auxiliary_loss = auxiliary_loss

    # preprocess the specified width and depth
    if isinstance(width, Iterable):
        C = nn.ValueChoice(list(width), label='width')
    else:
        C = width

    self.num_cells: nn.MaybeChoice[int] = cast(int, num_cells)
    if isinstance(num_cells, Iterable):
        self.num_cells = nn.ValueChoice(list(num_cells), label='depth')
    # Split the total cell count over three stages; the per-stage counts
    # always sum back to ``self.num_cells``.
    num_cells_per_stage = [
        (i + 1) * self.num_cells // 3 - i * self.num_cells // 3
        for i in range(3)
    ]

    # auxiliary head is different for network targeted at different datasets
    if dataset == 'imagenet':
        self.stem0 = nn.Sequential(
            nn.Conv2d(3, cast(int, C // 2), kernel_size=3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(cast(int, C // 2)),
            nn.ReLU(inplace=True),
            nn.Conv2d(cast(int, C // 2), cast(int, C), 3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(C),
        )
        self.stem1 = nn.Sequential(
            nn.ReLU(inplace=True),
            nn.Conv2d(cast(int, C), cast(int, C), 3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(C),
        )
        C_pprev = C_prev = C_curr = C
        last_cell_reduce = True
    elif dataset == 'cifar':
        self.stem = nn.Sequential(
            nn.Conv2d(3, cast(int, 3 * C), 3, padding=1, bias=False),
            nn.BatchNorm2d(cast(int, 3 * C)))
        C_pprev = C_prev = 3 * C
        C_curr = C
        last_cell_reduce = False
    else:
        raise ValueError(f'Unsupported dataset: {dataset}')

    self.stages = nn.ModuleList()
    for stage_idx in range(3):
        # Every stage after the first doubles the per-operator width.
        if stage_idx > 0:
            C_curr *= 2
        # For a stage, we get C_in, C_curr, and C_out.
        # C_in is only used in the first cell.
        # C_curr is number of channels for each operator in current stage.
        # C_out is usually `C * num_nodes_per_cell` because of concat operator.
        cell_builder = CellBuilder(op_candidates, C_pprev, C_prev, C_curr,
                                   num_nodes_per_cell, merge_op,
                                   stage_idx > 0, last_cell_reduce)
        stage: Union[NDSStage, nn.Sequential] = NDSStage(
            cell_builder, num_cells_per_stage[stage_idx])

        if isinstance(stage, NDSStage):
            # Record channel estimates and the downsampling flag on the stage.
            stage.estimated_out_channels_prev = cast(int, C_prev)
            stage.estimated_out_channels = cast(
                int, C_curr * num_nodes_per_cell)
            stage.downsampling = stage_idx > 0

        self.stages.append(stage)

        # NOTE: output_node_indices will be computed on-the-fly in trial code.
        # When constructing model space, it's just all the nodes in the cell,
        # which happens to be the case of one-shot supernet.

        # C_pprev is output channel number of last second cell among all the cells already built.
        if len(stage) > 1:
            # Contains more than one cell
            C_pprev = len(cast(nn.Cell, stage[-2]).output_node_indices) * C_curr
        else:
            # Look up in the out channels of last stage.
            C_pprev = C_prev

        # This was originally,
        # C_prev = num_nodes_per_cell * C_curr.
        # but due to loose end, it becomes,
        C_prev = len(cast(nn.Cell, stage[-1]).output_node_indices) * C_curr

        # Useful in aligning the pprev and prev cell.
        last_cell_reduce = cell_builder.last_cell_reduce

        # Channel count fed to the auxiliary head is taken after stage 2.
        if stage_idx == 2:
            C_to_auxiliary = C_prev

    if auxiliary_loss:
        assert isinstance(
            self.stages[2], nn.Sequential
        ), 'Auxiliary loss can only be enabled in retrain mode.'
        self.stages[2] = SequentialBreakdown(
            cast(nn.Sequential, self.stages[2]))
        self.auxiliary_head = AuxiliaryHead(
            C_to_auxiliary, self.num_labels, dataset=self.dataset)  # type: ignore

    self.global_pooling = nn.AdaptiveAvgPool2d((1, 1))
    self.classifier = nn.Linear(cast(int, C_prev), self.num_labels)
lambda C, stride, affine: nn.AvgPool2d( 5, stride=stride, padding=2, count_include_pad=False), 'max_pool_2x2': lambda C, stride, affine: nn.MaxPool2d(2, stride=stride, padding=0), 'max_pool_3x3': lambda C, stride, affine: nn.MaxPool2d(3, stride=stride, padding=1), 'max_pool_5x5': lambda C, stride, affine: nn.MaxPool2d(5, stride=stride, padding=2), 'max_pool_7x7': lambda C, stride, affine: nn.MaxPool2d(7, stride=stride, padding=3), 'skip_connect': lambda C, stride, affine: nn.Identity() if stride == 1 else FactorizedReduce(C, C, affine=affine), 'conv_1x1': lambda C, stride, affine: nn.Sequential( nn.ReLU(inplace=False), nn.Conv2d(C, C, 1, stride=stride, padding=0, bias=False), nn.BatchNorm2d(C, affine=affine)), 'conv_3x3': lambda C, stride, affine: nn.Sequential( nn.ReLU(inplace=False), nn.Conv2d(C, C, 3, stride=stride, padding=1, bias=False), nn.BatchNorm2d(C, affine=affine)), 'sep_conv_3x3': lambda C, stride, affine: SepConv(C, C, 3, stride, 1, affine=affine), 'sep_conv_5x5': lambda C, stride, affine: SepConv(C, C, 5, stride, 2, affine=affine), 'sep_conv_7x7': lambda C, stride, affine: SepConv(C, C, 7, stride, 3, affine=affine), 'dil_conv_3x3': lambda C, stride, affine: DilConv(C, C, 3, stride, 2, 2, affine=affine),
def __init__(self):
    """Initialise the parent with a 3-channel 1x1 Conv2d, BatchNorm2d and ReLU."""
    ops = [
        nn.Conv2d(3, 3, kernel_size=1, stride=1, bias=False),
        nn.BatchNorm2d(3),
        nn.ReLU(inplace=False),
    ]
    super().__init__(*ops)