def __init__(self, inplanes, planes, stride=1, downsample=None, widen_factor=1,
             vanilla_conv1=True, vanilla_conv3=True, initial_sparsity=0.5,
             sub_kernel_granularity=4, sparse=True):
    """Build a ResNet Bottleneck block (1x1 -> 3x3 -> 1x1 convs, each with BN).

    Args:
        inplanes: number of input channels.
        planes: base channel count; the final 1x1 conv expands to ``planes * 4``.
        stride: stride applied by the middle 3x3 conv.
        downsample: optional module applied to the identity branch (shape match).
        widen_factor: currently unused here (the widening line is commented out).
        vanilla_conv1: if True, use dense nn.Conv2d for the two 1x1 convs;
            otherwise use sparse DynamicConv2d.
        vanilla_conv3: same switch for the 3x3 conv.
        initial_sparsity / sub_kernel_granularity / sparse: forwarded to
            DynamicConv2d when the sparse variants are selected.
    """
    super(Bottleneck, self).__init__()
    adjusted_planes = planes  # np.round(widen_factor * planes).astype('int32')

    if vanilla_conv1:
        self.conv1 = nn.Conv2d(inplanes, adjusted_planes, kernel_size=1, bias=False)
        self.conv3 = nn.Conv2d(adjusted_planes, planes * 4, kernel_size=1, bias=False)
    else:
        self.conv1 = DynamicConv2d(inplanes, adjusted_planes, kernel_size=1,
                                   bias=False, initial_sparsity=initial_sparsity,
                                   sub_kernel_granularity=sub_kernel_granularity,
                                   sparse=sparse)
        self.conv3 = DynamicConv2d(adjusted_planes, planes * 4, kernel_size=1,
                                   bias=False, initial_sparsity=initial_sparsity,
                                   sub_kernel_granularity=sub_kernel_granularity,
                                   sparse=sparse)

    if vanilla_conv3:
        # BUG FIX: original referenced the misspelled name 'adjsuted_planes',
        # raising NameError whenever this (default) branch executed.
        self.conv2 = nn.Conv2d(adjusted_planes, adjusted_planes, kernel_size=3,
                               stride=stride, padding=1, bias=False)
    else:
        self.conv2 = DynamicConv2d(adjusted_planes, adjusted_planes, kernel_size=3,
                                   stride=stride, padding=1, bias=False,
                                   initial_sparsity=initial_sparsity,
                                   sub_kernel_granularity=sub_kernel_granularity,
                                   sparse=sparse)

    self.bn1 = nn.BatchNorm2d(adjusted_planes)
    self.bn2 = nn.BatchNorm2d(adjusted_planes)
    self.bn3 = nn.BatchNorm2d(planes * 4)
    self.relu = nn.ReLU(inplace=True)
    self.downsample = downsample
    self.stride = stride
def __init__(self, in_planes, out_planes, stride, dropRate=0.0, widen_factor=10,
             initial_sparsity=0.5, sub_kernel_granularity=False, sparse=True,
             sparse_momentum=False):
    """Pre-activation BasicBlock (BN -> ReLU -> conv, twice) built from
    sparse DynamicConv2d layers, with an optional 1x1 projection shortcut.

    ``dropRate`` is stored for use in forward; ``widen_factor`` and
    ``sparse_momentum`` are accepted but not used here.
    """
    super(BasicBlock, self).__init__()

    def _sparse_conv(cin, cout, k, s, p):
        # Every convolution in this block shares one sparsity configuration.
        return DynamicConv2d(cin, cout, kernel_size=k, stride=s, padding=p,
                             bias=False, initial_sparsity=initial_sparsity,
                             sub_kernel_granularity=sub_kernel_granularity,
                             sparse=sparse)

    self.bn1 = nn.BatchNorm2d(in_planes)
    self.relu1 = nn.ReLU(inplace=True)
    self.conv1 = _sparse_conv(in_planes, out_planes, 3, stride, 1)

    self.bn2 = nn.BatchNorm2d(out_planes)
    self.relu2 = nn.ReLU(inplace=True)
    self.conv2 = _sparse_conv(out_planes, out_planes, 3, 1, 1)

    self.droprate = dropRate
    self.equalInOut = (in_planes == out_planes)

    # A 1x1 projection is only needed when the channel counts differ;
    # otherwise the identity shortcut is used (None).
    if self.equalInOut:
        self.convShortcut = None
    else:
        self.convShortcut = _sparse_conv(in_planes, out_planes, 1, stride, 0)
def _make_layer(self, block, planes, blocks, stride=1):
    """Stack ``blocks`` residual blocks into one nn.Sequential stage.

    The first block may downsample (via ``stride``) and receives a projection
    ``downsample`` module when the identity branch's shape would not match.
    """
    out_channels = planes * block.expansion

    downsample = None
    if stride != 1 or self.inplanes != out_channels:
        if self.vanilla_downsample:
            proj = nn.Conv2d(self.inplanes, out_channels, kernel_size=1,
                             stride=stride, bias=False)
        else:
            proj = DynamicConv2d(self.inplanes, out_channels, kernel_size=1,
                                 stride=stride, bias=False,
                                 initial_sparsity=self.initial_sparsity_conv,
                                 sub_kernel_granularity=self.sub_kernel_granularity,
                                 sparse=self.sparse)
        downsample = nn.Sequential(proj, nn.BatchNorm2d(out_channels))

    # Options shared by every block in this stage.
    block_kwargs = dict(widen_factor=self.widen_factor,
                        vanilla_conv1=self.vanilla_conv1,
                        vanilla_conv3=self.vanilla_conv3,
                        initial_sparsity=self.initial_sparsity_conv,
                        sub_kernel_granularity=self.sub_kernel_granularity,
                        sparse=self.sparse)

    layers = [block(self.inplanes, planes, stride, downsample, **block_kwargs)]
    self.inplanes = out_channels
    layers.extend(block(self.inplanes, planes, **block_kwargs)
                  for _ in range(1, blocks))
    return nn.Sequential(*layers)
def make_layers(self, cfg, batch_norm=False):
    """Translate a VGG-style ``cfg`` list into a sequential feature extractor.

    Each entry is either ``'M'`` (2x2 max-pool) or an int channel count, which
    becomes a sparse 3x3 DynamicConv2d followed by (optional) BatchNorm and ReLU.
    """
    layers = []
    in_channels = 3  # RGB input
    for v in cfg:
        if v == 'M':
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue
        # conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1, bias=False)
        conv2d = DynamicConv2d(in_channels, v, kernel_size=3, padding=1,
                               bias=False,
                               initial_sparsity=self.initial_sparsity_conv,
                               sparse=self.sparse)
        if batch_norm:
            layers.extend([conv2d,
                           nn.BatchNorm2d(v, affine=self._AFFINE),
                           nn.ReLU(inplace=True)])
        else:
            layers.extend([conv2d, nn.ReLU(inplace=True)])
        in_channels = v
    return nn.Sequential(*layers)
def __init__(self, block, layers, num_classes=1000, widen_factor=1,
             vanilla_downsample=False, vanilla_conv1=False, vanilla_conv3=False,
             initial_sparsity_conv=0.5, initial_sparsity_fc=0.95,
             sub_kernel_granularity=4, sparse=True):
    """ImageNet-style ResNet whose convolutions/classifier can be either dense
    (nn.Conv2d / nn.Linear) or sparse (DynamicConv2d / DynamicLinear).

    ``layers`` gives the number of residual blocks per stage; ``widen_factor``
    scales the channel widths of every stage.
    """
    # All stage widths are multiples of this single rounded base width.
    base_width = np.round(64 * widen_factor).astype('int32')

    self.inplanes = base_width
    super(ResNet, self).__init__()
    self.widen_factor = widen_factor
    self.vanilla_conv1 = vanilla_conv1
    self.vanilla_conv3 = vanilla_conv3
    self.vanilla_downsample = vanilla_downsample
    self.initial_sparsity_conv = initial_sparsity_conv
    self.initial_sparsity_fc = initial_sparsity_fc
    self.sub_kernel_granularity = sub_kernel_granularity
    self.sparse = sparse

    # Stem: 7x7 stride-2 conv, dense or sparse depending on `sparse`.
    if not sparse:
        self.conv1 = nn.Conv2d(3, base_width, kernel_size=7, stride=2,
                               padding=3, bias=False)
    else:
        self.conv1 = DynamicConv2d(3, base_width, kernel_size=7, stride=2,
                                   padding=3, bias=False,
                                   initial_sparsity=initial_sparsity_conv,
                                   sub_kernel_granularity=sub_kernel_granularity,
                                   sparse=sparse)
    self.bn1 = nn.BatchNorm2d(base_width)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    # Four residual stages with channel widths x1, x2, x4, x8.
    self.layer1 = self._make_layer(block, base_width, layers[0])
    self.layer2 = self._make_layer(block, base_width * 2, layers[1], stride=2)
    self.layer3 = self._make_layer(block, base_width * 4, layers[2], stride=2)
    self.layer4 = self._make_layer(block, base_width * 8, layers[3], stride=2)

    self.avgpool = nn.AvgPool2d(7, stride=1)
    if not sparse:
        self.fc = nn.Linear(base_width * block.expansion * 8, num_classes,
                            bias=True)
    else:
        self.fc = DynamicLinear(base_width * block.expansion * 8, num_classes,
                                initial_sparsity=self.initial_sparsity_fc,
                                sparse=sparse)

    # Weight init (He-style fan-out for convs; unit gamma / zero beta for BN).
    # Also sanity-checks that layer kinds are consistent with the `sparse` flag.
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            if sparse:
                raise Exception(
                    'Used sparse=True, but some layers are still dense.')
            n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            m.weight.data.normal_(0, math.sqrt(2. / n))
        elif isinstance(m, DynamicConv2d):
            if not sparse:
                raise Exception(
                    'Used sparse=False, but some layers are still sparse.')
            n = m.kernel_size * m.kernel_size * m.n_output_maps
            if m.sparse:
                m.d_tensor.s_tensor.data.normal_(0, math.sqrt(2. / n))
            else:
                m.d_tensor.bank.data.normal_(0, math.sqrt(2. / n))
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()