def __init__(self, nonlin=nn.ReLU, use_bn=False, input_shape=(3, 32, 32)):
    super(ConvNet4, self).__init__()
    # self.nonlin = nonlin
    self.use_bn = use_bn
    self.conv1_size = 32  # 64 #32
    self.conv2_size = 64  # 128 #64
    self.fc1_size = 1024  # 200 #500 #1024
    self.fc2_size = 10  # 1024 #200 #500 #1024

    block1 = OrderedDict([
        ('conv1', nn.Conv2d(input_shape[0], self.conv1_size, kernel_size=5, padding=3)),
        ('maxpool1', nn.MaxPool2d(2)),
        ('nonlin1', nonlin()),
    ])
    block2 = OrderedDict([
        ('conv2', nn.Conv2d(self.conv1_size, self.conv2_size, kernel_size=5, padding=2)),
        ('maxpool2', nn.MaxPool2d(2)),
        ('nonlin2', nonlin()),
    ])
    block3 = OrderedDict([
        ('batchnorm1', nn.BatchNorm2d(self.conv2_size)),
        ('reshape1', ReshapeBatch(-1)),
        ('fc1', nn.Linear((input_shape[1] // 4) * (input_shape[2] // 4) * self.conv2_size,
                          self.fc1_size)),
        ('nonlin3', nonlin()),
    ])
    block4 = OrderedDict([
        ('batchnorm2', nn.BatchNorm1d(self.fc1_size)),
        ('fc2', nn.Linear(self.fc1_size, self.fc2_size)),
    ])

    if not self.use_bn:
        del block3['batchnorm1']
        del block4['batchnorm2']

    self.all_modules = nn.Sequential(OrderedDict([
        ('block1', nn.Sequential(block1)),
        ('block2', nn.Sequential(block2)),
        ('block3', nn.Sequential(block3)),
        ('block4', nn.Sequential(block4)),
    ]))
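# The ReshapeBatch and CAbs helpers referenced by these models are defined
# elsewhere in the repo. The sketch below is only an illustration of what
# ReshapeBatch(-1) is assumed to do (flatten all non-batch dimensions); it is
# not the repo's actual implementation.
import torch
import torch.nn as nn


class ReshapeBatch(nn.Module):
    """Hypothetical stand-in: reshape everything after the batch dimension."""

    def __init__(self, *shape):
        super().__init__()
        self.shape = shape

    def forward(self, x):
        return x.reshape(x.size(0), *self.shape)


# Usage sketch (assumes the ConvNet4 class above is importable): with the
# default (3, 32, 32) input, the two maxpools halve the spatial size twice,
# so fc1 sees (32 // 4) * (32 // 4) * 64 = 4096 features and the output is (N, 10).
#   model = ConvNet4()
#   out = model.all_modules(torch.randn(2, 3, 32, 32))  # -> torch.Size([2, 10])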
def __init__(self, nonlin=nn.ReLU, input_shape=(3, 32, 32), filter_frac=1.0):
    super(FCNet, self).__init__()
    num_input_channels = input_shape[0] * input_shape[1] * input_shape[2]
    num_filters1 = round(784 * filter_frac)
    num_filters2 = round(1024 * filter_frac)

    block0 = OrderedDict([
        ('reshape', ReshapeBatch(-1)),
        ('fc0', nn.Linear(num_input_channels, num_filters1)),
        ('nonlin0', nonlin()),
    ])
    block1 = OrderedDict([
        ('fc1', nn.Linear(num_filters1, num_filters2)),
        ('nonlin1', nonlin()),
    ])
    block2 = OrderedDict([
        ('fc2', nn.Linear(num_filters2, num_filters2)),
        ('nonlin2', nonlin()),
    ])
    block3 = OrderedDict([
        ('fc3', nn.Linear(num_filters2, num_filters2)),
        ('nonlin3', nonlin()),
    ])
    block4 = OrderedDict([
        ('fc1', nn.Linear(num_filters2, 10)),
    ])

    self.all_modules = nn.Sequential(OrderedDict([
        ('block0', nn.Sequential(block0)),
        ('block1', nn.Sequential(block1)),
        ('block2', nn.Sequential(block2)),
        ('block3', nn.Sequential(block3)),
        ('block4', nn.Sequential(block4)),
    ]))
def __init__(self, nonlin=nn.ReLU, no_step_last=False, num_classes=1000):
    super(AlexNet, self).__init__()
    nl_name = 'nonlin'  # nonlin.__name__

    block0 = OrderedDict([
        ('conv1', nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2)),
        ('{}1'.format(nl_name), nonlin()),
    ])
    block1 = OrderedDict([
        ('maxpool1', nn.MaxPool2d(kernel_size=3, stride=2)),
        ('conv2', nn.Conv2d(64, 192, kernel_size=5, padding=2)),
        ('{}2'.format(nl_name), nonlin()),
    ])
    block2 = OrderedDict([
        ('maxpool2', nn.MaxPool2d(kernel_size=3, stride=2)),
        ('conv3', nn.Conv2d(192, 384, kernel_size=3, padding=1)),
        ('{}3'.format(nl_name), nonlin()),
    ])
    block3 = OrderedDict([
        ('conv4', nn.Conv2d(384, 256, kernel_size=3, padding=1)),
        ('{}4'.format(nl_name), nonlin()),
    ])
    block4 = OrderedDict([
        ('conv5', nn.Conv2d(256, 256, kernel_size=3, padding=1)),
        ('{}5'.format(nl_name), nonlin()),
    ])
    block5 = OrderedDict([
        ('maxpool3', nn.MaxPool2d(kernel_size=3, stride=2)),
        ('reshape1', ReshapeBatch(-1)),
        ('dropout1', nn.Dropout()),
        ('fc1', nn.Linear(256 * 6 * 6, 4096)),
        ('{}6'.format(nl_name), nonlin()),
    ])
    block6 = OrderedDict([
        ('dropout2', nn.Dropout()),
        ('fc2', nn.Linear(4096, 4096)),
        # ('{}7'.format(nl_name), nonlin()),
    ])

    if not no_step_last:
        block6['{}7'.format(nl_name)] = nonlin()
        block7 = OrderedDict([('fc3', nn.Linear(4096, num_classes))])
    else:
        block6['{}7'.format(nl_name)] = nn.ReLU()
        block6['fc3'] = nn.Linear(4096, num_classes)
        block7 = None

    layers_od = OrderedDict([
        ('block0', nn.Sequential(block0)),
        ('block1', nn.Sequential(block1)),
        ('block2', nn.Sequential(block2)),
        ('block3', nn.Sequential(block3)),
        ('block4', nn.Sequential(block4)),
        ('block5', nn.Sequential(block5)),
        ('block6', nn.Sequential(block6)),
    ])
    if block7 is not None:
        layers_od['block7'] = nn.Sequential(block7)

    self.layers = nn.Sequential(layers_od)
def __init__(self, nonlin=nn.ReLU, use_bn=True, input_shape=(3, 40, 40), no_step_last=False):
    super(ConvNet8, self).__init__()
    self.use_bn = use_bn
    bias = not use_bn

    if input_shape[1] == 40:
        pad0 = 0
        ks6 = 5
    elif input_shape[1] == 32:
        pad0 = 2
        ks6 = 4
    else:
        raise NotImplementedError('no other input sizes are currently supported')

    # TODO: DoReFaNet uses a weird activation: f(x) = min(1, abs(x))
    block0 = OrderedDict([
        ('conv0', nn.Conv2d(3, 48, kernel_size=5, padding=pad0, bias=True)),  # padding = valid
        ('maxpool0', nn.MaxPool2d(2)),  # padding = same
        ('nonlin1', nonlin()),  # 18
    ])
    block1 = OrderedDict([
        ('conv1', nn.Conv2d(48, 64, kernel_size=3, padding=1, bias=bias)),  # padding = same
        ('batchnorm1', nn.BatchNorm2d(64, eps=1e-4)),
        ('nonlin1', nonlin()),
    ])
    block2 = OrderedDict([
        ('conv2', nn.Conv2d(64, 64, kernel_size=3, padding=1, bias=bias)),  # padding = same
        ('batchnorm2', nn.BatchNorm2d(64, eps=1e-4)),
        ('maxpool1', nn.MaxPool2d(2)),  # padding = same
        ('nonlin2', nonlin()),  # 9
    ])
    block3 = OrderedDict([
        ('conv3', nn.Conv2d(64, 128, kernel_size=3, padding=0, bias=bias)),  # padding = valid
        ('batchnorm3', nn.BatchNorm2d(128, eps=1e-4)),
        ('nonlin3', nonlin()),  # 7
    ])
    block4 = OrderedDict([
        ('conv4', nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=bias)),  # padding = same
        ('batchnorm4', nn.BatchNorm2d(128, eps=1e-4)),
        ('nonlin4', nonlin()),
    ])
    block5 = OrderedDict([
        ('conv5', nn.Conv2d(128, 128, kernel_size=3, padding=0, bias=bias)),  # padding = valid
        ('batchnorm5', nn.BatchNorm2d(128, eps=1e-4)),
        ('nonlin5', nonlin()),  # 5
    ])
    block6 = OrderedDict([
        ('dropout', nn.Dropout2d()),
        ('conv6', nn.Conv2d(128, 512, kernel_size=ks6, padding=0, bias=bias)),  # padding = valid
        ('batchnorm6', nn.BatchNorm2d(512, eps=1e-4)),
        ('nonlin6', nonlin() if not no_step_last else CAbs()),
        # ('nonlin6', nonlin() if not relu_last_layer else nn.ReLU()),
    ])
    block7 = OrderedDict([
        ('reshape_fc1', ReshapeBatch(-1)),
        ('fc1', nn.Linear(512, 10, bias=True)),
    ])

    if not self.use_bn:
        del block1['batchnorm1']
        del block2['batchnorm2']
        del block3['batchnorm3']
        del block4['batchnorm4']
        del block5['batchnorm5']
        del block6['batchnorm6']

    self.all_modules = nn.Sequential(OrderedDict([
        ('block0', nn.Sequential(block0)),
        ('block1', nn.Sequential(block1)),
        ('block2', nn.Sequential(block2)),
        ('block3', nn.Sequential(block3)),
        ('block4', nn.Sequential(block4)),
        ('block5', nn.Sequential(block5)),
        ('block6', nn.Sequential(block6)),
        ('block7', nn.Sequential(block7)),
    ]))
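# A minimal sketch of the CAbs activation used above when no_step_last is set.
# Based on the TODO note about DoReFa-Net, it is assumed to be the clipped
# absolute value f(x) = min(1, |x|); the repo's own definition may differ.
import torch
import torch.nn as nn


class CAbs(nn.Module):
    """Hypothetical clipped-absolute activation: min(1, |x|), element-wise."""

    def forward(self, x):
        return torch.clamp(torch.abs(x), max=1.0)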
def __init__(self, nonlin=nn.ReLU, no_step_last=False, use_bn=True, num_classes=1000,
             data_parallel=False):
    super(AlexNet, self).__init__()
    nl_name = 'nonlin'  # nonlin.__name__
    bias = not use_bn

    block0 = OrderedDict([
        ('conv0', nn.Conv2d(3, 96, kernel_size=12, stride=4, padding=0, bias=bias)),  # padding = valid
        ('{}0'.format(nl_name), nonlin()),
    ])
    # TODO: split conv2d for: conv1, conv3, conv4
    block1 = OrderedDict([
        ('conv1', nn.Conv2d(96, 256, kernel_size=5, padding=2, bias=bias)),  # padding = same
        ('batchnorm1', nn.BatchNorm2d(256, eps=1e-4)),
        ('maxpool1', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)),  # padding = same
        ('{}1'.format(nl_name), nonlin()),
    ])
    block2 = OrderedDict([
        ('conv2', nn.Conv2d(256, 384, kernel_size=3, bias=bias, padding=1)),  # padding = same
        ('batchnorm2', nn.BatchNorm2d(384, eps=1e-4)),
        ('maxpool2', nn.MaxPool2d(kernel_size=3, stride=2)),  # padding = same
        ('{}2'.format(nl_name), nonlin()),
    ])
    block3 = OrderedDict([
        ('conv3', nn.Conv2d(384, 384, kernel_size=3, padding=1, bias=bias)),  # padding = same
        ('batchnorm3', nn.BatchNorm2d(384, eps=1e-4)),
        ('{}3'.format(nl_name), nonlin()),
    ])
    block4 = OrderedDict([
        ('conv4', nn.Conv2d(384, 256, kernel_size=3, padding=1, bias=bias)),  # padding = same
        ('batchnorm4', nn.BatchNorm2d(256, eps=1e-4)),
        ('maxpool4', nn.MaxPool2d(kernel_size=3, stride=2, padding=0)),  # padding = valid
        ('{}4'.format(nl_name), nonlin()),
    ])
    block5 = OrderedDict([
        ('reshape_fc0', ReshapeBatch(-1)),
        ('fc0', nn.Linear(256 * 6 * 6, 4096, bias=bias)),
        # BatchNorm1d here: after ReshapeBatch the activations are 2-D (N, 4096),
        # so BatchNorm2d would reject the input.
        ('batchnorm_fc0', nn.BatchNorm1d(4096, eps=1e-4)),
        ('{}5'.format(nl_name), nonlin()),
    ])
    block6 = OrderedDict([
        ('fc1', nn.Linear(4096, 4096, bias=bias)),
        ('batchnorm_fc1', nn.BatchNorm1d(4096, eps=1e-4)),
    ])

    if not no_step_last:
        block6['{}6'.format(nl_name)] = nonlin()
        block7 = OrderedDict()
        final_block = block7
    else:
        block6['ReLU1'] = nn.ReLU()
        block7 = None
        final_block = block6
    final_block['fc2'] = nn.Linear(4096, num_classes, bias=True)

    if not use_bn:
        del block1['batchnorm1']
        del block2['batchnorm2']
        del block3['batchnorm3']
        del block4['batchnorm4']
        del block5['batchnorm_fc0']
        del block6['batchnorm_fc1']

    # self.blocks = [block0, block1, block2, block3, block4, block5, block6, block7]
    if data_parallel:
        conv_layers = nn.DataParallel(nn.Sequential(OrderedDict([
            ('block0', nn.Sequential(block0)),
            ('block1', nn.Sequential(block1)),
            ('block2', nn.Sequential(block2)),
            ('block3', nn.Sequential(block3)),
            ('block4', nn.Sequential(block4)),
        ])))
        fc_layers = OrderedDict([
            ('block5', nn.Sequential(block5)),
            ('block6', nn.Sequential(block6)),
        ])
        if block7 is not None:
            fc_layers['block7'] = nn.Sequential(block7)
        self.layers = nn.Sequential(conv_layers, nn.Sequential(fc_layers))
    else:
        od_layers = OrderedDict([
            ('block0', nn.Sequential(block0)),
            ('block1', nn.Sequential(block1)),
            ('block2', nn.Sequential(block2)),
            ('block3', nn.Sequential(block3)),
            ('block4', nn.Sequential(block4)),
            ('block5', nn.Sequential(block5)),
            ('block6', nn.Sequential(block6)),
        ])
        if block7 is not None:
            od_layers['block7'] = nn.Sequential(block7)
        self.layers = nn.Sequential(od_layers)
def __init__(self, nonlin=nn.ReLU, use_batchnorm=True, use_dropout=True,
             input_shape=(3, 40, 40), num_classes=10, no_step_last=False,
             separate_activations=True, multi_gpu_modules=False):
    super().__init__()
    self.use_batchnorm = use_batchnorm
    self.use_dropout = use_dropout
    self.separate_activations = separate_activations
    bias = not use_batchnorm

    if input_shape[1] == 40:
        pad0 = 0
        ks6 = 5
    elif input_shape[1] == 32:
        pad0 = 2
        ks6 = 4
    else:
        raise NotImplementedError("No other input sizes are currently supported")

    self.input_sizes = [list(input_shape), None, None, None, None, None, None]

    block0 = OrderedDict([
        ("conv0", nn.Conv2d(3, 48, kernel_size=5, padding=pad0, bias=True)),  # padding = valid
        ("maxpool0", nn.MaxPool2d(2)),  # padding = same
        ("nonlin1", nonlin()),  # 18
    ])
    block1 = OrderedDict([
        ("conv1", nn.Conv2d(48, 64, kernel_size=3, padding=1, bias=bias)),  # padding = same
        ("batchnorm1", nn.BatchNorm2d(64, eps=1e-4)),
        ("nonlin1", nonlin()),
    ])
    block2 = OrderedDict([
        ("conv2", nn.Conv2d(64, 64, kernel_size=3, padding=1, bias=bias)),  # padding = same
        ("batchnorm2", nn.BatchNorm2d(64, eps=1e-4)),
        ("maxpool1", nn.MaxPool2d(2)),  # padding = same
        ("nonlin2", nonlin()),  # 9
    ])
    block3 = OrderedDict([
        ("conv3", nn.Conv2d(64, 128, kernel_size=3, padding=0, bias=bias)),  # padding = valid
        ("batchnorm3", nn.BatchNorm2d(128, eps=1e-4)),
        ("nonlin3", nonlin()),  # 7
    ])
    block4 = OrderedDict([
        ("conv4", nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=bias)),  # padding = same
        ("batchnorm4", nn.BatchNorm2d(128, eps=1e-4)),
        ("nonlin4", nonlin()),
    ])
    block5 = OrderedDict([
        ("conv5", nn.Conv2d(128, 128, kernel_size=3, padding=0, bias=bias)),  # padding = valid
        ("batchnorm5", nn.BatchNorm2d(128, eps=1e-4)),
        ("nonlin5", nonlin()),  # 5
    ])
    block6 = OrderedDict([
        ("dropout", nn.Dropout2d()),
        ("conv6", nn.Conv2d(128, 512, kernel_size=ks6, padding=0, bias=bias)),  # padding = valid
        ("batchnorm6", nn.BatchNorm2d(512, eps=1e-4)),
        ("nonlin6", nonlin() if not no_step_last else CAbs()),
        # ("nonlin6", nonlin() if not relu_last_layer else nn.ReLU()),
    ])
    block7 = OrderedDict([
        ("reshape_fc1", ReshapeBatch(-1)),
        ("fc1", nn.Linear(512, num_classes, bias=True)),
    ])

    if not self.use_batchnorm:
        del block1["batchnorm1"]
        del block2["batchnorm2"]
        del block3["batchnorm3"]
        del block4["batchnorm4"]
        del block5["batchnorm5"]
        del block6["batchnorm6"]
    if not self.use_dropout:
        del block6["dropout"]

    block0 = nn.Sequential(block0)
    block1 = nn.Sequential(block1)
    block2 = nn.Sequential(block2)
    block3 = nn.Sequential(block3)
    block4 = nn.Sequential(block4)
    block5 = nn.Sequential(block5)
    block6 = nn.Sequential(block6)
    block7 = nn.Sequential(block7)

    if multi_gpu_modules:
        block0 = nn.DataParallel(block0)
        block1 = nn.DataParallel(block1)
        block2 = nn.DataParallel(block2)
        block3 = nn.DataParallel(block3)
        block4 = nn.DataParallel(block4)
        block5 = nn.DataParallel(block5)
        block6 = nn.DataParallel(block6)
        block7 = nn.DataParallel(block7)

    if self.separate_activations:
        self.all_modules = nn.ModuleList([
            nn.Sequential(block0),
            nn.Sequential(block1),
            nn.Sequential(block2),
            nn.Sequential(block3),
            nn.Sequential(block4),
            nn.Sequential(block5),
            nn.Sequential(block6),
            nn.Sequential(block7),
        ])
        self.all_activations = nn.ModuleList(
            [nonlin(), nonlin(), nonlin(), nonlin(), nonlin(), nonlin(), nonlin()])
    else:
        self.all_modules = nn.Sequential(OrderedDict([
            ("block0", nn.Sequential(block0)),
            ("block1", nn.Sequential(block1)),
            ("block2", nn.Sequential(block2)),
            ("block3", nn.Sequential(block3)),
            ("block4", nn.Sequential(block4)),
            ("block5", nn.Sequential(block5)),
            ("block6", nn.Sequential(block6)),
            ("block7", nn.Sequential(block7)),
        ]))
def __init__(self, nonlin=nn.ReLU, use_batchnorm=False, input_shape=(3, 32, 32),
             num_classes=10, separate_activations=True, multi_gpu_modules=False):
    super().__init__()
    self.use_batchnorm = use_batchnorm
    self.conv1_size = 32
    self.conv2_size = 64
    self.fc1_size = 1024
    self.fc2_size = num_classes
    self.separate_activations = separate_activations

    if input_shape == (1, 28, 28):
        self.input_sizes = [
            list(input_shape),
            [self.conv1_size,
             (input_shape[1] // 4 + 1) * (input_shape[2] // 4 + 1) // 4 - 1,
             self.conv2_size // 4 - 1],
            [self.conv2_size, input_shape[1] // 4, input_shape[2] // 4],
            [self.fc1_size],
        ]
    else:
        self.input_sizes = [
            list(input_shape),
            [self.conv1_size,
             (input_shape[1] // 4) * (input_shape[2] // 4) // 4 + 1,
             self.conv2_size // 4 + 1],
            [self.conv2_size, input_shape[1] // 4, input_shape[2] // 4],
            [self.fc1_size],
        ]

    block1 = OrderedDict([
        ("conv1", nn.Conv2d(input_shape[0], self.conv1_size, kernel_size=5, padding=3)),
        ("maxpool1", nn.MaxPool2d(2)),
        ("nonlin1", nonlin()),
    ])
    block2 = OrderedDict([
        ("conv2", nn.Conv2d(self.conv1_size, self.conv2_size, kernel_size=5, padding=2)),
        ("maxpool2", nn.MaxPool2d(2)),
        ("nonlin2", nonlin()),
    ])
    block3 = OrderedDict([
        ("batchnorm1", nn.BatchNorm2d(self.conv2_size)),
        ("reshape1", ReshapeBatch(-1)),
        ("fc1", nn.Linear((input_shape[1] // 4) * (input_shape[2] // 4) * self.conv2_size,
                          self.fc1_size)),
        ("nonlin3", nonlin()),
    ])
    block4 = OrderedDict([
        ("batchnorm2", nn.BatchNorm1d(self.fc1_size)),
        ("fc2", nn.Linear(self.fc1_size, self.fc2_size)),
    ])

    if not self.use_batchnorm:
        del block3["batchnorm1"]
        del block4["batchnorm2"]
    if self.separate_activations:
        del block1["nonlin1"]
        del block2["nonlin2"]
        del block3["nonlin3"]

    block1 = nn.Sequential(block1)
    block2 = nn.Sequential(block2)
    block3 = nn.Sequential(block3)
    block4 = nn.Sequential(block4)

    if multi_gpu_modules:
        block1 = nn.DataParallel(block1)
        block2 = nn.DataParallel(block2)
        block3 = nn.DataParallel(block3)
        block4 = nn.DataParallel(block4)

    if self.separate_activations:
        self.all_modules = nn.ModuleList([block1, block2, block3, block4])
        self.all_activations = nn.ModuleList([nonlin(), nonlin(), nonlin()])
    else:
        self.all_modules = nn.Sequential(OrderedDict([
            ("block1", block1),
            ("block2", block2),
            ("block3", block3),
            ("block4", block4),
        ]))
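# A hedged sketch of how the separate-activations layout above could be
# consumed by a forward pass (the class's real forward method is not shown
# here): when separate_activations is True, the per-block nonlinearities are
# removed from the blocks, so each all_modules[i] is assumed to be followed by
# all_activations[i], with no activation after the final (classifier) block.
def forward_separate_activations(model, x):
    for i, block in enumerate(model.all_modules):
        x = block(x)
        if i < len(model.all_activations):
            x = model.all_activations[i](x)
    return x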