def get_conv6():
    layers = []
    layers += [nn.Conv2d(3, 64, 3, padding=1)]
    layers += [nn.ReLU(inplace=True)]
    layers += [nn.Conv2d(64, 64, 3, padding=1)]
    layers += [nn.ReLU(inplace=True)]
    layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
    layers += [nn.Conv2d(64, 128, 3, padding=1)]
    layers += [nn.ReLU(inplace=True)]
    layers += [nn.Conv2d(128, 128, 3, padding=1)]
    layers += [nn.ReLU(inplace=True)]
    layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
    layers += [nn.Conv2d(128, 256, 3, padding=1)]
    layers += [nn.ReLU(inplace=True)]
    layers += [nn.Conv2d(256, 256, 3, padding=1)]
    layers += [nn.ReLU(inplace=True)]
    layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
    layers += [Flatten()]
    layers += [nn.Linear(4 * 4 * 256, 256)]
    layers += [nn.ReLU(inplace=True)]
    layers += [nn.Linear(256, 256)]
    layers += [nn.ReLU(inplace=True)]
    layers += [nn.Linear(256, 10)]
    return nn.Sequential(*layers)
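# Quick shape check (a sketch; assumes 3x32x32 CIFAR-10-style inputs and
# `import torch`. Three 2x2 max-pools reduce 32x32 to 4x4, which is why the
# first dense layer expects 4 * 4 * 256 inputs):
x = torch.randn(4, 3, 32, 32)
assert get_conv6()(x).shape == (4, 10)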
def cifar10_c4d3(conv_activation=nn.ReLU, dense_activation=nn.ReLU):
    """CNN for the CIFAR-10 dataset with 4 convolutional and 3 fc layers.

    Modified from:
    https://github.com/Zhenye-Na/deep-learning-uiuc/tree/master/assignments/mp3
    (Dropout, Dropout2d, and BatchNorm2d removed)
    """
    return nn.Sequential(
        # Conv layer block 1
        nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1),
        conv_activation(),
        nn.Conv2d(in_channels=16, out_channels=16, kernel_size=3, padding=1),
        conv_activation(),
        nn.MaxPool2d(kernel_size=2, stride=2),
        # Conv layer block 2
        nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1),
        conv_activation(),
        nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, padding=1),
        conv_activation(),
        nn.MaxPool2d(kernel_size=2, stride=2),
        # Flatten
        Flatten(),
        # Dense layers
        nn.Linear(2048, 512),
        dense_activation(),
        nn.Linear(512, 64),
        dense_activation(),
        nn.Linear(64, 10),
    )
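# The activation arguments are passed as classes and instantiated once per
# layer, so swapping activations takes one call; e.g. a Tanh variant
# (a usage sketch, not from the source):
tanh_model = cifar10_c4d3(conv_activation=nn.Tanh, dense_activation=nn.Tanh)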
def __init__(self, binary=False):
    super().__init__()
    self.conv1 = nn.Conv2d(1, 32, 5, 1, padding=2)
    self.conv2 = nn.Conv2d(32, 64, 5, 1, padding=2)
    self.flatten = Flatten()
    self.fc1 = nn.Linear(7 * 7 * 64, 1024)
    self.fc2 = nn.Linear(1024, 1 if binary else 10)
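# The fc1 input size of 7 * 7 * 64 implies two 2x2 max-pool steps on 28x28
# MNIST inputs. A minimal forward() sketch consistent with those shapes
# (the pooling calls and ReLU placement are assumptions, not from the
# source; assumes `import torch.nn.functional as F`):
def forward(self, x):
    x = F.relu(self.conv1(x))  # (N, 32, 28, 28)
    x = F.max_pool2d(x, 2, 2)  # (N, 32, 14, 14)
    x = F.relu(self.conv2(x))  # (N, 64, 14, 14)
    x = F.max_pool2d(x, 2, 2)  # (N, 64, 7, 7)
    x = self.flatten(x)        # (N, 7 * 7 * 64)
    x = F.relu(self.fc1(x))
    return self.fc2(x)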
def cifar10_model():
    """FCNN architecture used by Chen et al. on CIFAR-10.

    The architecture uses the following neuron structure:
    3072-1024-512-256-128-64-32-16-10
    with sigmoid activations and linear outputs.

    Weights are initialized with the Xavier method, biases are set to 0.
    """
    model = Sequential(
        Flatten(),
        Linear(3072, 1024),
        Sigmoid(),
        Linear(1024, 512),
        Sigmoid(),
        Linear(512, 256),
        Sigmoid(),
        Linear(256, 128),
        Sigmoid(),
        Linear(128, 64),
        Sigmoid(),
        Linear(64, 32),
        Sigmoid(),
        Linear(32, 16),
        Sigmoid(),
        Linear(16, 10),
    )
    xavier_init(model)
    return model
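# `xavier_init` is defined elsewhere in the repo; a minimal sketch of what
# the docstring describes (Xavier weights, zero biases). The uniform variant
# of Xavier initialization is an assumption:
def xavier_init(model):
    for module in model.modules():
        if isinstance(module, Linear):
            torch.nn.init.xavier_uniform_(module.weight)
            torch.nn.init.zeros_(module.bias)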
def make_classification_problem(pooling_cls):
    model = torch.nn.Sequential(convlayer(), pooling(pooling_cls), Flatten())
    Y = torch.randint(high=X.shape[1], size=(model(X).shape[0],))
    lossfunc = extend(torch.nn.CrossEntropyLoss())
    return TestProblem(X, Y, model, lossfunc)
def make_regression_problem(pooling_cls):
    model = torch.nn.Sequential(
        convlayer(), pooling(pooling_cls), Flatten(), linearlayer()
    )
    Y = torch.randn(size=(model(X).shape[0], 1))
    lossfunc = extend(torch.nn.MSELoss())
    return TestProblem(X, Y, model, lossfunc)
def make_regression_problem(conv_cls, act_cls):
    model = torch.nn.Sequential(
        convlayer(conv_cls, TEST_SETTINGS),
        act_cls(),
        Flatten(),
        linearlayer(TEST_SETTINGS),
    )
    Y = torch.randn(size=(model(X).shape[0], 1))
    lossfunc = extend(torch.nn.MSELoss())
    return TestProblem(X, Y, model, lossfunc)
def data_conv():
    input_size = (TEST_SETTINGS["batch"],) + TEST_SETTINGS["in_features"]
    temp_model = Sequential(convlayer(False), convlayer2(False), Flatten())
    X = randn(size=input_size)
    Y = randint(high=X.shape[1], size=(temp_model(X).shape[0],))
    del temp_model

    manual_seed(0)
    model1 = Sequential(convlayer(False), convlayer2(False), Flatten())
    manual_seed(0)
    model2 = Sequential(convlayer(True), convlayer2(True), Flatten())

    loss = CrossEntropyLoss()
    return X, Y, model1, model2, loss
def make_2layer_classification_problem(conv_cls, act_cls):
    model = torch.nn.Sequential(
        convlayer(conv_cls, TEST_SETTINGS),
        act_cls(),
        convlayer2(conv_cls, TEST_SETTINGS),
        act_cls(),
        Flatten(),
    )
    Y = torch.randint(high=X.shape[1], size=(model(X).shape[0],))
    lossfunc = extend(torch.nn.CrossEntropyLoss())
    return TestProblem(X, Y, model, lossfunc)
def dummy_forward_pass_conv():
    N, C, H, W = 2, 3, 4, 4
    X = torch.randn(N, C, H, W)
    Y = torch.randint(high=5, size=(N,))
    conv = Conv2d(3, 2, 2)
    lin = Linear(18, 5)
    model = extend(Sequential(conv, Flatten(), lin))
    loss = extend(CrossEntropyLoss())

    def forward():
        return loss(model(X), Y)

    return forward, (conv.weight, lin.weight), (conv.bias, lin.bias)
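# Usage sketch for the closure above (the names on the left are
# illustrative, not from the source):
forward, weights, biases = dummy_forward_pass_conv()
loss_value = forward()                                    # scalar cross-entropy loss
grads = torch.autograd.grad(loss_value, weights + biases)  # plain autograd gradients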
def get_vgg(cfg, use_bn):
    layers = []
    in_channels = 3
    for x in cfg:
        if x == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1)]
            if use_bn:
                layers += [nn.Conv2d(x, x, kernel_size=1)]
                # layers += [nn.BatchNorm2d(x)]
            layers += [nn.ReLU(inplace=True)]
            in_channels = x
    layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
    layers += [Flatten(), nn.Linear(512, 10)]
    return nn.Sequential(*layers)
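# Example usage with the standard VGG11 configuration ('M' marks a 2x2
# max-pool; the cfg list below follows the common VGG convention and is an
# assumption about what callers pass in). Five pools shrink 32x32 CIFAR-10
# inputs to 1x1x512, matching the final Linear(512, 10):
cfg_vgg11 = [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M']
model = get_vgg(cfg_vgg11, use_bn=False)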
def simple_mnist_model_1st_order(use_gpu=False):
    """Train a simple MNIST model using SGD."""
    device = torch.device("cuda:0" if use_gpu else "cpu")
    model = Sequential(Flatten(), Linear(784, 10))
    loss_function = CrossEntropyLoss()
    data_loader = MNISTLoader(1000, 1000)
    optimizer = SGD(model.parameters(), lr=0.1)
    # initialize training
    train = FirstOrderTraining(
        model,
        loss_function,
        optimizer,
        data_loader,
        logdir,
        num_epochs,
        logs_per_epoch=logs_per_epoch,
        device=device,
    )
    train.run()
def torch_fn():
    """Create sequence of layers in torch."""
    set_seeds(0)
    return Sequential(
        Conv2d(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            bias=True,
        ),
        ReLU(),
        MaxPool2d(pool_kernel, padding=pool_padding),
        Flatten(),
        Linear(out1, out2, bias=False),
        Sigmoid(),
        Linear(out2, out3, bias=True),
    )
def training_example(seed, test_batch, use_gpu=False):
    """Training instance setting seed and test batch size in advance."""
    set_seeds(seed)
    device = torch.device("cuda:0" if use_gpu else "cpu")
    model = Sequential(Flatten(), Linear(784, 10))
    loss_function = CrossEntropyLoss()
    data_loader = MNISTLoader(1000, test_batch)
    optimizer = SGD(model.parameters(), lr=0.1)
    # initialize training
    train = FirstOrderTraining(
        model,
        loss_function,
        optimizer,
        data_loader,
        logdir,
        num_epochs,
        logs_per_epoch=logs_per_epoch,
        device=device,
    )
    return train
def mnist_model():
    """FCNN architecture used by Chen et al. on MNIST.

    The architecture uses the following structure:
    (784->512)->(sigmoid)->(512->128)->(sigmoid)->(128->32)->(sigmoid)->(32->10)

    Weights are initialized with the Xavier method, biases are set to 0.
    """
    model = Sequential(
        Flatten(),
        Linear(784, 512),
        Sigmoid(),
        Linear(512, 128),
        Sigmoid(),
        Linear(128, 32),
        Sigmoid(),
        Linear(32, 10),
    )
    xavier_init(model)
    return model
def simple_mnist_model_2nd_order_cvp(use_gpu=False):
    """Train a simple MNIST model using the 2nd-order CVP optimizer."""
    device = torch.device("cuda:0" if use_gpu else "cpu")
    model = convert_torch_to_cvp(Sequential(Flatten(), Linear(784, 10)))
    loss_function = convert_torch_to_cvp(CrossEntropyLoss())
    data_loader = MNISTLoader(1000, 1000)
    optimizer = CGNewton(model.parameters(), lr=0.1, alpha=0.1)
    num_epochs, logs_per_epoch = 1, 5
    modify_2nd_order_terms = "abs"
    logdir = directory_in_data("test_training_simple_mnist_model")
    # initialize training
    train = CVPSecondOrderTraining(
        model,
        loss_function,
        optimizer,
        data_loader,
        logdir,
        num_epochs,
        modify_2nd_order_terms,
        logs_per_epoch=logs_per_epoch,
        device=device,
    )
    train.run()
def training_fn():
    """Return training instance."""
    device = torch.device("cuda:0" if use_gpu else "cpu")
    model = Sequential(Flatten(), Linear(784, 10))
    loss_function = CrossEntropyLoss()
    data_loader = MNISTLoader(1000, 1000)
    optimizer = SGD(model.parameters(), lr=0.1)
    num_epochs, logs_per_epoch = 1, 5
    logdir = directory_in_data(
        "test_training_simple_mnist_model_{}".format("gpu" if use_gpu else "cpu")
    )
    # initialize training
    train = FirstOrderTraining(
        model,
        loss_function,
        optimizer,
        data_loader,
        logdir,
        num_epochs,
        logs_per_epoch=logs_per_epoch,
        device=device,
    )
    return train
        Y = torch.randint(high=2, size=(N,))
    else:
        raise NotImplementedError
    return (X, Y)


models = [
    Sequential(xtd(Linear(D, 2))),
    Sequential(xtd(Linear(D, 2)), xtd(ReLU())),
    Sequential(xtd(Linear(D, 2)), xtd(Sigmoid())),
    Sequential(xtd(Linear(D, 2)), xtd(Tanh())),
    Sequential(xtd(Linear(D, 2)), xtd(Dropout())),
]

img_models = [
    Sequential(xtd(Conv2d(3, 2, 2)), Flatten(), xtd(Linear(18, 2))),
    Sequential(xtd(MaxPool2d(3)), Flatten(), xtd(Linear(3, 2))),
    Sequential(xtd(AvgPool2d(3)), Flatten(), xtd(Linear(3, 2))),
    # Sequential(xtd(Conv2d(3, 2, 2)), xtd(MaxPool2d(3)), Flatten(), xtd(Linear(2, 2))),
    # Sequential(xtd(Conv2d(3, 2, 2)), xtd(AvgPool2d(3)), Flatten(), xtd(Linear(2, 2))),
    # Sequential(xtd(Conv2d(3, 2, 2)), xtd(ReLU()), Flatten(), xtd(Linear(18, 2))),
    # Sequential(xtd(Conv2d(3, 2, 2)), xtd(Sigmoid()), Flatten(), xtd(Linear(18, 2))),
    # Sequential(xtd(Conv2d(3, 2, 2)), xtd(Tanh()), Flatten(), xtd(Linear(18, 2))),
    # Sequential(xtd(Conv2d(3, 2, 2)), xtd(Dropout()), Flatten(), xtd(Linear(18, 2))),
]


def all_problems():
    problems = []
    for model in models:
        problems.append(
        transform=torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize((0.1307,), (0.3081,)),
        ])),
    batch_size=BATCH_SIZE,
    shuffle=True)

model = torch.nn.Sequential(
    torch.nn.Conv2d(1, 20, 5, 1),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(2, 2),
    torch.nn.Conv2d(20, 50, 5, 1),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(2, 2),
    Flatten(),  # PyTorch < 1.2 doesn't have a Flatten layer
    torch.nn.Linear(4 * 4 * 50, 500),
    torch.nn.ReLU(),
    torch.nn.Linear(500, 10),
)
loss_function = torch.nn.CrossEntropyLoss()


def get_accuracy(output, targets):
    """Helper function to print the accuracy."""
    predictions = output.argmax(dim=1, keepdim=True).view_as(targets)
    return predictions.eq(targets).float().mean().item()
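# A minimal training step over the loader defined above (a sketch; the
# loader variable name `mnist_loader` and the SGD hyperparameters are
# assumptions, not from the source):
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
for inputs, targets in mnist_loader:
    optimizer.zero_grad()
    output = model(inputs)
    loss = loss_function(output, targets)
    loss.backward()
    optimizer.step()
    print("accuracy: {:.3f}".format(get_accuracy(output, targets)))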
def get_modules(self):
    modules = self.get_network_modules()
    modules.append(Flatten())
    return modules
def torch_fn():
    return Flatten()
def get_modules(self):
    modules = self.get_network_modules()
    modules.append(Flatten())
    modules.append(self.sum_output_layer())
    return modules
def mean_allcnnc():
    """The All-CNN-C implementation of torch.mean().

    Uses the backpack version of the Flatten layer (edited by Xingchen Wan).
    """
    from backpack.core.layers import Flatten

    return nn.Sequential(nn.AvgPool2d(kernel_size=(6, 6)), Flatten())
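# Sanity check (a sketch, assuming 6x6 feature maps as in All-CNN-C's final
# stage): average-pooling the full spatial extent and flattening equals a
# mean over the spatial dimensions.
x = torch.randn(2, 10, 6, 6)
out = mean_allcnnc()(x)
assert torch.allclose(out, x.mean(dim=(2, 3)))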
def replace_deepobs_flatten(c3d3):
    """Replace DeepOBS flatten with bpexts Flatten."""
    c3d3.flatten = Flatten()
    return c3d3