    def __init__(self):
        super().__init__()

        # Conv1 -> LeakyReLU -> Dropout 0.2
        # Conv2 -> LeakyReLU -> MaxPool -> Dropout 0.2 -> Flatten
        # FC1 -> LeakyReLU -> Dropout 0.2 -> FC2
        self.model = nn.Sequential(
            Conv2d(1, 32, kernel_size=3, stride=1, padding=1),   # (32, 28, 28)
            LeakyReLU(),
            Dropout(0.2),
            Conv2d(32, 64, kernel_size=3, stride=1, padding=1),  # (64, 28, 28)
            LeakyReLU(),
            MaxPool2d(kernel_size=2, stride=2),                  # (64, 14, 14)
            Dropout(0.2),
            Flatten(),
            Linear(64 * 14 * 14, 128),
            LeakyReLU(),
            Dropout(0.2),
            Linear(128, 10),
        )
        self.apply(initialize_weights)
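    # A minimal forward pass, added as a sketch for completeness: it assumes
    # the surrounding class is an nn.Module whose layers all live in
    # self.model, as built above (the original file may define forward()
    # elsewhere).
    def forward(self, x):
        # x: (batch, 1, 28, 28) -> logits: (batch, 10)
        return self.model(x)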
    diff_average = (w1 - w2).mean()

    if verbose:
        print("Model on device: {}".format(next(network.parameters()).device))
        print("Sample outputs: ")
        print("Pre teleportation: ", pred1.flatten()[:10])
        print("Post teleportation: ", pred2.flatten()[:10])

    # Teleportation must change the weights while leaving the network
    # function (and hence the predictions) unchanged.
    assert not np.allclose(w1, w2)
    assert np.allclose(pred1, pred2), \
        "Teleportation did not work. Average difference: {}".format(diff_average)

    print("Teleportation successful.")

    return diff_average


if __name__ == '__main__':
    import torch.nn as nn
    from torch.nn.modules import Flatten

    from neuralteleportation.layers.layer_utils import swap_model_modules_for_COB_modules

    cnn_model = torch.nn.Sequential(
        nn.Conv2d(1, 32, 3, 1),
        nn.ReLU(),
        nn.Conv2d(32, 64, 3, stride=2),
        nn.ReLU(),
        Flatten(),
        nn.Linear(9216, 128),
        nn.ReLU(),
        nn.Linear(128, 10),
    )

    cnn_model = swap_model_modules_for_COB_modules(cnn_model)

    test_cuda_teleport(network=cnn_model, verbose=True)
def main(**kwargs):
    min_pts = kwargs['min_points']
    max_pts = kwargs['max_points']
    current_pts = (max_pts - min_pts) // 2 + min_pts
    num_classes = kwargs['n_classes']
    batch_size = kwargs['batch_size']
    count_limit = kwargs['term_count']
    num_filters_factor = kwargs['num_filters_factor']
    use_flat_model = kwargs['use_flat_model']
    num_input_channels = kwargs['num_channels']

    print(kwargs)

    # Binary search for the largest number of random points the model can fit.
    while True:
        num_points = current_pts
        print('Testing {} points...'.format(num_points))

        random_data = torch.tensor(
            np.random.rand(num_points, num_input_channels, 16, 16)).float()
        if num_classes == 1:
            random_labels = torch.tensor(
                np.random.randint(0, 2, size=[num_points, 1])).float()
        else:
            random_labels = torch.tensor(
                np.random.randint(0, num_classes, size=[num_points]))

        if use_flat_model:
            model = torch.nn.Sequential(
                Flatten(),
                torch.nn.Linear(16 * 16 * num_input_channels,
                                int(8 * num_filters_factor)),
                torch.nn.Sigmoid(),
                torch.nn.Linear(int(8 * num_filters_factor), num_classes),
            )
        else:
            model = torch.nn.Sequential(
                torch.nn.Conv2d(num_input_channels,
                                int(8 * num_filters_factor), (4, 4), (2, 2)),
                torch.nn.Sigmoid(),
                Flatten(),
                torch.nn.Linear(7 * 7 * int(8 * num_filters_factor),
                                num_classes),
            )

        print('Num Params {}'.format(
            sum([np.prod(i.shape) for i in model.parameters()])))

        if num_classes == 1:
            criterion = torch.nn.BCEWithLogitsLoss()
        else:
            criterion = torch.nn.CrossEntropyLoss()

        if torch.cuda.is_available():
            model = model.cuda()

        opt = torch.optim.Adam(model.parameters(), lr=1e-3)

        dataset = torch.utils.data.TensorDataset(random_data, random_labels)
        data_loader = torch.utils.data.DataLoader(dataset,
                                                  batch_size=batch_size,
                                                  pin_memory=True)

        best_acc = 0
        static_ct = 0
        with tqdm.tqdm() as pbar:
            # Train until accuracy clears the success threshold or stops
            # improving for count_limit consecutive epochs.
            while True:
                epoch_losses = []
                epoch_acc = []
                for (data, labels) in data_loader:
                    if torch.cuda.is_available():
                        data = data.cuda()
                        labels = labels.cuda()

                    opt.zero_grad()
                    model_output = model(data)
                    loss = criterion(model_output, labels)

                    with torch.no_grad():
                        if num_classes == 1:
                            epoch_acc.append(
                                (torch.abs((model_output >= 0.5).float() -
                                           labels) <= 0.1).sum().item() /
                                data.shape[0])
                        else:
                            epoch_acc.append(
                                (model_output.argmax(dim=1) ==
                                 labels).sum().item() / data.shape[0])

                    loss.backward()
                    opt.step()

                    epoch_losses.append(loss.item())

                if np.mean(epoch_acc) > best_acc:
                    best_acc = np.mean(epoch_acc)
                    static_ct = 0
                    if best_acc > kwargs['success_threshold']:
                        break
                else:
                    static_ct += 1
                    if static_ct > count_limit:
                        break

                pbar.set_description_str('L: {}, A: {}, B: {}, S: {}'.format(
                    np.mean(epoch_losses), np.mean(epoch_acc), best_acc,
                    static_ct))
                pbar.update(1)

        # If the model fit the data, search higher; otherwise search lower.
        if static_ct <= count_limit:
            min_pts = current_pts
        else:
            max_pts = current_pts

        if abs(max_pts - min_pts) < kwargs['search_threshold']:
            break

        current_pts = (max_pts - min_pts) // 2 + min_pts

    print('Done!')
    print('Max: {}, Min: {}'.format(max_pts, min_pts))
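
if __name__ == '__main__':
    # Example invocation (a hedged sketch: the keyword names mirror the
    # kwargs read at the top of main(), but the values here are illustrative,
    # not the ones used in the original experiments).
    main(min_points=100,
         max_points=10000,
         n_classes=10,
         batch_size=64,
         term_count=50,
         num_filters_factor=1.0,
         use_flat_model=False,
         num_channels=3,
         success_threshold=0.99,
         search_threshold=10)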
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.nn.modules import Flatten
from torchvision.datasets import MNIST

from neuralteleportation.metrics import accuracy
# TrainingConfig, TrainingMetrics, train, and test are presumably imported
# from the repository's training utilities earlier in the original file.

mnist_train = MNIST('/tmp', train=True, download=True,
                    transform=transforms.ToTensor())
mnist_val = MNIST('/tmp', train=False, download=True,
                  transform=transforms.ToTensor())
mnist_test = MNIST('/tmp', train=False, download=True,
                   transform=transforms.ToTensor())

model = torch.nn.Sequential(
    Flatten(),
    nn.Linear(784, 128),
    nn.ReLU(),
    nn.Linear(128, 10),
)

config = TrainingConfig()
metrics = TrainingMetrics(nn.CrossEntropyLoss(), [accuracy])

train(model, train_dataset=mnist_train, metrics=metrics, config=config,
      val_dataset=mnist_val)
print(test(model, mnist_test, metrics, config))
print("Dataset length: ", len(dataset)) h_size = args.h_size latent_size = args.l_size # Define the Gz network architecture Gz_net = torch.nn.Sequential( torch.nn.Conv2d(3, h_size, 1), # 28x28 ResidualConvolutionLayer(h_size, h_size), ResidualConvolutionLayer(h_size, h_size), ResidualConvolutionLayer(h_size, h_size * 2, downscale=True), # 14x14 ResidualConvolutionLayer(h_size * 2, h_size * 2), ResidualConvolutionLayer(h_size * 2, h_size * 2), ResidualConvolutionLayer(h_size * 2, h_size * 4, downscale=True), # 7x7 Flatten(), torch.nn.Linear(7 * 7 * h_size * 4, h_size * 8), torch.nn.BatchNorm1d(h_size * 8), Mish(), torch.nn.Linear(h_size * 8, latent_size * 2), ) Gx_net = torch.nn.Sequential( torch.nn.Linear(latent_size, 7 * 7 * h_size * 4), Reshape(-1, h_size * 4, 7, 7), ResidualConvolutionTransposeLayer(h_size * 4, h_size * 4), ResidualConvolutionTransposeLayer(h_size * 4, h_size * 4), ResidualConvolutionTransposeLayer(h_size * 4, h_size * 2, upscale=True), ResidualConvolutionTransposeLayer(h_size * 2, h_size * 2), ResidualConvolutionTransposeLayer(h_size * 2, h_size * 2), ResidualConvolutionTransposeLayer(h_size * 2, h_size, upscale=True),
    def __init__(self,
                 output_blocks=[DEFAULT_BLOCK_INDEX, 4],
                 resize_input=True,
                 normalize_input=True,
                 requires_grad=False,
                 use_fid_inception=True):
        """Build pretrained InceptionV3

        Parameters
        ----------
        output_blocks : list of int
            Indices of blocks to return features of. Possible values are:
                - 0: corresponds to output of first max pooling
                - 1: corresponds to output of second max pooling
                - 2: corresponds to output which is fed to aux classifier
                - 3: corresponds to output of final average pooling
                - 4: corresponds to output of the fully connected layer
        resize_input : bool
            If true, bilinearly resizes input to width and height 299 before
            feeding input to model. As the network without fully connected
            layers is fully convolutional, it should be able to handle inputs
            of arbitrary size, so resizing might not be strictly needed
        normalize_input : bool
            If true, scales the input from range (0, 1) to the range the
            pretrained Inception network expects, namely (-1, 1)
        requires_grad : bool
            If true, parameters of the model require gradients. Possibly
            useful for finetuning the network
        use_fid_inception : bool
            If true, uses the pretrained Inception model used in Tensorflow's
            FID implementation. If false, uses the pretrained Inception model
            available in torchvision. The FID Inception model has different
            weights and a slightly different structure from torchvision's
            Inception model. If you want to compute FID scores, you are
            strongly advised to set this parameter to true to get comparable
            results.
        """
        super(InceptionV3, self).__init__()

        self.resize_input = resize_input
        self.normalize_input = normalize_input
        self.output_blocks = sorted(output_blocks)
        self.last_needed_block = max(output_blocks)

        assert self.last_needed_block <= 4, \
            'Last possible output block index is 4'

        self.blocks = nn.ModuleList()

        if use_fid_inception:
            inception = fid_inception_v3()
        else:
            inception = models.inception_v3(pretrained=True)

        # Block 0: input to maxpool1
        block0 = [
            inception.Conv2d_1a_3x3,
            inception.Conv2d_2a_3x3,
            inception.Conv2d_2b_3x3,
            nn.MaxPool2d(kernel_size=3, stride=2)
        ]
        self.blocks.append(nn.Sequential(*block0))

        # Block 1: maxpool1 to maxpool2
        if self.last_needed_block >= 1:
            block1 = [
                inception.Conv2d_3b_1x1,
                inception.Conv2d_4a_3x3,
                nn.MaxPool2d(kernel_size=3, stride=2)
            ]
            self.blocks.append(nn.Sequential(*block1))

        # Block 2: maxpool2 to aux classifier
        if self.last_needed_block >= 2:
            block2 = [
                inception.Mixed_5b,
                inception.Mixed_5c,
                inception.Mixed_5d,
                inception.Mixed_6a,
                inception.Mixed_6b,
                inception.Mixed_6c,
                inception.Mixed_6d,
                inception.Mixed_6e,
            ]
            self.blocks.append(nn.Sequential(*block2))

        # Block 3: aux classifier to final avgpool
        if self.last_needed_block >= 3:
            block3 = [
                inception.Mixed_7a,
                inception.Mixed_7b,
                inception.Mixed_7c,
                nn.AdaptiveAvgPool2d(output_size=(1, 1))
            ]
            self.blocks.append(nn.Sequential(*block3))

        # Block 4: final avgpool to classifier logits
        if self.last_needed_block >= 4:
            block4 = [
                # N x 2048 x 1 x 1
                Flatten(),
                # N x 2048
                inception.fc
            ]
            self.blocks.append(nn.Sequential(*block4))

        for param in self.parameters():
            param.requires_grad = requires_grad
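    # Usage sketch (hedged: it assumes this class's forward(), defined later
    # in the original file, returns one feature tensor per requested block,
    # as in the standard pytorch-fid implementation):
    #
    #   model = InceptionV3(output_blocks=[3]).eval()
    #   with torch.no_grad():
    #       pool_feats = model(torch.rand(2, 3, 299, 299))[0]
    #   # pool_feats has shape (2, 2048, 1, 1)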
    def __init__(self):
        super(TransferNet, self).__init__()

        # Preliminary layer
        self.prep_conv = nn.Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1),
                                   padding=(1, 1), bias=False)
        self.prep_bn = nn.BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True,
                                      track_running_stats=True)
        self.prep_relu = nn.ReLU()

        # Layer 2
        self.layer2_conv = nn.Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1),
                                     padding=(1, 1), bias=False)
        self.layer2_bn = nn.BatchNorm2d(128, eps=1e-05, momentum=0.1,
                                        affine=True, track_running_stats=True)
        self.layer2_relu = nn.ReLU()
        self.layer2_pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0,
                                        dilation=1, ceil_mode=False)

        # Layer 3
        self.layer3_conv = nn.Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1),
                                     padding=(1, 1), bias=False)
        self.layer3_bn = nn.BatchNorm2d(256, eps=1e-05, momentum=0.1,
                                        affine=True, track_running_stats=True)
        self.layer3_relu = nn.ReLU()
        self.layer3_pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0,
                                        dilation=1, ceil_mode=False)

        # Layer 4
        self.layer4_conv = nn.Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1),
                                     padding=(1, 1), bias=False)
        self.layer4_bn = nn.BatchNorm2d(512, eps=1e-05, momentum=0.1,
                                        affine=True, track_running_stats=True)
        self.layer4_relu = nn.ReLU(inplace=True)
        self.layer4_pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0,
                                        dilation=1, ceil_mode=False)

        # Final layer
        self.final_pool = nn.MaxPool2d(kernel_size=4, stride=4, padding=0,
                                       dilation=1, ceil_mode=False)
        self.final_flatten = Flatten()
        self.final_linear = nn.Linear(in_features=512, out_features=10,
                                      bias=True)
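    # A minimal forward pass (a sketch: it assumes the layers are applied in
    # the order they are defined above; the original file may define forward()
    # elsewhere). For a 3x32x32 input, the spatial size halves at each pooled
    # layer (32 -> 16 -> 8 -> 4), and final_pool reduces 4x4 to 1x1, so the
    # flattened features have 512 dimensions, matching final_linear.
    def forward(self, x):
        x = self.prep_relu(self.prep_bn(self.prep_conv(x)))
        x = self.layer2_pool(self.layer2_relu(self.layer2_bn(self.layer2_conv(x))))
        x = self.layer3_pool(self.layer3_relu(self.layer3_bn(self.layer3_conv(x))))
        x = self.layer4_pool(self.layer4_relu(self.layer4_bn(self.layer4_conv(x))))
        x = self.final_flatten(self.final_pool(x))
        return self.final_linear(x)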