class Model:
    """Thin training/evaluation wrapper around a project-local ConvNet classifier."""

    def __init__(self, num_epochs=5, num_classes=10, batch_size=100,
                 learning_rate=0.001):
        """Build the network, loss function and Adam optimizer.

        :param num_epochs: passes over the training set performed by ``train``
        :param num_classes: number of output classes forwarded to ``ConvNet``
        :param batch_size: stored for bookkeeping only; the data loaders batch
            the data themselves
        :param learning_rate: Adam step size
        """
        self.num_epochs = num_epochs
        self.num_classes = num_classes
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.model = ConvNet(num_classes)
        # Loss and optimizer
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.model.parameters(),
                                          lr=learning_rate)

    def train(self, train_loader):
        """Run mini-batch training for ``num_epochs`` epochs over ``train_loader``."""
        total_step = len(train_loader)
        for epoch in range(self.num_epochs):
            for i, (images, labels) in enumerate(train_loader):
                # Forward pass
                outputs = self.model(images)
                loss = self.criterion(outputs, labels)
                # Backward and optimize
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
                # Progress report every 100 mini-batches.
                if (i + 1) % 100 == 0:
                    print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(
                        epoch + 1, self.num_epochs, i + 1, total_step,
                        loss.item()))

    def eval(self, test_loader):
        """Evaluate the network on ``test_loader`` and return accuracy in percent.

        BUG FIX: the original computed ``correct`` and ``total`` and then
        silently discarded them — the accuracy is now printed and returned.
        Returns 0.0 for an empty loader instead of dividing by zero.
        """
        self.model.eval()
        with torch.no_grad():
            correct = 0
            total = 0
            for images, labels in test_loader:
                outputs = self.model(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        accuracy = 100 * correct / total if total else 0.0
        print('Test Accuracy of the model on the {} test images: {} %'.format(
            total, accuracy))
        return accuracy

    def save(self, path='model.ckpt'):
        """Save the model checkpoint.

        :param path: target file; default kept backward compatible with the
            original hard-coded ``'model.ckpt'``.
        """
        torch.save(self.model.state_dict(), path)
class MnistOptimizee:
    """Ensemble-based optimizee wrapping a ConvNet on MNIST (later notMNIST).

    Network parameters travel as flat numpy vectors: ``create_individual``
    draws random ensembles, ``set_parameters`` loads the ensemble mean into
    the network, logs train/test metrics, and returns each member's outputs
    on the current training batch.
    """

    def __init__(self, seed, n_ensembles, batch_size, root, path):
        self.random_state = np.random.RandomState(seed=seed)
        self.n_ensembles = n_ensembles
        self.batch_size = batch_size
        self.root = root
        self.path = path
        self.conv_net = ConvNet().to(device)
        self.criterion = nn.CrossEntropyLoss()
        # Project-local loader exposing MNIST and notMNIST iterators.
        self.data_loader = DataLoader()
        self.data_loader.init_iterators(self.root, self.path, self.batch_size)
        self.dataiter_mnist = self.data_loader.dataiter_mnist
        self.testiter_mnist = self.data_loader.testiter_mnist
        self.dataiter_notmnist = self.data_loader.dataiter_notmnist
        self.testiter_notmnist = self.data_loader.testiter_notmnist
        self.generation = 0
        self.inputs, self.labels = self.dataiter_mnist()
        self.test_input, self.test_label = self.testiter_mnist()
        # Set once the notMNIST test set has replaced the MNIST one
        # (see set_parameters for the corresponding fix).
        self._notmnist_test_loaded = False
        # For plotting
        self.train_pred = []
        self.test_pred = []
        self.train_acc = []
        self.test_acc = []
        self.train_cost = []
        self.test_cost = []
        self.targets = []
        self.output_activity_train = []
        self.output_activity_test = []
        self.targets.append(self.labels.numpy())

    def create_individual(self):
        """Return the initial ensemble: uniform random flat parameter vectors.

        :return: dict with ``conv_params`` (n_ensembles x n_params array),
            ``targets`` (tuple of per-sample numpy labels) and ``input``
            (current training batch as a numpy array).
        """
        conv_ensembles = []
        with torch.no_grad():
            # Total number of scalar parameters in the network.
            length = 0
            for key in self.conv_net.state_dict().keys():
                length += self.conv_net.state_dict()[key].nelement()
            for _ in range(self.n_ensembles):
                conv_ensembles.append(np.random.uniform(-1, 1, size=length))
            # convert targets to numpy()
            targets = tuple([label.numpy() for label in self.labels])
            return dict(conv_params=np.array(conv_ensembles),
                        targets=targets,
                        input=self.inputs.squeeze().numpy())

    @staticmethod
    def _he_init(weights, gain=0):
        """He/Kaiming initialization as in He et al., "Delving deep into
        rectifiers". Values are sampled from N(0, std) with
        std = sqrt(2 / ((1 + gain^2) * fan_in)).

        Note: only valid when the network non-linearity is `relu`.

        :param weights: tensor whose fan-in determines the std deviation
        :param gain: additional scaling factor, default 0
        :return: numpy array with ``weights.numel()`` samples
        """
        fan_in = torch.nn.init._calculate_correct_fan(weights, 'fan_in')
        stddev = np.sqrt(2. / fan_in * (1 + gain**2))
        return np.random.normal(0, stddev, weights.numel())

    def set_parameters(self, ensembles):
        """Load the ensemble mean into the net, log metrics, and return the
        per-member outputs for the current training batch.

        BUG FIX: the original referenced the *global* ``model`` instead of
        ``self`` in the dataset-switch conditions, and gated the notMNIST
        test-set switch on ``generation == generation_change`` (8), which is
        unreachable once ``generation >= dataset_change`` (500). The switch
        now happens exactly once, tracked by ``self._notmnist_test_loaded``.
        """
        conv_params = np.mean(ensembles, axis=0)
        ds = self._shape_parameter_to_conv_net(conv_params)
        self.conv_net.set_parameter(ds)
        print('---- Train -----')
        print('Generation ', self.generation)
        generation_change = 8   # draw a fresh batch every 8 generations
        dataset_change = 500    # switch from MNIST to notMNIST here
        with torch.no_grad():
            inputs = self.inputs
            labels = self.labels
            if (self.generation % generation_change == 0
                    and self.generation < dataset_change):
                self.inputs, self.labels = self.dataiter_mnist()
                print('New MNIST set used at generation {}'.format(
                    self.generation))
                # append the outputs
                self.targets.append(self.labels.numpy())
            elif (self.generation >= dataset_change
                    and self.generation % generation_change == 0):
                if not self._notmnist_test_loaded:
                    # Replace the test set exactly once on entering the
                    # notMNIST phase (was unreachable in the original).
                    self.test_input, self.test_label = self.testiter_notmnist()
                    self._notmnist_test_loaded = True
                self.inputs, self.labels = self.dataiter_notmnist()
                print('New notMNIST set used at generation {}'.format(
                    self.generation))
                # append the outputs
                self.targets.append(self.labels.numpy())
            outputs = self.conv_net(inputs)
            self.output_activity_train.append(outputs.numpy())
            conv_loss = self.criterion(outputs, labels).item()
            train_cost = _calculate_cost(_encode_targets(labels, 10),
                                         outputs.numpy(), 'MSE')
            train_acc = score(labels.numpy(), np.argmax(outputs.numpy(), 1))
            print('Cost: ', train_cost)
            print('Accuracy: ', train_acc)
            print('Loss:', conv_loss)
            self.train_cost.append(train_cost)
            self.train_acc.append(train_acc)
            self.train_pred.append(np.argmax(outputs.numpy(), 1))
            print('---- Test -----')
            test_output = self.conv_net(self.test_input)
            test_output = test_output.numpy()
            test_acc = score(self.test_label.numpy(),
                             np.argmax(test_output, 1))
            test_cost = _calculate_cost(_encode_targets(self.test_label, 10),
                                        test_output, 'MSE')
            print('Test accuracy', test_acc)
            self.test_acc.append(test_acc)
            self.test_pred.append(np.argmax(test_output, 1))
            self.test_cost.append(test_cost)
            self.output_activity_test.append(test_output)
            print('-----------------')
            # Evaluate every ensemble member on the same training batch.
            conv_params = []
            for c in ensembles:
                ds = self._shape_parameter_to_conv_net(c)
                self.conv_net.set_parameter(ds)
                conv_params.append(self.conv_net(inputs).numpy().T)
            outs = {
                'conv_params': np.array(conv_params),
                'conv_loss': float(conv_loss),
                'input': self.inputs.squeeze().numpy(),
                'targets': self.labels.numpy()
            }
            return outs

    def _shape_parameter_to_conv_net(self, params):
        """Slice the flat vector ``params`` back into per-layer tensors keyed
        like ``conv_net.state_dict()``."""
        param_dict = dict()
        start = 0
        for key in self.conv_net.state_dict().keys():
            shape = self.conv_net.state_dict()[key].shape
            length = self.conv_net.state_dict()[key].nelement()
            end = start + length
            param_dict[key] = params[start:end].reshape(shape)
            start = end
        return param_dict
loss = criterion(outputs, labels) #print(running_loss) loss.backward() optimizer.step() running_loss += loss.item() if i % 50 == 49: print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000)) running_loss = 0.0 #print(running_loss) scheduler.step(running_loss) torch.save( { 'model_state_dict': model.state_dict(), 'optimizer_state_dict': optimizer.state_dict(), 'loss': loss, }, '/home/sharan/model_1.pth') del train_loader, train, input_tensor_stack_training torch.cuda.empty_cache() model = ConvNet() model.cuda(cuda0) optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) checkpoint = torch.load('/home/sharan/model_1.pth') model.load_state_dict(checkpoint['model_state_dict']) optimizer.load_state_dict(checkpoint['optimizer_state_dict']) loss = checkpoint['loss']
# NOTE(review): the first three statements are the tail of a test-accuracy
# routine (apparently `test_model`) whose `def`/loop headers are outside this
# view; their reconstructed indentation must be confirmed against the full
# file.
total += labels.size(0)
correct += (predicted == labels).sum().item()
print(
    'Test Accuracy of the model on the 10000 test images: {} %'.format(
        100 * correct / total))

# Train the model
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Move the batch to the training device.
        images = images.to(device)
        labels = labels.to(device)
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Progress report every 100 mini-batches.
        if (i + 1) % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(
                epoch + 1, num_epochs, i + 1, total_step, loss.item()))
    # Evaluate on the test set after every epoch.
    test_model()

# Save the model checkpoint
torch.save(model.state_dict(), 'cnn_mnist.ckpt')
class MnistOptimizee(torch.nn.Module):
    """Ensemble optimizee for MNIST that additionally collects wall-clock
    timings of the parameter-reshaping and parameter-setting steps."""

    def __init__(self, seed, n_ensembles, batch_size, root):
        super(MnistOptimizee, self).__init__()
        self.random_state = np.random.RandomState(seed=seed)
        self.n_ensembles = n_ensembles
        self.batch_size = batch_size
        self.root = root
        self.conv_net = ConvNet().to(device)
        self.criterion = nn.CrossEntropyLoss()
        # Project-local loader exposing the MNIST train/test iterators.
        self.data_loader = DataLoader()
        self.data_loader.init_iterators(self.root, self.batch_size)
        self.dataiter_mnist = self.data_loader.dataiter_mnist
        self.testiter_mnist = self.data_loader.testiter_mnist
        self.generation = 0
        self.inputs, self.labels = self.dataiter_mnist()
        self.inputs = self.inputs.to(device)
        self.labels = self.labels.to(device)
        self.test_input, self.test_label = self.testiter_mnist()
        self.test_input = self.test_input.to(device)
        self.test_label = self.test_label.to(device)
        # Per-call wall-clock timings, appended in set_parameters.
        self.timings = {
            'shape_parameters': [],
            'set_parameters': [],
            'set_parameters_cnn': [],
            'shape_parameters_ens': []
        }
        # Total number of scalar parameters in the network.
        self.length = 0
        for key in self.conv_net.state_dict().keys():
            self.length += self.conv_net.state_dict()[key].nelement()

    def create_individual(self):
        """Return the initial ensemble: N(0, 0.1) flat parameter vectors."""
        conv_ensembles = []
        with torch.no_grad():
            for _ in range(self.n_ensembles):
                conv_ensembles.append(
                    np.random.normal(0, 0.1, size=self.length))
            return dict(conv_params=torch.as_tensor(conv_ensembles,
                                                    device=device),
                        targets=self.labels,
                        input=self.inputs.squeeze())

    def load_model(self, path='conv_params.npy'):
        """Load a previously saved ensemble from ``path``.

        BUG FIX: ``np.load`` defaults to ``allow_pickle=False`` since
        NumPy 1.16.3, so loading this pickled object array raised
        ``ValueError``; ``allow_pickle=True`` restores the intended behavior
        and is accepted by all NumPy versions in use.
        """
        print('Loading model from path: {}'.format(path))
        conv_params = np.load(path, allow_pickle=True).item()
        conv_ensembles = conv_params.get('ensemble')
        return dict(conv_params=torch.as_tensor(conv_ensembles,
                                                device=device),
                    targets=self.labels,
                    input=self.inputs.squeeze())

    @staticmethod
    def _he_init(weights, gain=0):
        """He/Kaiming initialization as in He et al., "Delving deep into
        rectifiers". Values are sampled from N(0, std) with
        std = sqrt(2 / ((1 + gain^2) * fan_in)).

        Note: only valid when the network non-linearity is `relu`.

        :param weights: tensor whose fan-in determines the std deviation
        :param gain: additional scaling factor, default 0
        :return: numpy array with ``weights.numel()`` samples
        """
        fan_in = torch.nn.init._calculate_correct_fan(weights, 'fan_in')
        stddev = np.sqrt(2. / fan_in * (1 + gain**2))
        return np.random.normal(0, stddev, weights.numel())

    def set_parameters(self, ensembles):
        """Load the ensemble mean into the net, log train/test metrics and
        timings, and return each member's outputs on the current batch."""
        conv_params = ensembles.mean(0)
        t = time.time()
        ds = self._shape_parameter_to_conv_net(conv_params)
        self.timings['shape_parameters_ens'].append(time.time() - t)
        t = time.time()
        self.conv_net.set_parameter(ds)
        self.timings['set_parameters_cnn'].append(time.time() - t)
        print('---- Train -----')
        print('Generation ', self.generation)
        generation_change = 1  # a fresh batch is drawn every generation
        with torch.no_grad():
            inputs = self.inputs.to(device)
            labels = self.labels.to(device)
            if self.generation % generation_change == 0:
                self.inputs, self.labels = self.dataiter_mnist()
                self.inputs = self.inputs.to(device)
                self.labels = self.labels.to(device)
                print('New MNIST set used at generation {}'.format(
                    self.generation))
            # conv_net returns (logits, act1, act2) in this variant.
            outputs, act1, act2 = self.conv_net(inputs)
            conv_loss = self.criterion(outputs, labels).item()
            train_cost = _calculate_cost(_encode_targets(labels, 10),
                                         F.softmax(outputs, dim=1), 'MSE')
            train_acc = score(labels,
                              torch.argmax(F.softmax(outputs, dim=1), 1))
            print('Cost: ', train_cost)
            print('Accuracy: ', train_acc)
            print('Loss:', conv_loss)
            print('---- Test -----')
            test_output, act1, act2 = self.conv_net(self.test_input)
            test_loss = self.criterion(test_output, self.test_label).item()
            test_acc = score(self.test_label, torch.argmax(test_output, 1))
            test_cost = _calculate_cost(
                _encode_targets(self.test_label, 10), test_output, 'MSE')
            print('Test accuracy', test_acc)
            print('Test loss: {}'.format(test_loss))
            print('-----------------')
            # Evaluate every ensemble member on the same training batch.
            conv_params = []
            for idx, c in enumerate(ensembles):
                t = time.time()
                ds = self._shape_parameter_to_conv_net(c)
                self.timings['shape_parameters'].append(time.time() - t)
                t = time.time()
                self.conv_net.set_parameter(ds)
                self.timings['set_parameters'].append(time.time() - t)
                params, _, _ = self.conv_net(inputs)
                conv_params.append(params.t())
            conv_params = torch.stack(conv_params)
            outs = {
                'conv_params': conv_params,
                'conv_loss': float(conv_loss),
                'input': self.inputs.squeeze(),
                'targets': self.labels
            }
            return outs

    def _shape_parameter_to_conv_net(self, params):
        """Slice the flat vector ``params`` back into per-layer tensors keyed
        like ``conv_net.state_dict()``."""
        param_dict = dict()
        start = 0
        for key in self.conv_net.state_dict().keys():
            shape = self.conv_net.state_dict()[key].shape
            length = self.conv_net.state_dict()[key].nelement()
            end = start + length
            param_dict[key] = params[start:end].reshape(shape)
            start = end
        return param_dict
class MnistOptimizee(torch.nn.Module):
    """Ensemble optimizee for MNIST that records losses, accuracies,
    predictions and per-layer activation statistics for later plotting.
    Hyper-parameters come from the module-level ``config`` dict."""

    def __init__(self, n_ensembles, batch_size, root):
        super(MnistOptimizee, self).__init__()
        self.n_ensembles = n_ensembles
        self.batch_size = batch_size
        self.root = root
        self.length = 0
        self.conv_net = ConvNet().to(device)
        self.criterion = nn.CrossEntropyLoss()
        # Project-local loader exposing the MNIST train/test iterators.
        self.data_loader = DataLoader()
        self.data_loader.init_iterators(self.root, self.batch_size)
        self.dataiter_mnist = self.data_loader.dataiter_mnist
        self.testiter_mnist = self.data_loader.testiter_mnist
        self.generation = 0  # iterations
        self.inputs, self.labels = self.dataiter_mnist()
        self.inputs = self.inputs.to(device)
        self.labels = self.labels.to(device)
        self.test_input, self.test_label = self.testiter_mnist()
        self.test_input = self.test_input.to(device)
        self.test_label = self.test_label.to(device)
        # For plotting
        self.train_pred = []
        self.test_pred = []
        self.train_acc = []
        self.test_acc = []
        self.train_cost = []
        self.test_cost = []
        self.train_loss = []
        self.test_loss = []
        self.targets = []
        self.output_activity_train = []
        self.output_activity_test = []
        # Latest activations plus running mean/std per layer (act3 = logits).
        self.act_func = {
            'act1': [],
            'act2': [],
            'act1_mean': [],
            'act2_mean': [],
            'act1_std': [],
            'act2_std': [],
            'act3': [],
            'act3_mean': [],
            'act3_std': []
        }
        self.test_act_func = {
            'act1': [],
            'act2': [],
            'act1_mean': [],
            'act2_mean': [],
            'act1_std': [],
            'act2_std': [],
            'act3': [],
            'act3_mean': [],
            'act3_std': []
        }
        self.targets.append(self.labels.cpu().numpy())
        # Total number of scalar parameters in the network.
        for key in self.conv_net.state_dict().keys():
            self.length += self.conv_net.state_dict()[key].nelement()

    def create_individual(self):
        """Return the initial ensemble: N(0, config['sigma']) flat vectors."""
        conv_ensembles = []
        sigma = config['sigma']
        with torch.no_grad():
            for _ in range(self.n_ensembles):
                conv_ensembles.append(
                    np.random.normal(0, sigma, size=self.length))
            return dict(conv_params=torch.as_tensor(np.array(conv_ensembles),
                                                    device=device),
                        targets=self.labels,
                        input=self.inputs.squeeze())

    def load_model(self, path='conv_params.npy'):
        """Load a previously saved ensemble from ``path``.

        BUG FIX: ``np.load`` defaults to ``allow_pickle=False`` since
        NumPy 1.16.3, so loading this pickled object array raised
        ``ValueError``; ``allow_pickle=True`` restores the intended behavior.
        """
        print('Loading model from path: {}'.format(path))
        conv_params = np.load(path, allow_pickle=True).item()
        conv_ensembles = conv_params.get('ensemble')
        return dict(conv_params=torch.as_tensor(np.array(conv_ensembles),
                                                device=device),
                    targets=self.labels,
                    input=self.inputs.squeeze())

    def set_parameters(self, ensembles):
        """Load the ensemble mean into the net, record train/test metrics and
        activation statistics, and return each member's outputs."""
        conv_params = ensembles.mean(0)
        ds = self._shape_parameter_to_conv_net(conv_params)
        self.conv_net.set_parameter(ds)
        print('---- Train -----')
        print('Iteration ', self.generation)
        generation_change = config['repetitions']
        with torch.no_grad():
            inputs = self.inputs.to(device)
            labels = self.labels.to(device)
            if self.generation % generation_change == 0:
                self.inputs, self.labels = self.dataiter_mnist()
                self.inputs = self.inputs.to(device)
                self.labels = self.labels.to(device)
                print('New MNIST set used at generation {}'.format(
                    self.generation))
                # append the outputs
                self.targets.append(self.labels.cpu().numpy())
            # get network predictions; conv_net returns (logits, act1, act2)
            outputs, act1, act2 = self.conv_net(inputs)
            act3 = outputs
            # save all important calculations
            self.act_func['act1'] = act1.cpu().numpy()
            self.act_func['act2'] = act2.cpu().numpy()
            self.act_func['act3'] = act3.cpu().numpy()
            self.act_func['act1_mean'].append(act1.mean().item())
            self.act_func['act2_mean'].append(act2.mean().item())
            self.act_func['act3_mean'].append(act3.mean().item())
            self.act_func['act1_std'].append(act1.std().item())
            self.act_func['act2_std'].append(act2.std().item())
            self.act_func['act3_std'].append(act3.std().item())
            self.output_activity_train.append(
                F.softmax(outputs, dim=1).cpu().numpy())
            conv_loss = self.criterion(outputs, labels).item()
            self.train_loss.append(conv_loss)
            train_cost = _calculate_cost(
                _encode_targets(labels, 10),
                F.softmax(outputs, dim=1).cpu().numpy(), 'MSE')
            train_acc = score(
                labels.cpu().numpy(),
                np.argmax(F.softmax(outputs, dim=1).cpu().numpy(), 1))
            print('Cost: ', train_cost)
            print('Accuracy: ', train_acc)
            print('Loss:', conv_loss)
            self.train_cost.append(train_cost)
            self.train_acc.append(train_acc)
            self.train_pred.append(
                np.argmax(F.softmax(outputs, dim=1).cpu().numpy(), 1))
            print('---- Test -----')
            test_output, act1, act2 = self.conv_net(self.test_input)
            test_loss = self.criterion(test_output, self.test_label).item()
            self.test_act_func['act1'] = act1.cpu().numpy()
            self.test_act_func['act2'] = act2.cpu().numpy()
            self.test_act_func['act1_mean'].append(act1.mean().item())
            self.test_act_func['act2_mean'].append(act2.mean().item())
            self.test_act_func['act3_mean'].append(test_output.mean().item())
            self.test_act_func['act1_std'].append(act1.std().item())
            self.test_act_func['act2_std'].append(act2.std().item())
            self.test_act_func['act3_std'].append(test_output.std().item())
            test_output = test_output.cpu().numpy()
            self.test_act_func['act3'] = test_output
            test_acc = score(self.test_label.cpu().numpy(),
                             np.argmax(test_output, 1))
            test_cost = _calculate_cost(
                _encode_targets(self.test_label.cpu().numpy(), 10),
                test_output, 'MSE')
            print('Test accuracy', test_acc)
            print('Test loss: {}'.format(test_loss))
            self.test_acc.append(test_acc)
            self.test_pred.append(np.argmax(test_output, 1))
            self.test_cost.append(test_cost)
            self.output_activity_test.append(test_output)
            self.test_loss.append(test_loss)
            print('-----------------')
            # Evaluate every ensemble member on the same training batch.
            conv_params = []
            for idx, c in enumerate(ensembles):
                ds = self._shape_parameter_to_conv_net(c)
                self.conv_net.set_parameter(ds)
                params, _, _ = self.conv_net(inputs)
                conv_params.append(params.t().cpu().numpy())
            outs = {
                # np.array first: building a tensor straight from a list of
                # ndarrays takes torch's slow path (same values either way).
                'conv_params': torch.tensor(np.array(conv_params)).to(device),
                'conv_loss': float(conv_loss),
                'input': self.inputs.squeeze(),
                'targets': self.labels
            }
            return outs

    def _shape_parameter_to_conv_net(self, params):
        """Create a dictionary which includes the shapes of the network
        architecture per layer, slicing the flat vector ``params`` back into
        per-layer tensors keyed like ``conv_net.state_dict()``."""
        param_dict = dict()
        start = 0
        for key in self.conv_net.state_dict().keys():
            shape = self.conv_net.state_dict()[key].shape
            length = self.conv_net.state_dict()[key].nelement()
            end = start + length
            param_dict[key] = params[start:end].reshape(shape)
            start = end
        return param_dict
class MnistOptimizee(torch.nn.Module):
    """Seeded ensemble optimizee for MNIST recording losses, accuracies,
    predictions and per-layer activation statistics for later plotting."""

    def __init__(self, seed, n_ensembles, batch_size, root):
        super(MnistOptimizee, self).__init__()
        self.random_state = np.random.RandomState(seed=seed)
        self.n_ensembles = n_ensembles
        self.batch_size = batch_size
        self.root = root
        self.conv_net = ConvNet().to(device)
        self.criterion = nn.CrossEntropyLoss()
        # Project-local loader exposing the MNIST train/test iterators.
        self.data_loader = DataLoader()
        self.data_loader.init_iterators(self.root, self.batch_size)
        self.dataiter_mnist = self.data_loader.dataiter_mnist
        self.testiter_mnist = self.data_loader.testiter_mnist
        self.generation = 0
        self.inputs, self.labels = self.dataiter_mnist()
        self.inputs = self.inputs.to(device)
        self.labels = self.labels.to(device)
        self.test_input, self.test_label = self.testiter_mnist()
        self.test_input = self.test_input.to(device)
        self.test_label = self.test_label.to(device)
        # For plotting
        self.train_pred = []
        self.test_pred = []
        self.train_acc = []
        self.test_acc = []
        self.train_cost = []
        self.test_cost = []
        self.train_loss = []
        self.test_loss = []
        self.targets = []
        self.output_activity_train = []
        self.output_activity_test = []
        # Latest activations plus running mean/std per layer (act3 = logits).
        self.act_func = {
            'act1': [],
            'act2': [],
            'act1_mean': [],
            'act2_mean': [],
            'act1_std': [],
            'act2_std': [],
            'act3': [],
            'act3_mean': [],
            'act3_std': []
        }
        self.test_act_func = {
            'act1': [],
            'act2': [],
            'act1_mean': [],
            'act2_mean': [],
            'act1_std': [],
            'act2_std': [],
            'act3': [],
            'act3_mean': [],
            'act3_std': []
        }
        self.targets.append(self.labels.cpu().numpy())
        # Covariance noise matrix
        self.cov = 0.0
        # Total number of scalar parameters in the network.
        self.length = 0
        for key in self.conv_net.state_dict().keys():
            self.length += self.conv_net.state_dict()[key].nelement()

    def create_individual(self):
        """Return the initial ensemble: N(0, 0.1) flat parameter vectors."""
        conv_ensembles = []
        with torch.no_grad():
            for _ in range(self.n_ensembles):
                conv_ensembles.append(
                    np.random.normal(0, 0.1, size=self.length))
            return dict(conv_params=torch.as_tensor(np.array(conv_ensembles),
                                                    device=device),
                        targets=self.labels,
                        input=self.inputs.squeeze())

    def load_model(self, path='conv_params.npy'):
        """Load a previously saved ensemble from ``path``.

        BUG FIX: ``np.load`` defaults to ``allow_pickle=False`` since
        NumPy 1.16.3, so loading this pickled object array raised
        ``ValueError``; ``allow_pickle=True`` restores the intended behavior.
        """
        print('Loading model from path: {}'.format(path))
        conv_params = np.load(path, allow_pickle=True).item()
        conv_ensembles = conv_params.get('ensemble')
        return dict(conv_params=torch.as_tensor(np.array(conv_ensembles),
                                                device=device),
                    targets=self.labels,
                    input=self.inputs.squeeze())

    @staticmethod
    def _he_init(weights, gain=0):
        """He/Kaiming initialization as in He et al., "Delving deep into
        rectifiers". Values are sampled from N(0, std) with
        std = sqrt(2 / ((1 + gain^2) * fan_in)).

        Note: only valid when the network non-linearity is `relu`.

        :param weights: tensor whose fan-in determines the std deviation
        :param gain: additional scaling factor, default 0
        :return: numpy array with ``weights.numel()`` samples
        """
        fan_in = torch.nn.init._calculate_correct_fan(weights, 'fan_in')
        stddev = np.sqrt(2. / fan_in * (1 + gain**2))
        return np.random.normal(0, stddev, weights.numel())

    def set_parameters(self, ensembles):
        """Load the ensemble mean into the net, record train/test metrics and
        activation statistics, and return each member's outputs."""
        conv_params = ensembles.mean(0)
        ds = self._shape_parameter_to_conv_net(conv_params)
        self.conv_net.set_parameter(ds)
        print('---- Train -----')
        print('Generation ', self.generation)
        generation_change = 8  # draw a fresh batch every 8 generations
        with torch.no_grad():
            inputs = self.inputs.to(device)
            labels = self.labels.to(device)
            if self.generation % generation_change == 0:
                self.inputs, self.labels = self.dataiter_mnist()
                self.inputs = self.inputs.to(device)
                self.labels = self.labels.to(device)
                print('New MNIST set used at generation {}'.format(
                    self.generation))
                # append the outputs
                self.targets.append(self.labels.cpu().numpy())
            # conv_net returns (logits, act1, act2) in this variant.
            outputs, act1, act2 = self.conv_net(inputs)
            act3 = outputs
            self.act_func['act1'] = act1.cpu().numpy()
            self.act_func['act2'] = act2.cpu().numpy()
            self.act_func['act3'] = act3.cpu().numpy()
            self.act_func['act1_mean'].append(act1.mean().item())
            self.act_func['act2_mean'].append(act2.mean().item())
            self.act_func['act3_mean'].append(act3.mean().item())
            self.act_func['act1_std'].append(act1.std().item())
            self.act_func['act2_std'].append(act2.std().item())
            self.act_func['act3_std'].append(act3.std().item())
            self.output_activity_train.append(
                F.softmax(outputs, dim=1).cpu().numpy())
            conv_loss = self.criterion(outputs, labels).item()
            self.train_loss.append(conv_loss)
            train_cost = _calculate_cost(
                _encode_targets(labels, 10),
                F.softmax(outputs, dim=1).cpu().numpy(), 'MSE')
            train_acc = score(
                labels.cpu().numpy(),
                np.argmax(F.softmax(outputs, dim=1).cpu().numpy(), 1))
            print('Cost: ', train_cost)
            print('Accuracy: ', train_acc)
            print('Loss:', conv_loss)
            self.train_cost.append(train_cost)
            self.train_acc.append(train_acc)
            self.train_pred.append(
                np.argmax(F.softmax(outputs, dim=1).cpu().numpy(), 1))
            print('---- Test -----')
            test_output, act1, act2 = self.conv_net(self.test_input)
            test_loss = self.criterion(test_output, self.test_label).item()
            self.test_act_func['act1'] = act1.cpu().numpy()
            self.test_act_func['act2'] = act2.cpu().numpy()
            self.test_act_func['act1_mean'].append(act1.mean().item())
            self.test_act_func['act2_mean'].append(act2.mean().item())
            self.test_act_func['act3_mean'].append(test_output.mean().item())
            self.test_act_func['act1_std'].append(act1.std().item())
            self.test_act_func['act2_std'].append(act2.std().item())
            self.test_act_func['act3_std'].append(test_output.std().item())
            test_output = test_output.cpu().numpy()
            self.test_act_func['act3'] = test_output
            test_acc = score(self.test_label.cpu().numpy(),
                             np.argmax(test_output, 1))
            test_cost = _calculate_cost(
                _encode_targets(self.test_label.cpu().numpy(), 10),
                test_output, 'MSE')
            print('Test accuracy', test_acc)
            print('Test loss: {}'.format(test_loss))
            self.test_acc.append(test_acc)
            self.test_pred.append(np.argmax(test_output, 1))
            self.test_cost.append(test_cost)
            self.output_activity_test.append(test_output)
            self.test_loss.append(test_loss)
            print('-----------------')
            # Evaluate every ensemble member on the same training batch.
            conv_params = []
            for idx, c in enumerate(ensembles):
                ds = self._shape_parameter_to_conv_net(c)
                self.conv_net.set_parameter(ds)
                params, _, _ = self.conv_net(inputs)
                conv_params.append(params.t().cpu().numpy())
            outs = {
                # np.array first: building a tensor straight from a list of
                # ndarrays takes torch's slow path (same values either way).
                'conv_params': torch.tensor(np.array(conv_params)).to(device),
                'conv_loss': float(conv_loss),
                'input': self.inputs.squeeze(),
                'targets': self.labels
            }
            return outs

    def _shape_parameter_to_conv_net(self, params):
        """Slice the flat vector ``params`` back into per-layer tensors keyed
        like ``conv_net.state_dict()``."""
        param_dict = dict()
        start = 0
        for key in self.conv_net.state_dict().keys():
            shape = self.conv_net.state_dict()[key].shape
            length = self.conv_net.state_dict()[key].nelement()
            end = start + length
            param_dict[key] = params[start:end].reshape(shape)
            start = end
        return param_dict