def test_genome_conversion_with_jumps_1(self):
    """A genome with skip connections survives a round trip through the network."""
    graph = ((-1, 2), (-2, 2), (2, 0), (2, 1), (-1, 1))
    weights = (1.0, 2.0, 3.0, 0.5, 0.75)
    source_genome = generate_genome_given_graph(graph=graph,
                                                connection_weights=weights)
    source_network = ComplexStochasticNetwork(genome=source_genome)

    rebuilt_genome = convert_stochastic_network_to_genome(
        network=source_network, original_genome=source_genome)
    rebuilt_network = ComplexStochasticNetwork(genome=rebuilt_genome)

    self.assertEqual(source_genome, rebuilt_genome)
    compare_networks(source_network, rebuilt_network)
    self.assertTrue(equal(source_network, rebuilt_network))
def test_network_structure_1(self):
    """A 2-input / 1-hidden / 1-output graph builds the expected layers and output."""
    genome = generate_genome_given_graph(
        graph=((-1, 1), (-2, 1), (1, 0)),
        connection_weights=(1.0, 2.0, 3.0))
    model = ComplexStochasticNetwork(genome=genome)

    # Output layer reads the single hidden node.
    self.assertEqual(model.layers[0].input_keys, [1])
    self.assertTrue(torch.allclose(model.layers[0].weight_mean,
                                   torch.tensor([[3.0]]), atol=1e-02))
    # Hidden layer reads both inputs.
    self.assertEqual(model.layers[1].input_keys, [-2, -1])
    self.assertTrue(torch.allclose(model.layers[1].weight_mean,
                                   torch.tensor([[2.0, 1.0]]), atol=1e-02))

    n_samples = 1
    batch = torch.tensor([[1.0, 1.0]]).view(-1, genome.n_input)
    batch = batch.repeat(n_samples, 1)
    prediction, _ = model(batch)
    self.assertAlmostEqual(9.0, prediction.numpy()[0][0], places=2)
def test_regression_case(self):
    """Prediction distribution has shape (n_examples, n_samples, n_output)."""
    config = create_configuration(filename='/regression-siso.json')
    config.parallel_evaluation = False

    genome = Genome(key=1)
    genome.create_random_genome()
    dataset = get_dataset(config.dataset,
                          train_percentage=config.train_percentage,
                          testing=True)
    network = ComplexStochasticNetwork(genome=genome)

    n_samples = 3
    x, y_true, output_distribution = calculate_prediction_distribution(
        network,
        dataset=dataset,
        problem_type=config.problem_type,
        is_testing=True,
        n_samples=n_samples,
        use_sigmoid=False)

    self.assertEqual(list(output_distribution.shape),
                     [len(y_true), n_samples, config.n_output])
def run(self) -> None:
    '''
    Calculates: KL-Div(q(w)||p(w|D))
    Uses the VariationalInferenceLoss class (not the alternative)

    Stores ``(genome.key, loss_value)`` in ``self.result``.
    '''
    kl_qw_pw = compute_kl_qw_pw(genome=self.genome)

    # Build the network from the genome and put it in inference mode.
    network = ComplexStochasticNetwork(genome=self.genome)
    n_batches = math.ceil(len(self.x) / self.batch_size)
    network.eval()

    # calculate Data log-likelihood (p(y*|x*,D))
    x_batch, y_batch = _prepare_batch_data(x_batch=self.x,
                                           y_batch=self.y,
                                           problem_type=self.problem_type,
                                           is_gpu=self.is_gpu,
                                           n_input=self.genome.n_input,
                                           n_output=self.genome.n_output,
                                           n_samples=self.n_samples)

    print('running forward pass')
    with torch.no_grad():
        # forward pass
        output, _ = network(x_batch)
    print('forward pass completed')

    beta = get_beta(beta_type=self.beta_type, m=n_batches, batch_idx=0,
                    epoch=1, n_epochs=1)
    kl_posterior = self.loss(y_pred=output, y_true=y_batch,
                             kl_qw_pw=kl_qw_pw, beta=beta)
    self.result = (self.genome.key, kl_posterior.item())
def test_genome_conversion_fails_when_some_parameter_is_different(self):
    """Tampering with a network weight makes conversion back to a genome fail."""
    graph = ((-1, 3), (-2, 3), (3, 2), (-1, 2),
             (2, 0), (2, 1), (-1, 1), (-2, 3))
    weights = (1.0, 2.0, 3.0, 0.5, 0.75, 0.8, 0.6, 0.9)
    genome = generate_genome_given_graph(graph=graph,
                                         connection_weights=weights)
    network = ComplexStochasticNetwork(genome=genome)

    # Perturb one variational mean so the network no longer matches the genome.
    network.layer_0.qw_mean[0, 1] = 0.33

    self.assertRaises(Exception, convert_stochastic_network_to_genome,
                      network, genome)
def test_network_structure_without_all_output(self):
    """Forward pass still works when not every output node has incoming edges."""
    self.config.n_output = 2
    genome = generate_genome_given_graph(((-1, 1), (-2, 1)), (1, 1))
    model = ComplexStochasticNetwork(genome=genome)

    n_samples = 1
    batch = torch.tensor([[1.0, 1.0]]).view(-1, genome.n_input)
    batch = batch.repeat(n_samples, 1)
    y, _ = model(batch)
def _evaluate_genome_parallel(genome: Genome, loss, beta_type, problem_type,
                              is_testing, batch_size=10000, n_samples=10,
                              is_gpu=False, dataset=None):
    '''
    Calculates: KL-Div(q(w)||p(w|D))
    Uses the VariationalInferenceLoss class (not the alternative)

    :param dataset: object exposing ``x``, ``x_train``/``y_train`` and
        ``x_test``/``y_test``. Bug fix: the original body read a free variable
        ``dataset`` that was never defined in this scope (a guaranteed
        NameError at runtime); it is now an explicit keyword argument.
    :raises ValueError: if no dataset is supplied.
    :return: scalar loss value (float).
    '''
    if dataset is None:
        raise ValueError('A dataset is required to evaluate the genome')

    # setup network
    network = ComplexStochasticNetwork(genome=genome)
    if is_gpu:
        network.cuda()

    m = math.ceil(len(dataset.x) / batch_size)
    network.eval()

    # calculate Data log-likelihood (p(y*|x*,D))
    if is_testing:
        x_batch, y_batch = dataset.x_test, dataset.y_test
    else:
        x_batch, y_batch = dataset.x_train, dataset.y_train

    x_batch, y_batch = _prepare_batch_data(x_batch=x_batch,
                                           y_batch=y_batch,
                                           problem_type=problem_type,
                                           is_gpu=is_gpu,
                                           n_input=genome.n_input,
                                           n_output=genome.n_output,
                                           n_samples=n_samples)

    with torch.no_grad():
        # forward pass; the network also returns its KL(q(w)||p(w)) term
        output, kl_qw_pw = network(x_batch)

    output, _, y_batch = _process_output_data(output, y_true=y_batch,
                                              n_samples=n_samples,
                                              n_output=genome.n_output,
                                              problem_type=problem_type,
                                              is_pass=True)
    beta = get_beta(beta_type=beta_type, m=m, batch_idx=0, epoch=1, n_epochs=1)
    kl_posterior = loss(y_pred=output, y_true=y_batch,
                        kl_qw_pw=kl_qw_pw, beta=beta)
    return kl_posterior.item()
def test_network_structure_miso_2(self):
    """Two-output network with a shared hidden node: layer wiring and CUDA forward."""
    self.config.n_output = 2
    is_cuda = True
    genome = generate_genome_given_graph(
        graph=((-1, 2), (-2, 2), (2, 0), (2, 1),
               (-1, 0), (-1, 1), (-2, 1), (-2, 0)),
        connection_weights=(1.0, 2.0, 3.0, 4.0, 0, 1.0, 0, 1.0))
    model = ComplexStochasticNetwork(genome=genome, is_trainable=True,
                                     is_cuda=is_cuda)

    # Output layer mixes the hidden node (2) with both raw inputs.
    self.assertEqual(model.layers[0].input_keys, [2, -2, -1])
    self.assertTrue(torch.allclose(
        model.layers[0].weight_mean,
        torch.tensor([[3.0, 1.0, 0.0], [4.0, 0.0, 1.0]]),
        atol=1e-02))
    # Hidden layer reads both raw inputs.
    self.assertEqual(model.layers[1].input_keys, [-2, -1])
    self.assertTrue(torch.allclose(model.layers[1].weight_mean,
                                   torch.tensor([[2.0, 1.0]]), atol=1e-02))

    n_samples = 1
    batch = torch.tensor([[1.0, 1.0]]).view(-1, genome.n_input)
    batch = batch.repeat(n_samples, 1)
    if is_cuda:
        batch = batch.cuda()
        model.cuda()

    y, _ = model(batch)
    if is_cuda:
        y = y.cpu()
    self.assertTrue(torch.allclose(y, torch.tensor([[10.0, 13.0]]),
                                   atol=1e-02))
def test_network_structure_miso(self):
    """Multi-input single-output graph produces the expected prediction."""
    genome = generate_genome_given_graph(
        graph=((-1, 1), (-2, 1), (1, 0), (-1, 0)),
        connection_weights=(1.0, 2.0, 3.0, 4.0))
    model = ComplexStochasticNetwork(genome=genome)

    n_samples = 1
    batch = torch.tensor([[1.0, 1.0]]).view(-1, genome.n_input)
    batch = batch.repeat(n_samples, 1)
    prediction, _ = model(batch)
    self.assertAlmostEqual(13.0, prediction.numpy()[0][0], places=2)
def test_standard_network_to_genome_to_stochastic_network(self):
    """FeedForward weights survive conversion to a genome and back to a stochastic net."""
    config = create_configuration(filename='/classification-miso.json')
    feed_forward = FeedForward(n_input=config.n_input,
                               n_output=config.n_output,
                               n_neurons_per_layer=3,
                               n_hidden_layers=1)
    genome = get_genome_from_standard_network(feed_forward, std=0.1)
    stochastic_network = ComplexStochasticNetwork(genome=genome)

    state = feed_forward.state_dict()
    # The variational means must match the deterministic weights layer by layer.
    self.assertTrue(torch.allclose(state['layer_0.weight'],
                                   stochastic_network.layer_0.qw_mean,
                                   atol=1e-02))
    self.assertTrue(torch.allclose(state['layer_1.weight'],
                                   stochastic_network.layer_1.qw_mean,
                                   atol=1e-02))
def test_network_structure_miso_3(self):
    """Deeper graph with jump connections: mean prediction over samples is ~225."""
    graph = ((-1, 1), (-2, 1), (-1, 2), (-2, 2), (1, 4), (1, 3), (2, 3),
             (2, 4), (-1, 3), (3, 0), (4, 0), (-1, 0), (1, 0))
    weights = (1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 1.5, 1.0, 2.0, 3.0, 4.0)
    genome = generate_genome_given_graph(graph, weights)
    model = ComplexStochasticNetwork(genome=genome)

    n_samples = 100
    batch = torch.tensor([[1.0, 1.0]]).view(-1, genome.n_input)
    batch = batch.repeat(n_samples, 1)
    prediction, _ = model(batch)
    # Stochastic forward passes: compare the sample mean with a wide tolerance.
    self.assertAlmostEqual(225, prediction.mean().item(), delta=10)
def test_non_existing_connections_are_updated_2(self):
    """A trained network converts back to a Genome equal to the trained weights."""
    genome = generate_genome_given_graph(graph=((-1, 1), (-2, 1)),
                                         connection_weights=(1.0, 2.0))
    dataset = get_dataset(dataset=self.config.dataset,
                          train_percentage=0.1,
                          testing=False,
                          noise=0.0)
    trainer = StandardTrainer(dataset=dataset,
                              n_samples=self.config.n_samples,
                              n_output=genome.n_output,
                              problem_type=self.config.problem_type,
                              beta=self.config.beta,
                              n_epochs=self.n_epochs,
                              is_cuda=False)
    trainer.train(genome)

    best_network = trainer.get_best_network()
    new_genome = convert_stochastic_network_to_genome(
        network=best_network,
        original_genome=genome,
        fitness=-trainer.best_loss_val,
        fix_std=genome.genome_config.fix_std)
    rebuilt_network = ComplexStochasticNetwork(genome=new_genome)

    self.assertEqual(type(new_genome), Genome)
    self.assertTrue(equal(best_network, rebuilt_network))
def evaluate_genome(genome: Genome, loss, beta_type, problem_type,
                    batch_size=10000, n_samples=10, is_gpu=False,
                    dataset=None):
    '''
    Calculates: KL-Div(q(w)||p(w|D))
    Uses the VariationalInferenceLoss class (not the alternative)

    :param dataset: object exposing ``x`` and ``y`` tensors to evaluate on.
        Bug fix: the original body read a free variable ``dataset`` (its
        ``get_dataset`` initialization was commented out), which guaranteed a
        NameError at runtime; it is now an explicit keyword argument.
    :raises ValueError: if no dataset is supplied.
    :return: scalar loss value (float).
    '''
    if dataset is None:
        raise ValueError('A dataset is required to evaluate the genome')

    kl_qw_pw = compute_kl_qw_pw(genome=genome)

    # setup network
    network = ComplexStochasticNetwork(genome=genome)
    if is_gpu:
        network.cuda()

    m = math.ceil(len(dataset.x) / batch_size)
    network.eval()

    # calculate Data log-likelihood (p(y*|x*,D))
    x_batch, y_batch = dataset.x, dataset.y
    x_batch, y_batch = _prepare_batch_data(x_batch=x_batch,
                                           y_batch=y_batch,
                                           problem_type=problem_type,
                                           is_gpu=is_gpu,
                                           n_input=genome.n_input,
                                           n_output=genome.n_output,
                                           n_samples=n_samples)
    if is_gpu:
        x_batch, y_batch = x_batch.cuda(), y_batch.cuda()

    with torch.no_grad():
        # forward pass
        output, _ = network(x_batch)

    beta = get_beta(beta_type=beta_type, m=m, batch_idx=0, epoch=1, n_epochs=1)
    kl_posterior = loss(y_pred=output, y_true=y_batch,
                        kl_qw_pw=kl_qw_pw, beta=beta)
    return kl_posterior.item()
class StandardTrainer:
    """Trains a ComplexStochasticNetwork built from a genome using Adam.

    Keeps a deep copy of the ``state_dict`` that achieved the best validation
    loss and early-stops after ``N_EPOCHS_WITHOUT_IMPROVING`` epochs without
    improvement. ``get_best_network()`` rebuilds a network from that snapshot.
    """

    def __init__(self, dataset, n_epochs, n_output, problem_type, n_samples,
                 beta, is_cuda, weight_decay=0.0005, lr=0.01):
        self.dataset = dataset
        self.is_cuda = is_cuda
        self.lr = lr
        self.weight_decay = weight_decay
        self.n_epochs = n_epochs
        self.n_output = n_output
        self.problem_type = problem_type
        self.n_samples = n_samples
        self.beta = beta
        # Populated by train():
        self.network = None
        self.criterion = None
        self.optimizer = None
        self.final_loss = None
        self.last_update = 0            # epoch of the last validation improvement
        self.best_loss_val = 10000      # sentinel "infinity" for the best val loss
        self.best_network_state = None  # deep-copied state_dict of the best network

    def train(self, genome):
        """Train a network built from `genome`, tracking the best validation loss."""
        kl_qw_pw = compute_kl_qw_pw(genome=genome)
        # setup network
        self.network = ComplexStochasticNetwork(genome=genome,
                                                is_trainable=True,
                                                is_cuda=self.is_cuda)
        self.criterion = get_loss(problem_type=self.problem_type)
        if self.is_cuda:
            self.network.cuda()
            self.criterion.cuda()
        self.optimizer = Adam(self.network.parameters(), lr=self.lr,
                              weight_decay=self.weight_decay)

        x_batch, y_batch = self.dataset.x_train, self.dataset.y_train
        x_train, x_val, y_train, y_val = self.train_val_split(
            x_batch, y_batch, problem_type=self.problem_type, val_ratio=0.2)

        # NOTE(review): only the prepared x is kept; the y tensors are used as
        # returned by train_val_split — confirm this is intended.
        x_train, _ = _prepare_batch_data(
            x_batch=x_train,
            y_batch=y_train,
            problem_type=self.problem_type,
            is_gpu=False,  # this could be removed
            n_input=genome.n_input,
            n_output=genome.n_output,
            n_samples=self.n_samples)

        x_val, _ = _prepare_batch_data(x_batch=x_val,
                                       y_batch=y_val,
                                       problem_type=self.problem_type,
                                       is_gpu=False,
                                       n_input=genome.n_input,
                                       n_output=genome.n_output,
                                       n_samples=self.n_samples)

        if self.is_cuda:
            x_train = x_train.cuda()
            y_train = y_train.cuda()
            x_val = x_val.cuda()
            y_val = y_val.cuda()

        self.network.train()
        for epoch in range(self.n_epochs):
            loss_epoch = self._train_one(x_train, y_train, kl_qw_pw)

            # if epoch % 10 == 0:
            # Validate every epoch and snapshot the best-performing state.
            _, _, _, loss_val = self._evaluate(x_val, y_val,
                                               network=self.network)
            if loss_val < self.best_loss_val:
                self.best_loss_val = loss_val
                self.best_network_state = copy.deepcopy(
                    self.network.state_dict())
                self.last_update = epoch

            # Early stopping: no validation improvement for too long.
            if epoch - self.last_update > N_EPOCHS_WITHOUT_IMPROVING:
                print(
                    f'Breaking training as not improving for {N_EPOCHS_WITHOUT_IMPROVING} epochs'
                )
                break
            if epoch % 200 == 0:
                print(f'Epoch = {epoch}. Training Loss: {loss_epoch}. '
                      f'Best Val. Loss: {self.best_loss_val}')
            self.network.clear_non_existing_weights(
                clear_grad=False)  # reset non-existing weights
        self.final_loss = loss_epoch
        print(f'Final Epoch = {epoch}. Training Error: {self.final_loss}')

    def _train_one(self, x_batch, y_batch, kl_qw_pw):
        """Run one optimization step; returns the scalar training loss."""
        # TODO: the kl_qw_pw returned by the network gives problems with backprop.
        output, kl_qw_pw = self.network(x_batch)
        output, _ = calculate_multinomial(output, self.n_samples,
                                          self.n_output)
        loss = self.criterion(y_pred=output, y_true=y_batch,
                              kl_qw_pw=kl_qw_pw, beta=self.beta)
        loss_epoch = loss.data.item()
        self.optimizer.zero_grad()
        loss.backward()  # Backward Propagation
        # self.network.clear_non_existing_weights()  # zero_grad for those unexistent parameters
        self.optimizer.step()  # Optimizer update
        # self.network.clear_non_existing_weights(clear_grad=False)  # reset non-existing weights
        return loss_epoch

    def _evaluate(self, x_batch, y_batch, network):
        """Evaluate `network` on one batch; returns (x, y_true, y_pred, loss)."""
        network.eval()
        chunks_x = []
        chunks_y_pred = []
        chunks_y_true = []
        with torch.no_grad():
            output, kl_qw_pw = network(x_batch)
            output, _ = calculate_multinomial(output, self.n_samples,
                                              self.n_output)
            # output, _, y_batch = _process_output_data(output, y_true=y_batch, n_samples=n_samples,
            #                                           n_output=genome.n_output, problem_type=problem_type, is_pass=is_pass)
            loss = self.criterion(y_pred=output, y_true=y_batch,
                                  kl_qw_pw=kl_qw_pw, beta=self.beta)
            # loss = self.criterion(output, y_batch)
            loss_epoch = loss.data.item()
            chunks_x.append(x_batch)
            chunks_y_pred.append(output)
            chunks_y_true.append(y_batch)
        x = torch.cat(chunks_x, dim=0)
        y_pred = torch.cat(chunks_y_pred, dim=0)
        y_true = torch.cat(chunks_y_true, dim=0)
        return x, y_true, y_pred, loss_epoch

    def train_val_split(self, x_batch, y_batch, problem_type, val_ratio=0.2):
        """Split tensors into train/validation sets, restoring torch dtypes."""
        x_train, x_val, y_train, y_val = train_test_split(
            x_batch.numpy(), y_batch.numpy(), test_size=val_ratio)
        x_train = torch.tensor(x_train).float()
        x_val = torch.tensor(x_val).float()
        # Targets are long for classification, float for regression.
        if problem_type == 'classification':
            y_train = torch.tensor(y_train).long()
            y_val = torch.tensor(y_val).long()
        elif problem_type == 'regression':
            y_train = torch.tensor(y_train).float()
            y_val = torch.tensor(y_val).float()
        return x_train, x_val, y_train, y_val

    def get_best_network(self):
        """Rebuild a trainable network holding the best validation-loss weights."""
        network = ComplexStochasticNetwork(genome=self.network.genome,
                                           is_trainable=True)
        network.load_state_dict(self.best_network_state)
        return network
def _get_number_of_layers(self, genome):
    """Return the layer count of the stochastic network built from `genome`."""
    return len(ComplexStochasticNetwork(genome=genome).layers)
def _generate_row(self, report, absolute_best=True):
    """Build a one-row DataFrame of metrics for one experiment report.

    Re-evaluates the reported genome on the test set and collects loss,
    size and (for classification) calibration/quality metrics.

    :param report: experiment report object exposing ``execution_id``,
        ``correlation_id``, ``duration`` and a ``data`` dict.
    :param absolute_best: if True use the overall best individual; otherwise
        use the best genome found before fine-tuning.
    :return: single-row ``pd.DataFrame``.
    """
    execution_id = report.execution_id
    correlation_id = report.correlation_id
    # Pick which genome snapshot to analyse.
    if absolute_best:
        genome_dict = report.data['best_individual']
        best_individual_fitness = report.data['best_individual_fitness']
    else:
        genome_dict = report.data['fine_tuning'][
            'best_genome_before_fine_tuning']
        best_individual_fitness = report.data['fine_tuning'][
            'best_fitness_before_fine_tuning']
    genome = Genome.from_dict(genome_dict)
    config = genome.genome_config
    # Cache per-execution artifacts for later inspection.
    self.configurations[execution_id] = config
    self.best_genomes[execution_id] = genome
    self.best_networks[execution_id] = ComplexStochasticNetwork(
        genome=genome)
    set_configuration(config)

    # evaluate genome
    loss = get_loss(problem_type=config.problem_type)
    print(f'Train percentage: {config.train_percentage}')
    print(f'Random state: {config.dataset_random_state}')
    dataset = get_dataset(config.dataset,
                          train_percentage=config.train_percentage,
                          testing=True,
                          random_state=config.dataset_random_state,
                          noise=config.noise,
                          label_noise=config.label_noise)

    x, y_true, y_pred_prob, loss_value = evaluate_genome(
        genome=genome,
        dataset=dataset,
        loss=loss,
        problem_type=config.problem_type,
        beta_type=config.beta_type,
        batch_size=config.batch_size,
        n_samples=self.n_samples,
        is_gpu=config.is_gpu,
        is_testing=True,
        return_all=True)
    # NOTE(review): argmax over dim=1 assumes y_pred_prob is (n_examples,
    # n_classes) — confirm for the regression branch below.
    y_pred = torch.argmax(y_pred_prob, dim=1)

    train_percentage = config.train_percentage
    noise = config.noise
    label_noise = config.label_noise
    duration = report.duration
    n_parameters = genome.calculate_number_of_parameters()
    # presumably each node/connection stores a (mean, std) pair, hence // 2
    # — TODO confirm against the Genome implementation.
    n_nodes = genome.n_bias_parameters // 2
    n_connections = genome.n_weight_parameters // 2
    n_layers = self._get_number_of_layers(genome)
    mean_genome_std = get_mean_std(genome)
    end_condition = report.data['end_condition']

    chunk = pd.DataFrame(
        {
            'correlation_id': correlation_id,
            'execution_id': execution_id,
            'train_percentage': train_percentage,
            'noise': noise,
            'label_noise': label_noise,
            'is_bayesian': False if config.fix_std else True,
            'beta': config.beta,
            'loss_training': -best_individual_fitness,
            'loss_testing': loss_value,
            'duration': duration,
            'end_condition': end_condition,
            'n_parameters': n_parameters,
            'n_nodes': n_nodes,
            'n_connections': n_connections,
            'n_layers': n_layers,
            'mean_genome_std': mean_genome_std,
        },
        index=[0])

    if config.problem_type == 'classification':
        chunk['accuracy'] = accuracy_score(y_true, y_pred) * 100
        chunk['precision'] = precision_score(y_true, y_pred,
                                             average='weighted')
        chunk['recall'] = recall_score(y_true, y_pred, average='weighted')
        chunk['f1'] = f1_score(y_true, y_pred, average='weighted')
        ece, _ = expected_calibration_error(
            y_true.numpy(), y_pred_prob.numpy(), n_bins=ECE_N_BINS,
            uniform_binning=UNIFORM_BINNING)
        chunk['ece'] = ece
    else:
        chunk['mse'] = mean_squared_error(y_true, y_pred)
        chunk['mae'] = mean_absolute_error(y_true, y_pred)
    return chunk
def train(self, genome): kl_qw_pw = compute_kl_qw_pw(genome=genome) # setup network self.network = ComplexStochasticNetwork(genome=genome, is_trainable=True, is_cuda=self.is_cuda) self.criterion = get_loss(problem_type=self.problem_type) if self.is_cuda: self.network.cuda() self.criterion.cuda() self.optimizer = Adam(self.network.parameters(), lr=self.lr, weight_decay=self.weight_decay) x_batch, y_batch = self.dataset.x_train, self.dataset.y_train x_train, x_val, y_train, y_val = self.train_val_split( x_batch, y_batch, problem_type=self.problem_type, val_ratio=0.2) x_train, _ = _prepare_batch_data( x_batch=x_train, y_batch=y_train, problem_type=self.problem_type, is_gpu=False, # this could be removed n_input=genome.n_input, n_output=genome.n_output, n_samples=self.n_samples) x_val, _ = _prepare_batch_data(x_batch=x_val, y_batch=y_val, problem_type=self.problem_type, is_gpu=False, n_input=genome.n_input, n_output=genome.n_output, n_samples=self.n_samples) if self.is_cuda: x_train = x_train.cuda() y_train = y_train.cuda() x_val = x_val.cuda() y_val = y_val.cuda() self.network.train() for epoch in range(self.n_epochs): loss_epoch = self._train_one(x_train, y_train, kl_qw_pw) # if epoch % 10 == 0: _, _, _, loss_val = self._evaluate(x_val, y_val, network=self.network) if loss_val < self.best_loss_val: self.best_loss_val = loss_val self.best_network_state = copy.deepcopy( self.network.state_dict()) self.last_update = epoch if epoch - self.last_update > N_EPOCHS_WITHOUT_IMPROVING: print( f'Breaking training as not improving for {N_EPOCHS_WITHOUT_IMPROVING} epochs' ) break if epoch % 200 == 0: print(f'Epoch = {epoch}. Training Loss: {loss_epoch}. ' f'Best Val. Loss: {self.best_loss_val}') self.network.clear_non_existing_weights( clear_grad=False) # reset non-existing weights self.final_loss = loss_epoch print(f'Final Epoch = {epoch}. Training Error: {self.final_loss}')
def evaluate_genome_with_dataloader(genome: Genome, data_loader, loss,
                                    beta_type, problem_type,
                                    batch_size=10000, n_samples=10,
                                    is_gpu=False, return_all=False):
    '''
    Calculates: KL-Div(q(w)||p(w|D))
    Uses the VariationalInferenceLoss class (not the alternative)

    Iterates over `data_loader` batches and accumulates the loss; when
    `return_all` is True, also returns the concatenated inputs, predictions
    and targets.
    '''
    kl_posterior = 0
    kl_qw_pw = compute_kl_qw_pw(genome=genome)
    # setup network
    network = ComplexStochasticNetwork(genome=genome)
    if is_gpu:
        network.cuda()
    # NOTE(review): len(data_loader) is typically the number of batches, so
    # dividing by batch_size again looks suspicious — confirm the intended
    # value of m (it feeds the beta schedule below).
    m = math.ceil(len(data_loader) / batch_size)
    network.eval()
    chunks_x = []
    chunks_y_pred = []
    chunks_y_true = []
    # calculate Data log-likelihood (p(y*|x*,D))
    for batch_idx, (x_batch, y_batch) in enumerate(data_loader):
        x_batch, y_batch = _prepare_batch_data(x_batch=x_batch,
                                               y_batch=y_batch,
                                               problem_type=problem_type,
                                               is_gpu=is_gpu,
                                               n_input=genome.n_input,
                                               n_output=genome.n_output,
                                               n_samples=n_samples)
        with torch.no_grad():
            # forward pass
            output, _ = network(x_batch)
        beta = get_beta(beta_type=beta_type, m=m, batch_idx=batch_idx,
                        epoch=1, n_epochs=1)
        kl_posterior += loss(y_pred=output, y_true=y_batch,
                             kl_qw_pw=kl_qw_pw, beta=beta)
        if return_all:
            chunks_x.append(x_batch)
            chunks_y_pred.append(output)
            chunks_y_true.append(y_batch)
    loss_value = kl_posterior.item()
    if return_all:
        x = torch.cat(chunks_x, dim=0)
        y_pred = torch.cat(chunks_y_pred, dim=0)
        y_true = torch.cat(chunks_y_true, dim=0)
        return x, y_true, y_pred, loss_value
    return loss_value
def _get_network(self):
    """Build and return a fresh stochastic network from this object's genome."""
    network = ComplexStochasticNetwork(genome=self.genome)
    return network
def get_best_network(self):
    """Rebuild a trainable network and load the best weights seen so far."""
    best = ComplexStochasticNetwork(genome=self.network.genome,
                                    is_trainable=True)
    best.load_state_dict(self.best_network_state)
    return best