def _run(self):
    self.best_network_rep = None
    self.best_loss_val_rep = 100000
    self.last_update = 0  # guard for the early-stopping check below
    self._initialize()

    x_batch, y_batch = self.dataset.x_train, self.dataset.y_train
    x_train, x_val, y_train, y_val = self.train_val_split(x_batch, y_batch, val_ratio=0.2)

    x_train, y_train = _prepare_batch_data(x_batch=x_train,
                                           y_batch=y_train,
                                           problem_type=self.config.problem_type,
                                           is_gpu=self.config.is_gpu,
                                           n_input=self.config.n_input,
                                           n_output=self.config.n_output,
                                           n_samples=1)
    x_val, y_val = _prepare_batch_data(x_batch=x_val,
                                       y_batch=y_val,
                                       problem_type=self.config.problem_type,
                                       is_gpu=self.config.is_gpu,
                                       n_input=self.config.n_input,
                                       n_output=self.config.n_output,
                                       n_samples=1)

    if self.is_cuda:
        x_train = x_train.cuda()
        y_train = y_train.cuda()
        x_val = x_val.cuda()
        y_val = y_val.cuda()

    # train with early stopping on the validation loss
    for epoch in range(self.n_epochs):
        loss_train = self._train_one(x_train, y_train)
        _, _, _, loss_val = self._evaluate(x_val, y_val, network=self.network)
        self.backprop_report.report_epoch(epoch, loss_train, loss_val)

        if loss_val < self.best_loss_val_rep:
            self.best_loss_val_rep = loss_val
            self.best_network_rep = copy.deepcopy(self.network)
            self.last_update = epoch
            print(f'New best network: {loss_val}')

        if epoch - self.last_update > N_EPOCHS_WITHOUT_IMPROVING:
            print(f'Breaking training as not improving for {N_EPOCHS_WITHOUT_IMPROVING} epochs')
            break

        if epoch % 100 == 0:
            print(f'Epoch = {epoch}. Error: {loss_train}')

    print(f'Final Train Error: {loss_train}')
    print(f'Best Val Error: {self.best_loss_val_rep}')
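# The `train_val_split` used by `_run` is not shown in this section. A minimal
# sketch of a shuffled hold-out split (the name `_train_val_split_sketch` is
# hypothetical; the project's helper may also stratify by class for
# classification problems):
def _train_val_split_sketch(x, y, val_ratio=0.2):
    # shuffle indices, then carve off the first `val_ratio` fraction as validation
    perm = torch.randperm(x.shape[0])
    n_val = int(x.shape[0] * val_ratio)
    val_idx, train_idx = perm[:n_val], perm[n_val:]
    return x[train_idx], x[val_idx], y[train_idx], y[val_idx]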
def evaluate(self):
    x_batch, y_batch = self.dataset.x_test, self.dataset.y_test
    x_batch, y_batch = _prepare_batch_data(x_batch=x_batch,
                                           y_batch=y_batch,
                                           problem_type=self.config.problem_type,
                                           is_gpu=self.config.is_gpu,
                                           n_input=self.config.n_input,
                                           n_output=self.config.n_output,
                                           n_samples=1)
    if self.is_cuda:
        x_batch = x_batch.cuda()
        y_batch = y_batch.cuda()

    x, y_true, y_pred, _ = self._evaluate(x_batch, y_batch, network=self.best_network)

    if self.is_cuda:
        x = x.cpu()
        y_pred = y_pred.cpu()
        y_true = y_true.cpu()
    return x, y_true, y_pred
def _evaluate_genome_parallel_jupyneat(genome: dict, loss, beta_type, problem_type,
                                       n_input, n_output, activation,
                                       batch_size=10000, n_samples=10, is_gpu=False):
    '''
    Calculates: KL-Div(q(w)||p(w|D))
    Uses the VariationalInferenceLoss class (not the alternative)
    '''
    kl_posterior = 0

    # TODO: fix
    # kl_qw_pw = compute_kl_qw_pw(genome=genome)
    kl_qw_pw = 0.0

    # setup network
    network = ComplexStochasticNetworkJupyneat(genome=genome,
                                               n_input=n_input,
                                               n_output=n_output,
                                               activation_type=activation)
    if is_gpu:
        network.cuda()

    # `dataset` is a module-level global, presumably populated once per worker process
    m = math.ceil(len(dataset.x) / batch_size)

    network.eval()

    # calculate Data log-likelihood (p(y*|x*,D))
    x_batch, y_batch = dataset.x, dataset.y
    x_batch, y_batch = _prepare_batch_data(x_batch=x_batch,
                                           y_batch=y_batch,
                                           problem_type=problem_type,
                                           is_gpu=is_gpu,
                                           n_input=n_input,
                                           n_output=n_output,
                                           n_samples=n_samples)
    if is_gpu:
        x_batch, y_batch = x_batch.cuda(), y_batch.cuda()

    with torch.no_grad():
        # forward pass
        output, _ = network(x_batch)
        beta = get_beta(beta_type=beta_type, m=m, batch_idx=0, epoch=1, n_epochs=1)
        kl_posterior += loss(y_pred=output, y_true=y_batch, kl_qw_pw=kl_qw_pw, beta=beta)

    loss_value = kl_posterior.item()
    return loss_value
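# The parallel evaluators above read `dataset` as a module-level global, which
# is typically set once per worker. A sketch of that pattern with
# `multiprocessing.Pool` (the initializer name `_init_worker` is hypothetical
# and requires `import multiprocessing`):
def _init_worker(worker_dataset):
    global dataset
    dataset = worker_dataset

# Usage sketch:
# with multiprocessing.Pool(processes=4, initializer=_init_worker,
#                           initargs=(my_dataset,)) as pool:
#     losses = pool.starmap(_evaluate_genome_parallel_jupyneat, jobs)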
def _evaluate_genome_parallel(genome: Genome, loss, beta_type, problem_type, is_testing,
                              batch_size=10000, n_samples=10, is_gpu=False):
    '''
    Calculates: KL-Div(q(w)||p(w|D))
    Uses the VariationalInferenceLoss class (not the alternative)
    '''
    # setup network
    network = ComplexStochasticNetwork(genome=genome)
    if is_gpu:
        network.cuda()

    # `dataset` is a module-level global, presumably populated once per worker process
    m = math.ceil(len(dataset.x) / batch_size)

    network.eval()

    # calculate Data log-likelihood (p(y*|x*,D))
    if is_testing:
        x_batch, y_batch = dataset.x_test, dataset.y_test
    else:
        x_batch, y_batch = dataset.x_train, dataset.y_train

    x_batch, y_batch = _prepare_batch_data(x_batch=x_batch,
                                           y_batch=y_batch,
                                           problem_type=problem_type,
                                           is_gpu=is_gpu,
                                           n_input=genome.n_input,
                                           n_output=genome.n_output,
                                           n_samples=n_samples)

    with torch.no_grad():
        # forward pass
        output, kl_qw_pw = network(x_batch)
        output, _, y_batch = _process_output_data(output,
                                                  y_true=y_batch,
                                                  n_samples=n_samples,
                                                  n_output=genome.n_output,
                                                  problem_type=problem_type,
                                                  is_pass=True)
        beta = get_beta(beta_type=beta_type, m=m, batch_idx=0, epoch=1, n_epochs=1)
        kl_posterior = loss(y_pred=output, y_true=y_batch, kl_qw_pw=kl_qw_pw, beta=beta)

    loss_value = kl_posterior.item()
    return loss_value
def calculate_prediction_distribution(network, dataset, problem_type, is_testing,
                                      n_samples=1000, use_sigmoid=False):
    '''
    Calculate Predictive Distribution for a network and dataset
    '''
    # setup network
    network.eval()

    # calculate Data log-likelihood (p(y*|x*,D))
    if is_testing:
        x_batch, y_batch = dataset.x_test, dataset.y_test
    else:
        x_batch, y_batch = dataset.x_train, dataset.y_train

    x_batch, y_batch = _prepare_batch_data(x_batch=x_batch,
                                           y_batch=y_batch,
                                           problem_type=problem_type,
                                           is_gpu=False,
                                           n_input=network.n_input,
                                           n_output=network.n_output,
                                           n_samples=n_samples)

    chunks_x = []
    chunks_output_distribution = []
    chunks_y_true = []

    with torch.no_grad():
        # forward pass
        output, _ = network(x_batch)
        _, output_distribution, y_batch = _process_output_data(output,
                                                               y_true=y_batch,
                                                               n_samples=n_samples,
                                                               n_output=network.n_output,
                                                               problem_type=problem_type,
                                                               is_pass=True)
        chunks_x.append(x_batch)
        chunks_output_distribution.append(output_distribution)
        chunks_y_true.append(y_batch)

    x = torch.cat(chunks_x, dim=0)
    output_distribution = torch.cat(chunks_output_distribution, dim=0)
    y_true = torch.cat(chunks_y_true, dim=0)
    return x, y_true, output_distribution
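# Usage sketch for `calculate_prediction_distribution`: summarise the sampled
# predictive distribution with a per-example mean and standard deviation.
# The (n_examples, n_samples, ...) layout of `output_distribution` after
# `_process_output_data` is an assumption:
# x, y_true, output_distribution = calculate_prediction_distribution(
#     network, dataset, problem_type='regression', is_testing=True, n_samples=1000)
# y_mean = output_distribution.mean(dim=1)  # predictive mean
# y_std = output_distribution.std(dim=1)    # predictive uncertainty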
def run(self) -> None:
    '''
    Calculates: KL-Div(q(w)||p(w|D))
    Uses the VariationalInferenceLoss class (not the alternative)
    '''
    kl_posterior = 0
    kl_qw_pw = compute_kl_qw_pw(genome=self.genome)

    # setup network
    network = ComplexStochasticNetwork(genome=self.genome)

    m = math.ceil(len(self.x) / self.batch_size)

    network.eval()

    # calculate Data log-likelihood (p(y*|x*,D))
    x_batch, y_batch = self.x, self.y
    x_batch, y_batch = _prepare_batch_data(x_batch=x_batch,
                                           y_batch=y_batch,
                                           problem_type=self.problem_type,
                                           is_gpu=self.is_gpu,
                                           n_input=self.genome.n_input,
                                           n_output=self.genome.n_output,
                                           n_samples=self.n_samples)

    print('running forward pass')
    with torch.no_grad():
        # forward pass
        output, _ = network(x_batch)
    print('forward pass completed')

    beta = get_beta(beta_type=self.beta_type, m=m, batch_idx=0, epoch=1, n_epochs=1)
    kl_posterior += self.loss(y_pred=output, y_true=y_batch, kl_qw_pw=kl_qw_pw, beta=beta)

    loss_value = kl_posterior.item()
    self.result = (self.genome.key, loss_value)
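# Usage sketch for the `run` worker above, assuming the enclosing class follows
# the usual threading.Thread / multiprocessing.Process convention (the class
# name `GenomeEvaluationWorker` is hypothetical):
# worker = GenomeEvaluationWorker(genome=genome, x=x, y=y, loss=loss,
#                                 beta_type=beta_type, problem_type='regression',
#                                 batch_size=10000, n_samples=10, is_gpu=False)
# worker.run()  # or worker.start(); worker.join() when run as a thread/process
# genome_key, loss_value = worker.result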
def evaluate_genome(genome: Genome, loss, beta_type, problem_type,
                    batch_size=10000, n_samples=10, is_gpu=False):
    '''
    Calculates: KL-Div(q(w)||p(w|D))
    Uses the VariationalInferenceLoss class (not the alternative)
    '''
    kl_posterior = 0
    kl_qw_pw = compute_kl_qw_pw(genome=genome)

    # setup network
    network = ComplexStochasticNetwork(genome=genome)
    if is_gpu:
        network.cuda()

    # `dataset` is a module-level global
    m = math.ceil(len(dataset.x) / batch_size)

    network.eval()

    # calculate Data log-likelihood (p(y*|x*,D))
    x_batch, y_batch = dataset.x, dataset.y
    x_batch, y_batch = _prepare_batch_data(x_batch=x_batch,
                                           y_batch=y_batch,
                                           problem_type=problem_type,
                                           is_gpu=is_gpu,
                                           n_input=genome.n_input,
                                           n_output=genome.n_output,
                                           n_samples=n_samples)
    if is_gpu:
        x_batch, y_batch = x_batch.cuda(), y_batch.cuda()

    with torch.no_grad():
        # forward pass
        output, _ = network(x_batch)
        beta = get_beta(beta_type=beta_type, m=m, batch_idx=0, epoch=1, n_epochs=1)
        kl_posterior += loss(y_pred=output, y_true=y_batch, kl_qw_pw=kl_qw_pw, beta=beta)

    loss_value = kl_posterior.item()
    return loss_value
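# `get_beta` scales the KL term per mini-batch. A sketch of common schedules
# (following Blundell et al. 2015 and Sønderby et al. 2016; the schedules and
# names in this project's actual `get_beta` may differ):
def _get_beta_sketch(beta_type, m, batch_idx, epoch, n_epochs):
    if beta_type == 'Blundell':
        # weight early batches within an epoch more heavily
        return 2 ** (m - (batch_idx + 1)) / (2 ** m - 1)
    elif beta_type == 'Soenderby':
        # linear KL warm-up over roughly the first quarter of training
        # (max(..., 1) guards against n_epochs < 4)
        return min(epoch / max(n_epochs // 4, 1), 1.0)
    elif beta_type == 'Standard':
        return 1.0 / m
    return 0.0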
def evaluate(self, is_testing=True):
    if is_testing:
        x_batch, y_batch = self.dataset.x_test, self.dataset.y_test
    else:
        x_batch, y_batch = self.dataset.x_train, self.dataset.y_train

    x_batch, _ = _prepare_batch_data(x_batch=x_batch,
                                     y_batch=y_batch,
                                     problem_type=self.config.problem_type,
                                     is_gpu=self.config.is_gpu,
                                     n_input=self.config.n_input,
                                     n_output=self.config.n_output,
                                     n_samples=EVALUATION_N_SAMPLES)

    # alternatively: rebuild a ProbabilisticFeedForward and load the best state_dict
    network = self.best_network

    if self.is_cuda:
        network.cuda()
        x_batch = x_batch.cuda()
        y_batch = y_batch.cuda()

    x, y_true, y_pred, loss = self._evaluate(x_batch, y_batch, network=network)

    if self.is_cuda:
        x = x.cpu()
        y_true = y_true.cpu()
        y_pred = y_pred.cpu()
    return x, y_true, y_pred
def _run(self):
    self._initialize()

    x_batch, y_batch = self.dataset.x_train, self.dataset.y_train
    x_train, x_val, y_train, y_val = self.train_val_split(x_batch, y_batch, val_ratio=0.2)

    x_train, _ = _prepare_batch_data(x_batch=x_train,
                                     y_batch=y_train,
                                     problem_type=self.config.problem_type,
                                     is_gpu=self.config.is_gpu,
                                     n_input=self.config.n_input,
                                     n_output=self.config.n_output,
                                     n_samples=self.config.n_samples)
    x_val, _ = _prepare_batch_data(x_batch=x_val,
                                   y_batch=y_val,
                                   problem_type=self.config.problem_type,
                                   is_gpu=self.config.is_gpu,
                                   n_input=self.config.n_input,
                                   n_output=self.config.n_output,
                                   n_samples=EVALUATION_N_SAMPLES)

    if self.is_cuda:
        x_train = x_train.cuda()
        y_train = y_train.cuda()
        x_val = x_val.cuda()
        y_val = y_val.cuda()

    # choose the training step for the problem type
    if self.config.problem_type == 'classification':
        self._train_one = self._train_one_classification
    elif self.config.problem_type == 'regression':
        self._train_one = self._train_one_regression

    # train with early stopping on the validation loss
    for epoch in range(self.n_epochs):
        loss_train = self._train_one(x_train, y_train)
        _, _, _, loss_val = self._evaluate(x_val, y_val, network=self.network)
        self.backprop_report.report_epoch(epoch, loss_train, loss_val)

        if loss_val < self.best_loss_val_rep:
            self.best_loss_val_rep = loss_val
            self.best_network_rep = copy.deepcopy(self.network)
            self.last_update = epoch
            print(f'New best network: {loss_val}')

        if epoch - self.last_update > N_EPOCHS_WITHOUT_IMPROVING:
            print(f'Breaking training as not improving for {N_EPOCHS_WITHOUT_IMPROVING} epochs')
            break

        if epoch % 100 == 0:
            print(f'Epoch = {epoch}. Error: {loss_train}')

    print(f'Final Train Error: {loss_train}')
    print(f'Best Val Error: {self.best_loss_val_rep}')
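# `_evaluate` is called throughout but not shown in this section. A minimal
# method sketch consistent with its (x, y_true, y_pred, loss) return signature
# (hypothetical; the real method likely also applies `_process_output_data`
# and averages over multiple stochastic forward samples):
def _evaluate_sketch(self, x_batch, y_batch, network):
    network.eval()
    with torch.no_grad():
        y_pred = network(x_batch)
        loss = self.criterion(y_pred, y_batch).item()
    return x_batch, y_batch, y_pred, loss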
def evaluate_genome_with_dataloader(genome: Genome, data_loader, loss, beta_type, problem_type,
                                    batch_size=10000, n_samples=10, is_gpu=False,
                                    return_all=False):
    '''
    Calculates: KL-Div(q(w)||p(w|D))
    Uses the VariationalInferenceLoss class (not the alternative)
    '''
    kl_posterior = 0
    kl_qw_pw = compute_kl_qw_pw(genome=genome)

    # setup network
    network = ComplexStochasticNetwork(genome=genome)
    if is_gpu:
        network.cuda()

    # len(data_loader) is already the number of mini-batches,
    # so it must not be divided by batch_size again
    m = len(data_loader)

    network.eval()

    chunks_x = []
    chunks_y_pred = []
    chunks_y_true = []

    # calculate Data log-likelihood (p(y*|x*,D))
    for batch_idx, (x_batch, y_batch) in enumerate(data_loader):
        x_batch, y_batch = _prepare_batch_data(x_batch=x_batch,
                                               y_batch=y_batch,
                                               problem_type=problem_type,
                                               is_gpu=is_gpu,
                                               n_input=genome.n_input,
                                               n_output=genome.n_output,
                                               n_samples=n_samples)
        with torch.no_grad():
            # forward pass
            output, _ = network(x_batch)
            beta = get_beta(beta_type=beta_type, m=m, batch_idx=batch_idx, epoch=1, n_epochs=1)
            kl_posterior += loss(y_pred=output, y_true=y_batch, kl_qw_pw=kl_qw_pw, beta=beta)

        if return_all:
            chunks_x.append(x_batch)
            chunks_y_pred.append(output)
            chunks_y_true.append(y_batch)

    loss_value = kl_posterior.item()

    if return_all:
        x = torch.cat(chunks_x, dim=0)
        y_pred = torch.cat(chunks_y_pred, dim=0)
        y_true = torch.cat(chunks_y_true, dim=0)
        return x, y_true, y_pred, loss_value
    return loss_value
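# Usage sketch for `evaluate_genome_with_dataloader` with a standard PyTorch
# DataLoader (tensor names `x` and `y` are placeholders):
# from torch.utils.data import DataLoader, TensorDataset
# data_loader = DataLoader(TensorDataset(x, y), batch_size=10000, shuffle=False)
# x_all, y_true, y_pred, loss_value = evaluate_genome_with_dataloader(
#     genome, data_loader, loss=loss, beta_type='Blundell',
#     problem_type='regression', n_samples=10, return_all=True)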
def train(self, genome):
    kl_qw_pw = compute_kl_qw_pw(genome=genome)

    # setup network
    self.network = ComplexStochasticNetwork(genome=genome,
                                            is_trainable=True,
                                            is_cuda=self.is_cuda)
    self.criterion = get_loss(problem_type=self.problem_type)
    if self.is_cuda:
        self.network.cuda()
        self.criterion.cuda()

    self.optimizer = Adam(self.network.parameters(), lr=self.lr, weight_decay=self.weight_decay)

    x_batch, y_batch = self.dataset.x_train, self.dataset.y_train
    x_train, x_val, y_train, y_val = self.train_val_split(x_batch, y_batch,
                                                          problem_type=self.problem_type,
                                                          val_ratio=0.2)

    x_train, _ = _prepare_batch_data(x_batch=x_train,
                                     y_batch=y_train,
                                     problem_type=self.problem_type,
                                     is_gpu=False,  # this could be removed
                                     n_input=genome.n_input,
                                     n_output=genome.n_output,
                                     n_samples=self.n_samples)
    x_val, _ = _prepare_batch_data(x_batch=x_val,
                                   y_batch=y_val,
                                   problem_type=self.problem_type,
                                   is_gpu=False,
                                   n_input=genome.n_input,
                                   n_output=genome.n_output,
                                   n_samples=self.n_samples)

    if self.is_cuda:
        x_train = x_train.cuda()
        y_train = y_train.cuda()
        x_val = x_val.cuda()
        y_val = y_val.cuda()

    self.network.train()
    for epoch in range(self.n_epochs):
        loss_epoch = self._train_one(x_train, y_train, kl_qw_pw)

        _, _, _, loss_val = self._evaluate(x_val, y_val, network=self.network)
        if loss_val < self.best_loss_val:
            self.best_loss_val = loss_val
            self.best_network_state = copy.deepcopy(self.network.state_dict())
            self.last_update = epoch

        if epoch - self.last_update > N_EPOCHS_WITHOUT_IMPROVING:
            print(f'Breaking training as not improving for {N_EPOCHS_WITHOUT_IMPROVING} epochs')
            break

        if epoch % 200 == 0:
            print(f'Epoch = {epoch}. Training Loss: {loss_epoch}. '
                  f'Best Val. Loss: {self.best_loss_val}')

        self.network.clear_non_existing_weights(clear_grad=False)  # reset non-existing weights

    self.final_loss = loss_epoch
    print(f'Final Epoch = {epoch}. Training Error: {self.final_loss}')
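# The docstrings above refer to a `VariationalInferenceLoss`. A sketch of the
# negative-ELBO combination it is assumed to compute (hypothetical
# implementation; the project's class may differ in reduction and likelihood):
class _VariationalInferenceLossSketch(torch.nn.Module):
    def __init__(self, likelihood_loss):
        super().__init__()
        # e.g. nn.CrossEntropyLoss() for classification, nn.MSELoss() for regression
        self.likelihood_loss = likelihood_loss

    def forward(self, y_pred, y_true, kl_qw_pw, beta):
        # negative ELBO = data-fit term + beta-scaled KL(q(w) || p(w))
        return self.likelihood_loss(y_pred, y_true) + beta * kl_qw_pw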