Example 1: test that a genome converted from a trained stochastic network reproduces its training fitness
    def test_loss_is_equal_when_no_jumps(self):
        connections = ((-1, 2), (-2, 2), (2, 0), (2, 1))
        genome = generate_genome_given_graph(graph=connections,
                                             connection_weights=(1.0, 2.0, 3.0, 0.5))
        dataset = get_dataset(dataset=self.config.dataset, train_percentage=0.1, testing=False, noise=0.0,
                              random_state=self.config.dataset_random_state)
        n_samples = 100  # MC samples for evaluation; training uses config.n_samples
        stg_trainer = StandardTrainer(dataset=dataset, n_samples=self.config.n_samples,
                                      n_output=genome.n_output,
                                      problem_type=self.config.problem_type,
                                      beta=self.config.beta,
                                      n_epochs=100, is_cuda=False)
        stg_trainer.train(genome)
        best_network = stg_trainer.get_best_network()
        new_genome = convert_stochastic_network_to_genome(network=best_network,
                                                          original_genome=genome,
                                                          fitness=-stg_trainer.best_loss_val,
                                                          fix_std=genome.genome_config.fix_std)

        # evaluate genome
        loss = get_loss(problem_type=self.config.problem_type)

        loss_value = evaluate_genome(genome=new_genome,
                                     dataset=dataset,
                                     loss=loss,
                                     problem_type=self.config.problem_type,
                                     beta_type=self.config.beta_type,
                                     batch_size=self.config.batch_size,
                                     n_samples=n_samples,
                                     is_gpu=False,
                                     is_testing=False,
                                     return_all=False,
                                     is_pass=True)
        self.assertAlmostEqual(loss_value, -new_genome.fitness, places=0)
Example 2: printing classification metrics (confusion matrix and accuracy) for the best individual
    def _show_classification_metrics(self, config):
        dataset = get_dataset(config.dataset,
                              train_percentage=config.train_percentage,
                              testing=False,
                              random_state=config.dataset_random_state,
                              noise=config.noise,
                              label_noise=config.label_noise)
        loss = get_loss(problem_type=config.problem_type)
        x, y_true, y_pred, loss_value = evaluate_genome(
            genome=self.best_individual,
            dataset=dataset,
            loss=loss,
            problem_type=config.problem_type,
            beta_type=config.beta_type,
            batch_size=config.batch_size,
            n_samples=config.n_samples,
            is_gpu=config.is_gpu,
            is_testing=True,
            return_all=True)
        y_pred = torch.argmax(y_pred, dim=1)
        from sklearn.metrics import confusion_matrix, accuracy_score
        confusion_m = confusion_matrix(y_true, y_pred)
        acc = accuracy_score(y_true, y_pred) * 100
        self.metrics_best['confusion_matrix'] = confusion_m
        self.metrics_best['accuracy'] = acc
        print('Confusion Matrix:')
        print(confusion_m)
        print(f'Accuracy: {acc} %')
Example 3: evaluating a population in parallel with worker processes and queues
def _parallelize_with_workers(n_cpus, population):
    # create workers
    manager = Manager()
    task_queue = manager.Queue()
    exit_queue = manager.Queue()
    exception_queue = manager.Queue()
    results_queue = manager.Queue()
    workers = []
    for i in range(n_cpus):
        worker = Worker(task_queue=task_queue,
                        exit_queue=exit_queue,
                        exception_queue=exception_queue,
                        results_queue=results_queue)
        worker.start()
        workers.append(worker)
    for genome in population.values():
        task_queue.put(
            Task(
                genome=genome.copy(),
                dataset=None,
                x=torch.zeros((2, 784)).float(),
                y=torch.zeros(2).long(),  # classification losses expect long targets
                loss=get_loss('classification'),
                beta_type='other',
                problem_type='classification',
                batch_size=100000,
                n_samples=20,
                is_gpu=False))
    # collect exactly one result per genome; task_queue.empty() is not a reliable
    # completion signal, since a worker pops its task before the result is ready
    for _ in range(len(population)):
        if not exception_queue.empty():
            raise exception_queue.get()
        results = results_queue.get()
        population[results[0]].fitness = -results[1]
    # terminate workers
    # TODO: workers can live during the whole process
    for i in range(n_cpus):
        print('sending exit conditions')
        exit_queue.put(1)
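
The Worker and Task classes used above are project code and are not shown in this extract. For orientation, here is a minimal sketch of a compatible Worker; it assumes a Task is callable and returns a (genome_key, loss) tuple, which is what the result-reading loop above unpacks. Everything beyond the names visible in the snippet is an illustrative assumption, not the project's actual implementation.

import queue
from multiprocessing import Process


class Worker(Process):
    # hypothetical sketch: pull tasks until the parent signals exit

    def __init__(self, task_queue, exit_queue, exception_queue, results_queue):
        super().__init__()
        self.task_queue = task_queue
        self.exit_queue = exit_queue
        self.exception_queue = exception_queue
        self.results_queue = results_queue

    def run(self):
        # the parent puts one token per worker into exit_queue; peeking at
        # emptiness (without consuming) is enough to stop every worker
        while self.exit_queue.empty():
            try:
                task = self.task_queue.get(timeout=1)
            except queue.Empty:
                continue
            try:
                # assumption: a Task is callable and returns (genome_key, loss)
                self.results_queue.put(task())
            except Exception as e:
                self.exception_queue.put(e)
                return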
Example 4: loading a report from the repository and re-evaluating the best individual
def main():
    ALGORITHM_VERSION = 'bayes-neat'
    DATASET = 'mnist_binary'
    CORRELATION_ID = 'test'
    # earlier runs: 'f6d2d5e3-26a3-4069-9071-b74009323761' (2 hours),
    # 'bf516f54-c29b-4f88-949c-102ab67930b3' (10 hours, learning architecture),
    # '59cbe09c-4ee7-4e7e-9b17-26c866113cfe' (test-run),
    # 'c5551a6c-177b-4c2c-8ecd-a75e79ae0ec2',
    # '1f30c172-9056-4012-9651-0765527bd550' (fitness -0.2),
    # 'a91761a0-6201-4a1d-9293-5e713f305fbf' (fitness -0.86)
    execution_id = '991b275d-6282-4f7d-8e97-3908baf94726'
    report_repository = ReportRepository.create(project='neuro-evolution',
                                                logs_path=LOGS_PATH)
    report = report_repository.get_report(algorithm_version=ALGORITHM_VERSION,
                                          dataset=DATASET,
                                          correlation_id=CORRELATION_ID,
                                          execution_id=execution_id)
    genome_dict = report.data['best_individual']
    best_individual_fitness = report.data['best_individual_fitness']
    print(f'Fitness of best individual: {best_individual_fitness}')

    genome = Genome.create_from_julia_dict(genome_dict)
    config = get_configuration()  # config is used below for the loss, dataset and evaluation
    print(f'Execution id: {execution_id}')

    loss = get_loss(problem_type=config.problem_type)

    ##### EVALUATE ######
    print('Evaluating results')
    evaluate_with_parallel(genome, loss, config)

    dataset = get_dataset(config.dataset, testing=True)
    dataset.generate_data()
    # TODO: remove data-loader. If we want to sample the dataset in each generation, then we can create a
    #  middle layer between evaluation and dataset
    x, y_true, y_pred, loss_value = evaluate_genome(
        genome=genome,
        dataset=dataset,
        loss=loss,
        problem_type=config.problem_type,
        beta_type=config.beta_type,
        batch_size=config.batch_size,
        n_samples=config.n_samples,
        is_gpu=config.is_gpu,
        return_all=True)
    y_pred = torch.argmax(y_pred, dim=1)

    from sklearn.metrics import confusion_matrix, accuracy_score
    print(f'Loss: {loss_value}')
    print('Confusion Matrix:')
    print(confusion_matrix(y_true, y_pred))

    print(f'Accuracy: {accuracy_score(y_true, y_pred) * 100} %')
Example 5: evaluating a population (jupyneat) with parallel and serial paths
    def evaluate(self, population: dict):
        '''
        population: a dict mapping genome key (int) to genome
        '''
        logger.info(f'Population size is {len(population)}')
        # TODO: make n_samples increase with generation number
        n_samples = self.config.n_samples
        if self.parallel_evaluation:
            tasks = []
            for genome in population.values():
                # logger.debug(f'Genome {genome.key}: {genome.get_graph()}')
                x = (genome, get_loss(problem_type=self.config.problem_type),
                     self.config.beta_type, self.config.problem_type,
                     self.config.n_input, self.config.n_output,
                     self.config.node_activation, self.batch_size, n_samples,
                     self.is_gpu)
                tasks.append(x)

            # TODO: fix logging when using multiprocessing. Easy fix is to disable
            fitnesses = list(
                self.pool.imap(evaluate_genome_task_jupyneat,
                               tasks,
                               chunksize=max(len(population) // self.n_processes, 1)))

            for i, genome in enumerate(population.values()):
                genome['fitness'] = fitnesses[i]

        else:
            self.dataset = self._get_dataset()
            self.loss = self._get_loss()
            for genome in population.values():
                genome['fitness'] = -evaluate_genome_jupyneat(
                    genome=genome,
                    problem_type=self.config.problem_type,
                    n_input=self.config.n_input,
                    n_output=self.config.n_output,
                    activation_type=self.config.node_activation,
                    dataset=self.dataset,
                    loss=self.loss,
                    beta_type=self.config.beta_type,
                    batch_size=self.batch_size,
                    n_samples=n_samples,
                    is_gpu=self.is_gpu)

        return population
Example 6: loading a report and evaluating the best individual (jupyneat)
def main():
    ALGORITHM_VERSION = 'bayes-neat'
    DATASET = 'classification_example_1'
    CORRELATION_ID = 'test'
    execution_id = '180186eb-46c8-4bbd-9f8a-26a36cbe57e4'

    report_repository = ReportRepository.create(project='neuro-evolution',
                                                logs_path=LOGS_PATH)
    report = report_repository.get_report(algorithm_version=ALGORITHM_VERSION,
                                          dataset=DATASET,
                                          correlation_id=CORRELATION_ID,
                                          execution_id=execution_id)
    genome = report.data['best_individual']
    best_individual_fitness = report.data['best_individual_fitness']
    print(f'Fitness of best individual: {best_individual_fitness}')

    config_dict = report.config
    config = jsons.load(config_dict, BaseConfiguration)

    loss = get_loss(problem_type=config.problem_type)
    dataset = get_dataset(config.dataset, testing=True)
    dataset.generate_data()

    x, y_true, y_pred, loss_value = evaluate_genome_jupyneat(
        genome=genome,
        problem_type=config.problem_type,
        n_input=config.n_input,
        n_output=config.n_output,
        activation_type=config.node_activation,
        dataset=dataset,
        loss=loss,
        beta_type=config.beta_type,
        batch_size=config.batch_size,
        n_samples=config.n_samples,
        is_gpu=config.is_gpu,
        return_all=True)

    y_pred = torch.argmax(y_pred, dim=1)

    from sklearn.metrics import confusion_matrix, accuracy_score
    print(f'Loss: {loss_value}')
    print('Confusion Matrix:')
    print(confusion_matrix(y_true, y_pred))

    print(f'Accuracy: {accuracy_score(y_true, y_pred) * 100} %')
Example 7: initializing the network, criterion and optimizer
    def _initialize(self):
        if IS_ALTERNATIVE_NETWORK:
            Network = ProbabilisticFeedForwardAlternative
        else:
            Network = ProbabilisticFeedForward

        self.network = Network(n_input=self.config.n_input,
                               n_output=self.config.n_output,
                               is_cuda=self.is_cuda,
                               n_neurons_per_layer=self.n_neurons_per_layer,
                               n_hidden_layers=self.n_hidden_layers)
        self.network.reset_parameters()

        self.criterion = get_loss(problem_type=self.config.problem_type)
        self.optimizer = Adam(self.network.parameters(),
                              lr=self.lr,
                              weight_decay=self.weight_decay)

        if self.is_cuda:
            self.network.cuda()
            self.criterion.cuda()
Example 8: evaluating a population with parallel and serial paths
    def evaluate(self, population: dict):
        '''
        population: a dict mapping genome key (int) to genome
        '''
        # TODO: make n_samples increase with generation number
        n_samples = self.config.n_samples
        if self.parallel_evaluation:
            tasks = []
            for genome in population.values():
                logger.debug(f'Genome {genome.key}: {genome.get_graph()}')
                x = (genome, get_loss(problem_type=self.config.problem_type),
                     self.config.beta_type, self.config.problem_type,
                     IS_TESTING,
                     self.batch_size, n_samples, self.is_gpu)
                tasks.append(x)

            # TODO: fix logging when using multiprocessing. Easy fix is to disable
            fitnesses = list(self.pool.imap(evaluate_genome_task, tasks,
                                            chunksize=max(len(population) // self.n_processes, 1)))

            for i, genome in enumerate(population.values()):
                genome.fitness = fitnesses[i]

        else:
            self.dataset = self._get_dataset()
            self.loss = self._get_loss()
            for genome in population.values():
                logger.debug(f'Genome {genome.key}: {genome.get_graph()}')
                genome.fitness = -evaluate_genome(genome=genome,
                                                  problem_type=self.config.problem_type,
                                                  dataset=self.dataset,
                                                  loss=self.loss,
                                                  is_testing=IS_TESTING,
                                                  beta_type=self.config.beta_type,
                                                  batch_size=self.batch_size,
                                                  n_samples=n_samples,
                                                  is_gpu=self.is_gpu)

        return population
Example 9: building evaluation tasks for a multiprocessing Pool with a per-process dataset initializer
    loss_value = kl_posterior.item()
    return loss_value


tasks = []


pool = Pool(processes=N_PROCESSES, initializer=process_initialization, initargs=(config.dataset, True))
for genome in genomes:
    logger.debug(f'Genome {genome.key}: {genome.get_graph()}')

    loss = get_loss('classification')
    beta_type = 'other'
    problem_type = 'classification'
    batch_size = 100000
    n_samples = 20
    is_gpu = False
    x = (genome.copy(), loss, beta_type, problem_type,
         batch_size, n_samples, is_gpu)

    tasks.append(x)

# TODO: fix logging when using multiprocessing. Easy fix is to disable
fitnesses = list(pool.imap(evaluate_genome_parallel, tasks, chunksize=max([len(genomes)//N_PROCESSES, 1])))
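
Passing initializer=process_initialization with initargs=(config.dataset, True) lets each worker process load the dataset once and cache it, so the task tuples stay small: only the genome and a few scalars are pickled per task. A sketch of how the two functions could fit together, assuming a module-level cache; it mirrors the visible call sites and the fitness-is-negative-loss convention of Examples 5 and 8, but it is not the project's actual implementation.

_dataset = None  # per-process cache, filled by the Pool initializer


def process_initialization(dataset_name, testing):
    # runs once in every worker process when the Pool starts
    global _dataset
    _dataset = get_dataset(dataset_name, testing=testing)
    _dataset.generate_data()


def evaluate_genome_parallel(task):
    # unpack the tuple built in the parent process (same order as above)
    genome, loss, beta_type, problem_type, batch_size, n_samples, is_gpu = task
    return -evaluate_genome(genome=genome,
                            dataset=_dataset,
                            loss=loss,
                            problem_type=problem_type,
                            beta_type=beta_type,
                            batch_size=batch_size,
                            n_samples=n_samples,
                            is_gpu=is_gpu)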
Example 10: assembling a metrics row (loss, accuracy, calibration, architecture statistics) from a report
    def _generate_row(self, report, absolute_best=True):
        execution_id = report.execution_id
        correlation_id = report.correlation_id
        if absolute_best:
            genome_dict = report.data['best_individual']
            best_individual_fitness = report.data['best_individual_fitness']
        else:
            genome_dict = report.data['fine_tuning'][
                'best_genome_before_fine_tuning']
            best_individual_fitness = report.data['fine_tuning'][
                'best_fitness_before_fine_tuning']

        genome = Genome.from_dict(genome_dict)
        config = genome.genome_config
        self.configurations[execution_id] = config
        self.best_genomes[execution_id] = genome
        self.best_networks[execution_id] = ComplexStochasticNetwork(
            genome=genome)

        set_configuration(config)
        # evaluate genome
        loss = get_loss(problem_type=config.problem_type)
        print(f'Train percentage: {config.train_percentage}')
        print(f'Random state: {config.dataset_random_state}')
        dataset = get_dataset(config.dataset,
                              train_percentage=config.train_percentage,
                              testing=True,
                              random_state=config.dataset_random_state,
                              noise=config.noise,
                              label_noise=config.label_noise)
        x, y_true, y_pred_prob, loss_value = evaluate_genome(
            genome=genome,
            dataset=dataset,
            loss=loss,
            problem_type=config.problem_type,
            beta_type=config.beta_type,
            batch_size=config.batch_size,
            n_samples=self.n_samples,
            is_gpu=config.is_gpu,
            is_testing=True,
            return_all=True)
        y_pred = torch.argmax(y_pred_prob, dim=1)
        train_percentage = config.train_percentage
        noise = config.noise
        label_noise = config.label_noise
        duration = report.duration
        n_parameters = genome.calculate_number_of_parameters()
        n_nodes = genome.n_bias_parameters // 2
        n_connections = genome.n_weight_parameters // 2
        n_layers = self._get_number_of_layers(genome)
        mean_genome_std = get_mean_std(genome)
        end_condition = report.data['end_condition']
        chunk = pd.DataFrame(
            {
                'correlation_id': correlation_id,
                'execution_id': execution_id,
                'train_percentage': train_percentage,
                'noise': noise,
                'label_noise': label_noise,
                'is_bayesian': not config.fix_std,
                'beta': config.beta,
                'loss_training': -best_individual_fitness,
                'loss_testing': loss_value,
                'duration': duration,
                'end_condition': end_condition,
                'n_parameters': n_parameters,
                'n_nodes': n_nodes,
                'n_connections': n_connections,
                'n_layers': n_layers,
                'mean_genome_std': mean_genome_std,
            },
            index=[0])
        if config.problem_type == 'classification':
            chunk['accuracy'] = accuracy_score(y_true, y_pred) * 100
            chunk['precision'] = precision_score(y_true,
                                                 y_pred,
                                                 average='weighted')
            chunk['recall'] = recall_score(y_true, y_pred, average='weighted')
            chunk['f1'] = f1_score(y_true, y_pred, average='weighted')
            ece, _ = expected_calibration_error(
                y_true.numpy(),
                y_pred_prob.numpy(),
                n_bins=ECE_N_BINS,
                uniform_binning=UNIFORM_BINNING)
            chunk['ece'] = ece
        else:
            chunk['mse'] = mean_squared_error(y_true, y_pred)
            chunk['mae'] = mean_absolute_error(y_true, y_pred)
        return chunk
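
expected_calibration_error is project code; the visible call passes per-class probabilities, a bin count, and a binning mode, and keeps the first returned value. For reference, a generic uniform-binning ECE can be computed roughly as below. This is a sketch, not the project's implementation: the uniform_binning flag is omitted, and the second return value stands in for whatever per-bin statistics the real function returns.

import numpy as np


def expected_calibration_error(y_true, y_prob, n_bins=10):
    # confidence = probability assigned to the predicted class
    confidences = y_prob.max(axis=1)
    correct = (y_prob.argmax(axis=1) == y_true).astype(float)
    edges = np.linspace(0.0, 1.0, n_bins + 1)
    ece, per_bin_gaps = 0.0, []
    for lo, hi in zip(edges[:-1], edges[1:]):
        mask = (confidences > lo) & (confidences <= hi)
        gap = abs(correct[mask].mean() - confidences[mask].mean()) if mask.any() else 0.0
        per_bin_gaps.append(gap)
        ece += mask.mean() * gap  # weight the gap by the fraction of samples in the bin
    return ece, per_bin_gaps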
Example 11: lazy initialization of the loss
    def _get_loss(self):
        if self.loss is None:
            self.loss = get_loss(problem_type=self.config.problem_type)
        return self.loss
Example 12: training loop with validation-based early stopping
    def train(self, genome):
        kl_qw_pw = compute_kl_qw_pw(genome=genome)
        # setup network
        self.network = ComplexStochasticNetwork(genome=genome,
                                                is_trainable=True,
                                                is_cuda=self.is_cuda)
        self.criterion = get_loss(problem_type=self.problem_type)
        if self.is_cuda:
            self.network.cuda()
            self.criterion.cuda()

        self.optimizer = Adam(self.network.parameters(),
                              lr=self.lr,
                              weight_decay=self.weight_decay)
        x_batch, y_batch = self.dataset.x_train, self.dataset.y_train
        x_train, x_val, y_train, y_val = self.train_val_split(
            x_batch, y_batch, problem_type=self.problem_type, val_ratio=0.2)
        x_train, _ = _prepare_batch_data(
            x_batch=x_train,
            y_batch=y_train,
            problem_type=self.problem_type,
            is_gpu=False,  # this could be removed
            n_input=genome.n_input,
            n_output=genome.n_output,
            n_samples=self.n_samples)

        x_val, _ = _prepare_batch_data(x_batch=x_val,
                                       y_batch=y_val,
                                       problem_type=self.problem_type,
                                       is_gpu=False,
                                       n_input=genome.n_input,
                                       n_output=genome.n_output,
                                       n_samples=self.n_samples)

        if self.is_cuda:
            x_train = x_train.cuda()
            y_train = y_train.cuda()
            x_val = x_val.cuda()
            y_val = y_val.cuda()

        self.network.train()
        for epoch in range(self.n_epochs):
            loss_epoch = self._train_one(x_train, y_train, kl_qw_pw)
            # if epoch % 10 == 0:
            _, _, _, loss_val = self._evaluate(x_val,
                                               y_val,
                                               network=self.network)

            if loss_val < self.best_loss_val:
                self.best_loss_val = loss_val
                self.best_network_state = copy.deepcopy(
                    self.network.state_dict())
                self.last_update = epoch

            if epoch - self.last_update > N_EPOCHS_WITHOUT_IMPROVING:
                print(
                    f'Breaking training as not improving for {N_EPOCHS_WITHOUT_IMPROVING} epochs'
                )
                break

            if epoch % 200 == 0:
                print(f'Epoch = {epoch}. Training Loss: {loss_epoch}. '
                      f'Best Val. Loss: {self.best_loss_val}')
        self.network.clear_non_existing_weights(
            clear_grad=False)  # reset non-existing weights
        self.final_loss = loss_epoch
        print(f'Final Epoch = {epoch}. Training Loss: {self.final_loss}')
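
Two methods referenced but not shown here: get_best_network (used in Example 1) presumably restores the checkpoint saved in best_network_state, and _train_one performs a single optimization step. The sketches below are hypothetical reconstructions consistent with the surrounding calls, not the project's code; in particular, the criterion's signature and how the KL term enters the loss are assumptions.

    def get_best_network(self):
        # restore the weights with the lowest validation loss seen during training
        self.network.load_state_dict(self.best_network_state)
        self.network.eval()
        return self.network

    def _train_one(self, x_batch, y_batch, kl_qw_pw):
        # hypothetical ELBO-style step: data-fit term plus beta-weighted KL.
        # A real implementation would recompute the KL from the current
        # variational parameters rather than reuse the initial kl_qw_pw.
        self.optimizer.zero_grad()
        output = self.network(x_batch)
        loss = self.criterion(output, y_batch) + self.beta * kl_qw_pw
        loss.backward()
        self.optimizer.step()
        return loss.item()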