Example #1
 def sgd_train_without_validation(self, data):
     x_train = data.results['inputs']
     y_train = data.results['targets']
     looper = trange(self.hyperparams['nr_epochs'], desc='Initialising')
     for epoch in looper:
         self.train_step(x_train, y_train)
         with torch.no_grad():
             prediction = self.processor(x_train)
             data.results['performance_history'][epoch] = self.loss_fn(
                 prediction, y_train).item()
         if self.configs['checkpoints']['use_checkpoints'] and (
                 (epoch + 1) % self.configs['checkpoints']['save_interval']
                 == 0):
             save('torch',
                  os.path.join(self.default_checkpoints_dir,
                               'checkpoint.pt'),
                  data=self.processor)
         error = data.results['performance_history'][epoch]
         looper.set_description(
             f' Epoch: {epoch} Training Error: {error}')
         if error <= self.hyperparams['stop_threshold']:
             print(
                 f"Reached threshold error {self.hyperparams['stop_threshold']}. Stopping"
             )
             break
     data.set_result_as_numpy('best_output', prediction)
     return data
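All of these examples call a project-level `save` helper that is used but never defined on this page. Below is a minimal sketch of a mode-dispatching saver consistent with most of the call sites (a `mode` of 'torch', 'pickle' or 'configs', a file path, and a `data` payload); the signature and behavior are inferred from usage, not taken from the project. Note that Examples #2 and #6 call a variant with a different signature (`configs`, `path`, `filename` keywords).

import json
import pickle

import torch


def save(mode, file_path, data=None):
    # Hypothetical dispatcher inferred from the call sites on this page.
    if mode == 'torch':
        torch.save(data, file_path)  # model object or state dict
    elif mode == 'pickle':
        with open(file_path, 'wb') as f:  # arbitrary results dictionary
            pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL)
    elif mode == 'configs':
        with open(file_path, 'w') as f:  # configuration dictionary as JSON
            json.dump(data, f, indent=4)
    else:
        raise NotImplementedError(f'Unknown save mode: {mode}')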
Example #2
 def save_results(self):
     save_directory = create_directory_timestamp(self.save_path,
                                                 self.save_dir)
     save(mode='pickle',
          configs=self.config_dict,
          path=save_directory,
          filename='result',
          dictionary=self.results.results)
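`create_directory_timestamp` is another project helper that only appears at its call sites. A plausible minimal sketch, assuming it creates and returns a results directory whose name carries a timestamp (the exact name format is a guess):

import os
import time


def create_directory_timestamp(base_path, name):
    # Hypothetical helper: <base_path>/<name>_<timestamp>, created on demand.
    timestamp = time.strftime('%Y_%m_%d_%H%M%S')
    directory = os.path.join(base_path, f'{name}_{timestamp}')
    os.makedirs(directory, exist_ok=True)
    return directory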
Example #3
 def save_results(self, save_data):
     if save_data:
         save(mode='pickle',
              file_path=os.path.join(self.default_output_dir,
                                     'results.pickle'),
              data=self.data.results)
         save(mode='configs',
              file_path=os.path.join(self.default_output_dir,
                                     'configs.json'),
              data=self.configs)
Example #4
    def run_test(self, validate=False):
        print(
            '*****************************************************************************************'
        )
        print(
            f"CAPACITY TEST FROM VCDIM {self.configs['from_dimension']} TO VCDIM {self.configs['to_dimension']} "
        )
        print(
            '*****************************************************************************************'
        )
        save(mode='configs', file_path=self.configs_dir, data=self.configs)
        self.summary_results = {
            'capacity_per_N': [],
            'accuracy_distrib_per_N': [],
            'performance_distrib_per_N': [],
            'correlation_distrib_per_N': []
        }
        while True:
            capacity, accuracy_array, performance_array, correlation_array = self.vcdimension_test.run_test(
                self.current_dimension, validate=validate)
            self.summary_results['capacity_per_N'].append(capacity)
            self.summary_results['accuracy_distrib_per_N'].append(
                accuracy_array[1:-1])
            self.summary_results['performance_distrib_per_N'].append(
                performance_array[1:-1])
            self.summary_results['correlation_distrib_per_N'].append(
                correlation_array[1:-1])
            if not self.next_vcdimension():
                break

        self.vcdimension_test.close_results_file()
        self.plot_summary()
        dict_loc = os.path.join(
            self.configs['vc_dimension_test']['results_base_dir'],
            'summary_results.pkl')
        with open(dict_loc, 'wb') as fp:
            pickle.dump(self.summary_results,
                        fp,
                        protocol=pickle.HIGHEST_PROTOCOL)
        print(
            '*****************************************************************************************'
        )
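`next_vcdimension` is assumed to advance `self.current_dimension` and to report whether the sweep from `from_dimension` to `to_dimension` should continue. A sketch consistent with the loop above (hypothetical, not the project's code):

def next_vcdimension(self):
    # Step to the next VC dimension; stop once 'to_dimension' is exceeded.
    self.current_dimension += 1
    return self.current_dimension <= self.configs['to_dimension']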
Example #5
    def sgd_train_with_validation(self, data):
        x_train = data.results['inputs']
        y_train = data.results['targets']
        x_val = data.results['inputs_val']
        y_val = data.results['targets_val']
        looper = trange(self.hyperparams['nr_epochs'], desc=' Initialising')
        for epoch in looper:

            self.train_step(x_train, y_train)
            (data.results['performance_history'][epoch, 0],
             prediction_training,
             data.results['target_indices']) = self.evaluate_training_error(
                 x_val, x_train, y_train)
            (data.results['performance_history'][epoch, 1],
             prediction_validation) = self.evaluate_validation_error(
                 x_val, y_val)
            if self.configs['checkpoints']['use_checkpoints'] and (
                    (epoch + 1) % self.configs['checkpoints']['save_interval']
                    == 0):
                save('torch',
                     os.path.join(self.default_checkpoints_dir,
                                  'checkpoint.pt'),
                     data=self.processor)
            training_error = data.results['performance_history'][epoch, 0]
            validation_error = data.results['performance_history'][epoch, 1]
            looper.set_description(
                f' Epoch: {epoch} Training Error: {training_error}'
                f' Val. Error: {validation_error}')
            if (training_error <= self.hyperparams['stop_threshold']
                    and validation_error
                    <= self.hyperparams['stop_threshold']):
                print(
                    f"Reached threshold error {self.hyperparams['stop_threshold']} for training and validation. Stopping"
                )
                break
        data.set_result_as_numpy('best_output', prediction_validation)
        data.set_result_as_numpy('best_output_training', prediction_training)
        return data
Example #6
def trainer(data, network, config_dict, loss_fn=torch.nn.MSELoss()):
    # Assumes torch, numpy (as np) and the project helpers `save` and
    # `create_directory_timestamp` are imported at module level.
    # set configurations
    if "seed" in config_dict.keys():
        torch.manual_seed(config_dict['seed'])
        print('The torch RNG is seeded with ', config_dict['seed'])

    if "betas" in config_dict.keys():
        optimizer = torch.optim.Adam(network.parameters(),
                                     lr=config_dict['learning_rate'],
                                     betas=config_dict["betas"])
        print("Set betas to values: ", {config_dict["betas"]})
    else:
        optimizer = torch.optim.Adam(network.parameters(),
                                     lr=config_dict['learning_rate'])
    print('Training with the Adam optimizer')
    if 'results_path' in config_dict:
        dir_path = create_directory_timestamp(config_dict['results_path'],
                                              config_dict['experiment_name'])
    else:
        dir_path = None

    # Define variables
    x_train, y_train = data[0]
    x_val, y_val = data[1]
    costs = np.zeros((config_dict['nr_epochs'],
                      2))  # training and validation costs per epoch

    for epoch in range(config_dict['nr_epochs']):

        network.train()
        permutation = torch.randperm(x_train.size(0))  # permute sample indices

        for mb in range(0, len(permutation), config_dict['batch_size']):

            # Get prediction
            indices = permutation[mb:mb + config_dict['batch_size']]
            x_mb = x_train[indices]
            y_pred = network(x_mb)
            # GD step
            if hasattr(network, 'regularizer'):
                loss = loss_fn(y_pred,
                               y_train[indices]) + network.regularizer()
            else:
                loss = loss_fn(y_pred, y_train[indices])

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # Evaluate training error on a random subsample the size of the
        # validation set, then the validation error; no_grad avoids
        # building an autograd graph during evaluation
        network.eval()
        with torch.no_grad():
            samples = len(x_val)
            get_indices = torch.randperm(len(x_train))[:samples]
            x_sampled = x_train[get_indices]
            prediction = network(x_sampled)
            target = y_train[get_indices]
            costs[epoch, 0] = loss_fn(prediction, target).item()
            # Evaluate validation error
            prediction = network(x_val)
            costs[epoch, 1] = loss_fn(prediction, y_val).item()

        # SGD_CONFIGS is assumed to be a module-level configuration dict
        if dir_path and (epoch + 1) % SGD_CONFIGS['save_interval'] == 0:
            save('torch',
                 config_dict,
                 dir_path,
                 f'checkpoint_epoch{epoch}.pt',
                 torch_model=network)

        if epoch % 10 == 0:
            print('Epoch:', epoch, 'Val. Error:', costs[epoch, 1],
                  'Training Error:', costs[epoch, 0])

    if dir_path:
        save('torch',
             config_dict,
             dir_path,
             'trained_network.pt',
             torch_model=network)
    return costs
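`trainer` expects `data` as an indexable pair of (inputs, targets) splits and a `config_dict` with at least `learning_rate`, `nr_epochs` and `batch_size`. A minimal usage sketch with a toy network and random tensors, assuming `trainer` from Example #6 is in scope (all shapes and values below are illustrative):

import torch

network = torch.nn.Sequential(torch.nn.Linear(2, 10),
                              torch.nn.ReLU(),
                              torch.nn.Linear(10, 1))
x_train, y_train = torch.rand(100, 2), torch.rand(100, 1)
x_val, y_val = torch.rand(20, 2), torch.rand(20, 1)

config_dict = {
    'learning_rate': 1e-3,
    'nr_epochs': 50,
    'batch_size': 16,
    # Optional keys: 'seed', 'betas', 'results_path', 'experiment_name'.
}

# costs[:, 0] is the training error per epoch, costs[:, 1] the validation error.
costs = trainer([(x_train, y_train), (x_val, y_val)], network, config_dict)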
Example #7
 def save_reproducibility_data(self, result):
     save(mode='configs', file_path=os.path.join(self.reproducibility_dir, 'configs.json'), data=self.configs)
     save(mode='torch', file_path=os.path.join(self.reproducibility_dir, 'model.pt'), data=self.algorithm.processor)
     save(mode='pickle', file_path=os.path.join(self.reproducibility_dir, 'results.pickle'), data=result)
Example #8
    def optimize(self,
                 inputs,
                 targets,
                 validation_data=(None, None),
                 data_info=None,
                 mask=None,
                 save_data=True):
        """Wraps trainer function in sgd_torch for use in algorithm_manager.
        """
        assert isinstance(
            inputs, torch.Tensor
        ), f"Inputs must be a torch.Tensor, but are {type(inputs)}"
        assert isinstance(
            targets, torch.Tensor
        ), f"Targets must be a torch.Tensor, but are {type(targets)}"

        if save_data and 'results_base_dir' in self.configs:
            self.init_dirs(self.configs['results_base_dir'])
        if ('debug' in self.processor.configs
                and self.processor.configs['debug']
                and self.processor.configs['architecture']
                == 'device_architecture'):
            self.processor.init_dirs(self.configs['results_base_dir'])
        self.reset()

        if data_info is not None:
            # This case is only used when using the GD for creating a new model
            print('Using the Gradient Descent for Surrogate Model Generation.')
            try:
                if self.processor.info is not None:
                    print('The model is being retrained as a surrogate model')
                    self.processor.info['data_info_retrain'] = data_info
                    self.processor.info['smg_configs_retrain'] = self.configs
            except AttributeError:
                self.processor.info = {}
                print(
                    'The model has been generated from scratch as a torch_model'
                )
                self.processor.info['data_info'] = data_info
                self.processor.info['smg_configs'] = self.configs

        data = GDData(inputs,
                      targets,
                      self.hyperparams['nr_epochs'],
                      self.processor,
                      validation_data,
                      mask=mask)

        if validation_data[0] is not None and validation_data[1] is not None:
            data = self.sgd_train_with_validation(data)
        else:
            data = self.sgd_train_without_validation(data)

        if save_data:
            save(mode='configs',
                 file_path=os.path.join(self.default_output_dir,
                                        'configs.json'),
                 data=self.configs)
            save(mode='torch',
                 file_path=os.path.join(self.default_output_dir, 'model.pt'),
                 data=self.processor)
            save(mode='pickle',
                 file_path=os.path.join(self.default_output_dir,
                                        'results.pickle'),
                 data=data.results)
        return data
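Whether `optimize` runs the validation variant from Example #5 or the plain loop from Example #1 depends only on `validation_data`. A hedged usage sketch (the `algorithm` instance and the tensor shapes are assumptions):

import torch

inputs, targets = torch.rand(100, 7), torch.rand(100, 1)
x_val, y_val = torch.rand(20, 7), torch.rand(20, 1)

# Without validation data: dispatches to sgd_train_without_validation.
data = algorithm.optimize(inputs, targets, save_data=False)

# With validation data: dispatches to sgd_train_with_validation.
data = algorithm.optimize(inputs, targets,
                          validation_data=(x_val, y_val),
                          save_data=False)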
Example #9
    def optimize(self,
                 inputs,
                 targets,
                 validation_data=(None, None),
                 mask=None,
                 save_data=True):
        '''
        inputs: the inputs to the algorithm, as numpy arrays. The GA also
            requires the input to be a waveform.
        targets: the targets to which the algorithm will try to fit the
            inputs, as numpy arrays.
        validation_data: optionally, a (training_data, validation_data)
            tuple. Note that the GA does not process validation data.
        mask: when the input is a waveform, the mask helps filter out the
            slopes of the waveform.
        '''
        self.reset()
        if save_data and 'results_base_dir' in self.configs:
            self.init_dirs(self.configs['results_base_dir'])
        if (validation_data[0] is not None
                and validation_data[1] is not None):
            print(
                '======= WARNING: Validation data is not processed in GA ======='
            )

        self.data = GAData(inputs, targets, mask,
                           self.configs['hyperparameters'])
        self.pool = np.zeros((self.genomes, self.genes))
        self.opposite_pool = np.zeros((self.genomes, self.genes))
        for i in range(self.genes):
            self.pool[:, i] = np.random.uniform(self.generange[i][0],
                                                self.generange[i][1],
                                                size=(self.genomes, ))

        # Evolution loop
        looper = trange(self.generations, desc='Initialising', leave=False)
        for gen in looper:

            self.outputs = self.evaluate_population(
                inputs, self.pool, self.data.results['targets'])
            self.fitness = self.fitness_function(
                self.outputs[:, self.data.results['mask']],
                self.data.results['targets'][self.data.results['mask']],
                clipvalue=self.clipvalue)

            self.data.update({
                'generation': gen,
                'genes': self.pool,
                'outputs': self.outputs,
                'fitness': self.fitness
            })
            looper.set_description(self.data.get_description(gen))

            if self.check_threshold(save_data):
                break

            if (self.use_checkpoints
                    and gen % self.checkpoint_frequency == 0):
                save(mode='pickle',
                     file_path=os.path.join(self.default_checkpoints_dir,
                                            'result.pickle'),
                     data=self.data.results)

            self.next_gen(gen)

        self.save_results(save_data)
        return self.data
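For comparison, the GA variant takes numpy inputs and an optional boolean mask. A usage sketch under the same caveats (the `ga` instance and the shapes are illustrative):

import numpy as np

inputs = np.random.rand(100, 7)
targets = np.random.rand(100)
mask = np.ones(100, dtype=bool)  # with waveform inputs this would mask out the slopes

data = ga.optimize(inputs, targets, mask=mask, save_data=False)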