Example 1
 def create_member(self, data_files):
     #Gets the training indexes
     if self.member_number > 0:
         train_indexes = \
             self.resampler.make_new_train(self.params.resample_size)
     else:
         train_indexes = [None, None]
     #Packs the needed data
     dataset = [train_indexes, data_files]
     #Trains the model
     m = mlp.sequential_model(dataset,
                              self.params,
                              member_number=self.member_number)
     #Gets the errors for the train set and updates the weights
     print('Getting the train errors and updating the weights')
     errors = common.errors(m, data_files[0], self.params.batch_size)
     e = np.sum(errors * self.D)
     if e > 0:
         n_classes = data_files[0]['y'].shape[1]
         alpha = .5 * (math.log((1 - e) / e) + math.log(n_classes - 1))
         if alpha <= 0.0:
             #By setting alpha to 0 (instead of crashing), we should avoid
             # CircleCI problems
             print("\nWARNING - NEGATIVE ALPHA (setting to 0.0)\n")
             alpha = 0.0
         w = np.where(errors == 1, self.D * math.exp(alpha),
                      self.D * math.exp(-alpha))
         self.D = w / w.sum()
     else:
         alpha = 1.0 / (self.member_number + 1)
     self.resampler.update_weights(self.D)
     self.alphas.append(alpha)
     self.member_number += 1
     return (m.to_yaml(), m.get_weights())
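Note: this is a SAMME-style multi-class update, where alpha = .5 * (log((1 - e) / e) + log(K - 1)) stays positive as long as the weighted error e is below the random-guess rate (K - 1) / K. A minimal standalone sketch of the same step, assuming only NumPy and a 0/1 misclassification vector like the one common.errors returns:

    import math
    import numpy as np

    def samme_step(errors, D, n_classes):
        #weighted error of the new member under the current distribution D
        e = np.sum(errors * D)
        alpha = .5 * (math.log((1 - e) / e) + math.log(n_classes - 1))
        #up-weight misclassified samples, down-weight the rest, renormalize
        w = np.where(errors == 1, D * math.exp(alpha), D * math.exp(-alpha))
        return alpha, w / w.sum()

    #toy usage: 4 samples, uniform weights, one mistake, 3 classes
    alpha, D = samme_step(np.array([0, 1, 0, 0]), np.full(4, .25), 3)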
Example 2
 def create_member(self):
     train_set, sample_weights = self.resampler.make_new_train(
         self.params.resample_size)
     if self.member_number > 0:
         resampled = [
             train_set,
             self.resampler.get_valid(),
             self.resampler.get_test()
         ]
     else:
         sample_weights = None
         resampled = [
             self.resampler.get_train(),
             self.resampler.get_valid(),
             self.resampler.get_test()
         ]
     m = mlp.sequential_model(resampled,
                              self.params,
                              member_number=self.member_number,
                              sample_weight=sample_weights)
     orig_train = self.resampler.get_train()
     errors = common.errors(m, orig_train[0], orig_train[1])
     e = np.sum(errors * self.D)
     if e > 0:
         alpha = .5 * math.log((1 - e) / e)
         w = np.where(errors == 1, self.D * math.exp(alpha),
                      self.D * math.exp(-alpha))
         self.D = w / w.sum()
     else:
         alpha = 1.0 / (self.member_number + 1)
     self.resampler.update_weights(self.D)
     self.alphas.append(alpha)
     self.member_number += 1
     return (m.to_yaml(), m.get_weights())
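Unlike Example 1, this variant uses the plain binary-AdaBoost member weight alpha = .5 * log((1 - e) / e), without the log(K - 1) correction, so alpha goes negative as soon as the weighted error e exceeds 0.5. A quick numeric sanity check:

    import math
    assert .5 * math.log((1 - .25) / .25) > 0  #e = 0.25: positive member weight
    assert .5 * math.log((1 - .75) / .75) < 0  #e = 0.75: member would be down-weighted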
Example 3
    def create_member(self):
        train_set, sample_weights = self.resampler.make_new_train(
            self.params.resample_size)
        if self.member_number > 0:
            resampled = [
                train_set,
                self.resampler.get_valid(),
                self.resampler.get_test()
            ]
        else:
            resampled = [
                self.resampler.get_train(),
                self.resampler.get_valid(),
                self.resampler.get_test()
            ]
        m = mlp.sequential_model(
            resampled,
            self.params,  # <--- trains the new model (step 2 in [1])
            member_number=self.member_number)
        orig_train = self.resampler.get_train()

        distance = common.distance(
            m, orig_train[0],
            orig_train[1])  # <--- loss for each element (steps 3 and 4 in [1])

        max_dist = distance.max()
        distance_norm = distance / max_dist  # <--- the loss function is now normalized in range [0,1]

        weighted_dist = np.sum(distance_norm * self.D)  # <--- average weighted loss (step 5 in [1])

        beta = weighted_dist / (1 - weighted_dist)  # <--- confidence in the predictor (step 6 in [1])
        #       [low beta = good prediction]

        w = self.D * beta**(1 - distance_norm)  # <--- updates the weights for each sample (step 7 in [1])
        self.D = w / w.sum()

        #alpha: the better the model (the smaller beta), the larger alpha will be
        # [alpha is computed to maintain consistency with the other boosting variants]
        alpha = 0.5 * math.log(1 / beta)

        self.resampler.update_weights(self.D)
        self.alphas.append(alpha)
        self.member_number += 1
        return (m.to_yaml(), m.get_weights())
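The steps referenced as [1] are not shown here, but the update matches the AdaBoost.R2 recipe: normalize the per-sample losses, average them under the current weights, turn that into a confidence beta, and shrink the weights of well-predicted samples. A minimal sketch of the same arithmetic, assuming a non-negative loss vector like the one common.distance returns:

    import math
    import numpy as np

    def r2_step(loss, D):
        loss_norm = loss / loss.max()    #normalize losses into [0, 1]
        L_bar = np.sum(loss_norm * D)    #average weighted loss
        beta = L_bar / (1 - L_bar)       #low beta = good prediction
        w = D * beta ** (1 - loss_norm)  #shrink weights of well-predicted samples
        alpha = .5 * math.log(1 / beta)  #member weight for the aggregator
        return alpha, w / w.sum()

    alpha, D = r2_step(np.array([.1, .8, .3]), np.full(3, 1 / 3))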
Example 4
 def create_member(self, data_files):
     #gets the training indexes
     if self.member_number > 0:
         train_indexes = self.resampler.make_new_train(
             self.params.resample_size)
     else:
         train_indexes = [None, None]
     #packs the needed data
     dataset = [train_indexes, data_files]
     #trains the model
     m = mlp.sequential_model(dataset,
                              self.params,
                              member_number=self.member_number)
     self.member_number += 1
     return (m.to_yaml(), m.get_weights())
Example 5
 def create_member(self):
     self.set_defaults()
     if self.member_number > 0:
         train_set, sample_weights = self.resampler.make_new_train(
             self.params.resample_size)
         resampled = [
             train_set,
             self.resampler.get_valid(),
             self.resampler.get_test()
         ]
     else:
         resampled = [
             self.resampler.get_train(),
             self.resampler.get_valid(),
             self.resampler.get_test()
         ]
     if self.member_number > 0:
         self.params.n_epochs = self.n_epochs_after_first
     m = mlp.sequential_model(
         resampled,
         self.params,
         member_number=self.member_number,
         model_weights=self.weights,
         #the copy works around a Keras bug that deletes layer names
         model_config=copy.deepcopy(self.model_config),
         frozen_layers=self.frozen_layers)
     self.weights = [l.get_weights() for l in m.layers]
     injection_index = self.incremental_index + self.member_number
     if self.incremental_layers is not None:
         if injection_index == -1:
             injection_index = len(self.model_config)
         new_layers = [copy.deepcopy(l) for l in self.incremental_layers]
         #make residual block
         new_block = self._residual_block(injection_index, new_layers, m,
                                          self.member_number)
         new_model_config = self.model_config[:injection_index] + [
             new_block
         ] + self.model_config[injection_index:]
         if self.freeze_old_layers:
             self.frozen_layers = list(range(0, injection_index))
         self.model_config = copy.deepcopy(new_model_config)
         self.weights = self.weights[:injection_index]
     self.member_number += 1
     return (m.to_yaml(), m.get_weights())
Example 6
 def create_member(self):
     train_set, sample_weights = self.resampler.make_new_train(
         self.params.resample_size)
     if self.member_number > 0:
         resampled = [
             train_set,
             self.resampler.get_valid(),
             self.resampler.get_test()
         ]
     else:
         resampled = [
             self.resampler.get_train(),
             self.resampler.get_valid(),
             self.resampler.get_test()
         ]
     m = mlp.sequential_model(resampled,
                              self.params,
                              member_number=self.member_number)
     self.member_number += 1
     return (m.to_yaml(), m.get_weights())
Example 7
 def create_member(self, data_files):
     #Gets the training indexes and defines c, if needed
     if self.member_number > 0:
         train_indexes = \
             self.resampler.make_new_train(self.params.resample_size)
     else:
         train_indexes = [None, None]
         sample_counts = common.count_classes(data_files[0])
         self.c = np.sum(np.square(sample_counts / self.train_size))
     #Packs the needed data
     dataset = [train_indexes, data_files]
     #Trains the model
     m = mlp.sequential_model(dataset,
                              self.params,
                              member_number=self.member_number)
     #Gets the errors for the train set and updates the weights
     print('Getting the confidence and updating the weights')
     h = common.confidence(m, data_files[0], self.params.batch_size)
     r = np.sum(h * self.D)
     if r > self.c:
         alpha = math.log(((1 - self.c) * r) / (self.c * (1 - r)))
         if alpha <= 0.0:
             #By setting alpha to 0 (instead of crashing), we should avoid
             # CircleCI problems
             print("\nWARNING - NEGATIVE ALPHA (setting to 0.0)\n")
             alpha = 0.0
         w = self.D * np.exp(-alpha * (h - self.c))
         self.D = w / w.sum()
     else:
         #This model should be discarded, since it performs no better
         # than guessing from the class priors
         alpha = 0.0
     self.resampler.update_weights(self.D)
     self.alphas.append(alpha)
     self.member_number += 1
     return (m.to_yaml(), m.get_weights())
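Here c = sum_k (n_k / N)^2 is the expected confidence of a guesser that draws labels from the class priors, so a member is only kept when its weighted confidence r beats that baseline. A minimal sketch of the same test, assuming h holds each sample's predicted confidence in its true class:

    import math
    import numpy as np

    def confidence_step(h, D, class_counts):
        #baseline: expected confidence of a prior-matching random guesser
        c = np.sum(np.square(class_counts / class_counts.sum()))
        r = np.sum(h * D)  #weighted mean confidence
        if r <= c:         #no better than guessing: discard the member
            return 0.0, D
        alpha = math.log(((1 - c) * r) / (c * (1 - r)))
        w = D * np.exp(-alpha * (h - c))  #down-weight confidently-correct samples
        return alpha, w / w.sum()

    alpha, D = confidence_step(np.array([.9, .4, .7]), np.full(3, 1 / 3),
                               np.array([10, 5, 5]))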
Example 8
 def create_member(self):
     self.set_defaults()
     if self.member_number > 0:
         if self.resample:
             train_set, sample_weights = self.resampler.make_new_train(
                 self.params.resample_size)
             resampled = [
                 train_set,
                 self.resampler.get_valid(),
                 self.resampler.get_test()
             ]
         else:
             sample_weights = self.D
             resampled = [
                 self.resampler.get_train(),
                 self.resampler.get_valid(),
                 self.resampler.get_test()
             ]
     else:
         sample_weights = None
         resampled = [
             self.resampler.get_train(),
             self.resampler.get_valid(),
             self.resampler.get_test()
         ]
     if self.member_number > 0:
         self.params.n_epochs = self.n_epochs_after_first
     if not self.use_sample_weights:
         sample_weights = None
     m = mlp.sequential_model(
         resampled,
         self.params,
         member_number=self.member_number,
         model_weights=self.weights,
         #the copy works around a Keras bug that deletes layer names
         model_config=copy.deepcopy(self.model_config),
         frozen_layers=self.frozen_layers,
         sample_weight=sample_weights)
     self.weights = [l.get_weights() for l in m.layers]
     injection_index = self.incremental_index + self.member_number
     if self.incremental_layers is not None:
         if injection_index == -1:
             injection_index = len(self.model_config)
         new_layers = [copy.deepcopy(l) for l in self.incremental_layers]
         #make residual block
         new_block = self._residual_block(injection_index, new_layers, m,
                                          self.member_number)
         new_model_config = self.model_config[:injection_index] + [
             new_block
         ] + self.model_config[injection_index:]
         if self.freeze_old_layers:
             self.frozen_layers = list(range(0, injection_index))
         self.model_config = copy.deepcopy(new_model_config)
         self.weights = self.weights[:injection_index]
     orig_train = self.resampler.get_train()
     K = orig_train[1].shape[1]
     self.n_classes = K
     errors = common.errors(m, orig_train[0], orig_train[1])
     error_rate = np.mean(errors)
     if error_rate >= 1. - (1. / K):
         return (None, None, False)
     if self.real:
         #Real BRN
         print(("-" * 40))
         print(("error rate: {}".format(error_rate)))
         if error_rate > 0:
             continue_boosting = True
             y_coding = np.where(orig_train[1] == 0., -1. / (K - 1), 1.)
             proba = m.predict(orig_train[0])
             proba[proba < np.finfo(proba.dtype).eps] = np.finfo(proba.dtype).eps
             print(proba[:10])
             print(self.D[:10])
             factor = np.exp(
                 -1. * (((K - 1.) / K) * inner1d(y_coding, np.log(proba))))
             print(factor[:10])
             w = self.D * factor
             print(w[:10])
             self.D = w / w.sum()
             self.resampler.update_weights(self.D)
         else:
             continue_boosting = not self.early_stopping
         self.member_number += 1
         return (m.to_yaml(), m.get_weights(), continue_boosting)
     else:
         if error_rate > 0:
             continue_boosting = True
             #e = sum((errors * self.D)) / sum(self.D)
             e = np.average(errors, weights=self.D)
             alpha = math.log((1 - e) / e) + math.log(K - 1)
             factor = np.clip(
                 np.where(errors == 1, math.exp(alpha), math.exp(-alpha)),
                 1e-3, 1e3)
             w = self.D * factor
             self.D = w / w.sum()
             self.resampler.update_weights(self.D)
         else:
             continue_boosting = not self.early_stopping
             alpha = 1. / (self.member_number + 1)
         self.alphas.append(alpha)
         self.member_number += 1
         return (m.to_yaml(), m.get_weights(), continue_boosting)
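The real branch is the SAMME.R ('Real AdaBoost') weight update. Note that inner1d, imported from numpy.core.umath_tests, was never public API and is gone from recent NumPy releases; a row-wise einsum computes the same thing. A minimal sketch of the factor computation under that substitution:

    import numpy as np

    def samme_r_factor(proba, y_onehot):
        K = y_onehot.shape[1]
        #clamp probabilities away from zero before taking logs
        proba = np.clip(proba, np.finfo(proba.dtype).eps, None)
        y_coding = np.where(y_onehot == 0., -1. / (K - 1), 1.)
        #row-wise inner product, equivalent to the old inner1d call
        return np.exp(-((K - 1.) / K) *
                      np.einsum('ij,ij->i', y_coding, np.log(proba)))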
Example 9
 def create_member(self):
     self.set_defaults()
     train_set, sample_weights = self.resampler.make_new_train(
         self.params.resample_size)
     if self.member_number > 0:
         if self.resample:
             resampled = [
                 train_set,
                 self.resampler.get_valid(),
                 self.resampler.get_test()
             ]
         else:
             resampled = [
                 self.resampler.get_train(),
                 self.resampler.get_valid(),
                 self.resampler.get_test()
             ]
     else:
         sample_weights = None
         resampled = [
             self.resampler.get_train(),
             self.resampler.get_valid(),
             self.resampler.get_test()
         ]
     if self.member_number > 0:
         self.params.n_epochs = self.n_epochs_after_first
         if 'lr_after_first' in self.params.__dict__:
             self.params.optimizer['config']['lr'] = self.params.lr_after_first
     if not self.use_sample_weights:
         sample_weights = None
     m = mlp.sequential_model(
         resampled,
         self.params,
         member_number=self.member_number,
         model_weights=self.weights,
         #the copy works around a Keras bug that deletes layer names
         model_config=copy.deepcopy(self.model_config),
         sample_weight=sample_weights)
     self.weights = [l.get_weights() for l in m.layers]
     if self.incremental_layers is not None:
         injection_index = self.incremental_index + \
             self.member_number * len(self.incremental_layers)
         if injection_index == -1:
             injection_index = len(self.model_config)
         new_layers = []
         for i, l in enumerate(self.incremental_layers):
             l['config']['name'] = "DIB-incremental-{0}-{1}".format(
                 self.member_number, i)
             new_layers.append(l)
         new_model_config = (self.model_config[:injection_index] +
                             new_layers + self.model_config[injection_index:])
         self.model_config = copy.deepcopy(new_model_config)
         self.weights = self.weights[:injection_index]
     orig_train = self.resampler.get_train()
     K = orig_train[1].shape[1]
     errors = common.errors(m, orig_train[0], orig_train[1])
     e = np.sum(errors * self.D) / np.sum(errors + np.finfo(np.float32).eps)
     alpha = math.log((1 - e) / e + np.finfo(np.float32).eps) + math.log(K - 1)
     w = np.where(errors == 1, self.D * math.exp(alpha),
                  self.D * math.exp(-alpha))
     self.D = w / w.sum()
     self.resampler.update_weights(self.D)
     self.alphas.append(alpha)
     self.member_number += 1
     m_yaml = m.to_yaml()
     m_weights = m.get_weights()
     del m
     return (m_yaml, m_weights)
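The incremental-layer handling in Examples 5, 8 and 9 comes down to splicing renamed copies of the extra layer configs into the serialized model config. A minimal sketch of that splice, with skeletal dicts standing in for real Keras layer configs:

    import copy

    def splice_layers(model_config, incremental_layers, index, member_number):
        new_layers = []
        for i, layer in enumerate(incremental_layers):
            layer = copy.deepcopy(layer)  #avoid mutating the template configs
            layer['config']['name'] = "DIB-incremental-{0}-{1}".format(
                member_number, i)
            new_layers.append(layer)
        return model_config[:index] + new_layers + model_config[index:]

    #skeletal configs for illustration only
    config = [{'config': {'name': 'dense_0'}}, {'config': {'name': 'out'}}]
    config = splice_layers(config, [{'config': {'name': ''}}], 1, member_number=0)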
Example 10
 for i in range(params.ensemble_size):
     print('training member {0}'.format(i))
     members.append(method.create_member())
     ensemble = method.create_aggregator(params, members, train_set, valid_set)
     test_set_x, test_set_y = method.resampler.get_test()
     test_score = accuracy(ensemble, test_set_x, test_set_y)
     print('Intermediate test accuracy: {0} %'.format(test_score * 100.))
     intermediate_scores.append(test_score)
     final_score = test_score
 print "\nFinal Ensemble test accuracy: {0} %".format(final_score * 100.)
 print "Preparing distillation dataset.."
 train_set_yhat = ensemble.predict(dataset[0][0])
 distilled_dataset = copy.deepcopy(dataset)
 distilled_dataset = ((dataset[0][0], train_set_yhat), dataset[1], dataset[2])
 params = original_params
 mlp = sequential_model(distilled_dataset, params)
 if 'results_db' in params.__dict__:
     if 'results_host' in params.__dict__:
         host = params.results_host
     else:
         host = None
     print "saving results to {0}@{1}".format(params.results_db,host)
     conn = MongoClient(host=host)
     db = conn[params.results_db]
     if 'results_table' in params.__dict__: 
         table_name = params.results_table
     else:
         table_name = 'results'
     table = db[table_name]
     results = {
Example 11
     if m[0] is not None:
         members.append(m[:2])
         ensemble = method.create_aggregator(params, members, train_set, valid_set)
         test_set_x, test_set_y = method.resampler.get_test()
         test_score = accuracy(ensemble, test_set_x, test_set_y)
         print('Intermediate test accuracy: {0} %'.format(test_score * 100.))
         intermediate_scores.append(test_score)
         final_score = test_score
     if len(m) > 2 and not m[2]: #the ensemble method told us to stop
         break
 print("\nFinal Ensemble test accuracy: {0} %".format(final_score * 100.))
 print("Preparing distillation dataset..")
 train_set_yhat = ensemble.predict(dataset[0][0])
 distilled_dataset = ((dataset[0][0], train_set_yhat), dataset[1], dataset[2])
 params = original_params
 mlp = sequential_model(distilled_dataset, params, model_yaml=members[-1][0])
 if args.dump_shapes_to is not None:
     for i in range(len(members)):
         with open("{0}member-{1}.model".format(args.dump_shapes_to, i),"w") as f:
             f.truncate()
             f.write(members[i][0])
 if 'results_db' in params.__dict__:
     if 'results_host' in params.__dict__:
         host = params.results_host
     else:
         host = None
     print("saving results to {0}@{1}".format(params.results_db,host))
     conn = MongoClient(host=host)
     db = conn[params.results_db]
     if 'results_table' in params.__dict__: 
         table_name = params.results_table
Example 12
File: mlp.py Project: nitbix/toupee
    arg_param_pairings = [
        (args.results_db, 'results_db'),
        (args.results_host, 'results_host'),
        (args.results_table, 'results_table'),
        (args.epochs, 'n_epochs'),
    ]
    
    if 'seed' in args.__dict__:
        print "setting random seed to: {0}".format(args.seed)
        numpy.random.seed(args.seed)
    from toupee import data
    from toupee import config 
    from toupee.mlp import sequential_model

    import toupee
    print "using toupee version {0}".format(toupee.version)
    params = config.load_parameters(args.params_file)

    def arg_params(arg_value,param):
        if arg_value is not None:
            params.__dict__[param] = arg_value

    for arg, param in arg_param_pairings:
        arg_params(arg,param)
    dataset = data.load_data(params.dataset,
                             pickled = params.pickled,
                             one_hot_y = params.one_hot,
                             join_train_and_valid = params.join_train_and_valid)
    mlp = sequential_model(dataset, params)
Example 13
        (args.results_host, 'results_host'),
        (args.results_table, 'results_table'),
        (args.epochs, 'n_epochs'),
    ]
    
    if 'seed' in args.__dict__:
        print(("setting random seed to: {0}".format(args.seed)))
        numpy.random.seed(args.seed)
    from toupee import data
    from toupee import config 
    from toupee.mlp import sequential_model

    import toupee
    print(("using toupee version {0}".format(toupee.version)))
    params = config.load_parameters(args.params_file)

    def arg_params(arg_value,param):
        if arg_value is not None:
            params.__dict__[param] = arg_value

    for arg, param in arg_param_pairings:
        arg_params(arg,param)
    dataset = data.load_data(params.dataset,
                             pickled = params.pickled,
                             one_hot_y = params.one_hot,
                             join_train_and_valid = params.join_train_and_valid,
                             zca_whitening = params.zca_whitening)
    mlp = sequential_model(dataset, params)
    if args.save_file is not None:
        mlp.save(args.save_file)