예제 #1
0
def evaluate_model(model, train_data, test_data, trainvars,
                   global_settings, choose_data, particles, nodeWise=False):
    """Predict on the train and test samples and summarise their ROC results.

    Parameters:
    ----------
    model : object
        Object exposing ``predict``; for the 'lbn' method it is called with
        a ``[low_level, high_level]`` input list and ``batch_size=1024``.
    train_data, test_data : table indexed by column name
        Must provide the ``trainvars`` columns as well as 'multitarget' and
        'evtWeight' (presumably pandas DataFrames -- confirm with callers).
    trainvars : list
        Columns used as model input.
    global_settings : dict
        Read for 'ml_method' and, when plotting, 'output_dir'.
    choose_data : str
        Prefix of the plot labels; '_train' / '_test' is appended to it.
    particles : object
        Forwarded to the ``dlt`` input builders and to ``plot_DNNScore``.
    nodeWise : bool
        When True, no plots are produced.

    Returns:
    -------
    train_info, test_info : dict
        Each holds the keys 'fpr', 'tpr', 'auc', 'type' and 'prediction'.
    """
    use_lbn = global_settings['ml_method'] == 'lbn'

    def _predict(sample):
        # 'lbn' takes a [low-level, high-level] input pair; every other
        # method is fed the plain matrix of training variables.
        if use_lbn:
            model_inputs = [
                dlt.get_low_level(sample, particles),
                dlt.get_high_level(sample, particles, trainvars),
            ]
            return model.predict(model_inputs, batch_size=1024)
        return model.predict(sample[trainvars].values)

    def _roc(sample, scores):
        return mt.roc_curve(
            sample['multitarget'].astype(int),
            scores,
            sample['evtWeight'].astype(float),
        )

    def _summary(kind, fpr, tpr, area, scores):
        return {
            'fpr': fpr,
            'tpr': tpr,
            'auc': area,
            'type': kind,
            'prediction': scores,
        }

    train_scores = _predict(train_data)
    test_scores = _predict(test_data)

    if not nodeWise:
        out_dir = global_settings["output_dir"]
        method = global_settings['ml_method']
        # Confusion matrices are drawn test-first, score plots train-first.
        confusion_jobs = (
            (test_data, test_scores, '_test'),
            (train_data, train_scores, '_train'),
        )
        for sample, scores, suffix in confusion_jobs:
            plot_confusion_matrix(
                sample, scores, out_dir, choose_data + suffix)
        for sample, suffix in ((train_data, '_train'), (test_data, '_test')):
            plot_DNNScore(
                sample, model, method, out_dir, trainvars, particles,
                choose_data + suffix)

    test_fpr, test_tpr = _roc(test_data, test_scores)
    train_fpr, train_tpr = _roc(train_data, train_scores)
    # NOTE(review): the `reorder` keyword looks like the one older
    # scikit-learn releases accepted for `auc` -- confirm which `auc` this is.
    train_auc = auc(train_fpr, train_tpr, reorder=True)
    test_auc = auc(test_fpr, test_tpr, reorder=True)

    train_info = _summary('train', train_fpr, train_tpr, train_auc,
                          train_scores)
    test_info = _summary('test', test_fpr, test_tpr, test_auc, test_scores)
    return train_info, test_info
예제 #2
0
def plot_DNNScore(data, model, lbn, output_dir, trainvars, particles,
                  addition):
    """Tag every row with its strongest output node and plot the scores.

    For each distinct value of ``data["process"]`` the model is evaluated on
    the rows of that process; the index and the value of the largest output
    are written to the 'max_node_pos' and 'max_node_val' columns of ``data``
    (the frame is modified in place). The filled frame is then handed to
    ``hhvt.plot_DNNScore``.

    Parameters:
    ----------
    data : table indexed by column name
        Must provide a 'process' column and the model inputs (presumably a
        pandas DataFrame -- ``.loc`` with a boolean mask is used).
    model : object
        Object exposing ``predict``.
    lbn : str
        Method name; 'lbn' selects the ``[low_level, high_level]`` input
        pair, anything else the plain ``trainvars`` matrix.
    output_dir : str
        Forwarded to ``hhvt.plot_DNNScore``.
    trainvars : list
        Columns used as model input.
    particles : object
        Forwarded to the ``dlt`` input builders.
    addition : str
        Forwarded to ``hhvt.plot_DNNScore``.
    """
    data["max_node_pos"] = -1
    data["max_node_val"] = -1
    for process in set(data["process"]):
        process_mask = data["process"] == process
        # Slice into a separate name: rebinding `data` here would shrink the
        # frame to the first process and leave the remaining ones unfilled.
        process_only_data = data.loc[process_mask]
        if lbn != 'lbn':
            value = model.predict(process_only_data[trainvars].values)
        else:
            value = model.predict(
                [
                    dlt.get_low_level(process_only_data, particles),
                    dlt.get_high_level(
                        process_only_data, particles, trainvars),
                ],
                batch_size=1024)
        data.loc[process_mask, "max_node_pos"] = np.argmax(value, axis=1)
        data.loc[process_mask, "max_node_val"] = np.amax(value, axis=1)
    hhvt.plot_DNNScore(data, output_dir, addition)
예제 #3
0
 def fit_model(self):
     """Fit ``self.model`` on the training sample and optionally plot history.

     Training and validation inputs are ``[low_level, high_level]`` pairs
     built by ``dlt`` for the particles of ``self.channel``; targets come
     from the 'multitarget' column and sample weights from 'totalWeight'.
     When ``self.plot_history`` is truthy the fit history is passed to
     ``hhvt.plot_loss_accuracy``.
     """
     channel_particles = self.particles[self.channel]

     train_inputs = [
         dlt.get_low_level(self.train_data, channel_particles),
         dlt.get_high_level(
             self.train_data, channel_particles, self.trainvars),
     ]
     train_targets = self.train_data['multitarget'].values
     train_weights = self.train_data['totalWeight'].values

     val_inputs = [
         dlt.get_low_level(self.val_data, channel_particles),
         dlt.get_high_level(
             self.val_data, channel_particles, self.trainvars),
     ]
     # Validation triple: (inputs, targets, sample weights).
     validation = (
         val_inputs,
         self.val_data["multitarget"].values,
         self.val_data["totalWeight"].values,
     )

     history = self.model.fit(
         train_inputs,
         train_targets,
         epochs=self.epoch,
         batch_size=self.batch_size,
         sample_weight=train_weights,
         validation_data=validation,
         callbacks=[self.reduce_lr, self.early_stopping],
     )
     if not self.plot_history:
         return
     hhvt.plot_loss_accuracy(history, self.output_dir, self.addition)
예제 #4
0
def evaluate_model(data_dict, global_settings, model):
    """Score a model with the fitness function selected in the settings.

    Predictions for the train and test samples are computed from the
    ``[low_level, high_level]`` inputs built by ``dlt`` and then handed to
    the fitness function named by ``global_settings['fitness_fn']``.

    Parameters:
    ----------
    data_dict : dict
        Must provide the keys 'trainvars', 'train' and 'test'; it is also
        passed unchanged to the fitness function.
    global_settings : dict
        Must provide the keys 'fitness_fn', 'channel' and 'kappa'.
    model : object
        Object exposing ``predict(inputs, batch_size=...)`` that accepts a
        two-element input list.

    Returns:
    -------
    score
        The value returned by ``et.calculate_d_roc`` ('d_roc') or
        ``et.calculate_d_ams`` ('d_ams').

    Raises:
    ------
    ValueError
        If 'fitness_fn' is neither 'd_roc' nor 'd_ams'. The check runs
        before any prediction is made.
    """
    fitness_fn = global_settings['fitness_fn']
    # Fail fast: reject an unsupported fitness function before the
    # predictions are computed.
    if fitness_fn not in ('d_roc', 'd_ams'):
        raise ValueError(
            'The ' + str(fitness_fn) + ' fitness_fn is not implemented'
        )

    trainvars = data_dict['trainvars']
    train_data = data_dict['train']
    test_data = data_dict['test']
    particles = PARTICLE_INFO[global_settings['channel']]
    pred_train = model.predict(
        [dlt.get_low_level(train_data, particles),
         dlt.get_high_level(train_data, particles, trainvars)],
        batch_size=1024)
    pred_test = model.predict(
        [dlt.get_low_level(test_data, particles),
         dlt.get_high_level(test_data, particles, trainvars)],
        batch_size=1024)
    kappa = global_settings['kappa']
    if fitness_fn == 'd_roc':
        return et.calculate_d_roc(
            data_dict, pred_train, pred_test, kappa=kappa, multiclass=True)
    return et.calculate_d_ams(
        data_dict, pred_train, pred_test, kappa=kappa)
예제 #5
0
 def predict_from_model(self, data_):
     """Return ``self.model.predict`` for the inputs built from ``data_``.

     The low-level and high-level inputs are built by ``dlt`` for the
     particles of ``self.channel`` and passed to the model as a
     two-element list.
     """
     channel_particles = self.particles[self.channel]
     model_inputs = [
         dlt.get_low_level(data_, channel_particles),
         dlt.get_high_level(data_, channel_particles, self.trainvars),
     ]
     return self.model.predict(model_inputs)