def build_measure_GISFM(ensemble_matrix, target, score):
    """
    Function to create the global ISFM measure
    :param ensemble_matrix: A numpy array of num_classifiers by num_classes by num_instances
    :param target: An array with the real class
    :param score: The score to use for classifier performance calculation
    :return: The GISFM measure
    """
    num_classifiers, num_classes, num_instances = ensemble_matrix.shape
    similarities = compute_similarities(ensemble_matrix)

    # To store the measure
    measure = DataStore.DictDataStore(num_classifiers)
    confidences = np.empty((num_classifiers, ))

    # Get the callable score function
    if score == "acc":
        performance_function = accuracy
    elif score == "tpr":
        performance_function = tpr_mean
    elif score == "gm":
        performance_function = gm
    elif score == "f1":
        performance_function = get_f_measure_function(target)
    elif score == "auc":
        performance_function = get_auc_score_function(target)
    elif score == "ap":
        performance_function = get_ap_score_function(target)
    else:
        raise Exception(
            "score must be 'acc', 'tpr', 'gm', 'f1', 'auc' or 'ap'")

    # For each individual classifier get its performance
    for i in range(num_classifiers):
        # Get the performance of classifier i
        prob = ensemble_matrix[i, :, :]
        if score == "auc" or score == "ap":
            val = performance_function(target, prob.T)
        else:
            pred = np.argmax(prob, axis=0)
            val = performance_function(target, pred)
        confidences[i] = val
        measure.put((i, ), 0.0)

    # Get the order of confidences
    order = np.argsort(confidences)

    # Calculate singleton values: confidence scaled by dissimilarity to better-performing classifiers
    for i in range(len(order)):
        s = similarities[order[i], order[i + 1:]]
        if len(s) == 0:
            s = 0.0
        else:
            s = s.max()
        measure.put((order[i], ), confidences[order[i]] * (1 - s))

    # Coalition values are sums of the singleton values
    for i in all_combs(range(num_classifiers)):
        if len(i) > 1:
            v = 0.0
            for j in i:
                v += measure.get((j, ))
            measure.put(i, v)

    measure.normalize()
    return measure
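# --- Hedged sketch of assumed helpers (not part of this excerpt) ---
# build_measure_GISFM relies on two helpers defined elsewhere in the package:
# all_combs, assumed to yield every non-empty combination of classifier indices
# as a tuple, and compute_similarities, assumed to return a symmetric
# num_classifiers x num_classifiers similarity matrix. The stand-ins below are
# illustrative only; in particular, the similarity definition (agreement rate
# between argmax predictions) is an assumption, not necessarily the original one.
import itertools

import numpy as np


def all_combs_sketch(indices):
    """Yield every non-empty combination of the given indices as a tuple."""
    indices = list(indices)
    for size in range(1, len(indices) + 1):
        for comb in itertools.combinations(indices, size):
            yield comb


def compute_similarities_sketch(ensemble_matrix):
    """Pairwise agreement rate between the classifiers' argmax predictions."""
    num_classifiers = ensemble_matrix.shape[0]
    # (num_classifiers, num_instances) matrix of predicted classes
    preds = np.argmax(ensemble_matrix, axis=1)
    similarities = np.empty((num_classifiers, num_classifiers))
    for a in range(num_classifiers):
        for b in range(num_classifiers):
            similarities[a, b] = np.mean(preds[a] == preds[b])
    return similarities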
def build_measure_m_aggregation(ensemble_matrix, target, m_function, score="acc"):
    """
    Returns the measure for the OIFM method
    :param ensemble_matrix: A numpy array of num_classifiers by num_classes by num_instances
    :param target: An array with the real class
    :param m_function: The function to use for measure calculation
    :param score: The score to use for classifier performance calculation
    :return: Measure
    """
    # ensemble_matrix => num_classifiers, num_classes, num_instances
    num_classifiers, num_classes, num_instances = ensemble_matrix.shape

    performances = np.empty((num_classifiers, ))

    # Get the callable score function
    if score == "acc":
        performance_function = accuracy
    elif score == "tpr":
        performance_function = tpr_mean
    elif score == "gm":
        performance_function = gm
    elif score == "f1":
        performance_function = get_f_measure_function(target)
    elif score == "auc":
        performance_function = get_auc_score_function(target)
    elif score == "ap":
        performance_function = get_ap_score_function(target)
    else:
        raise Exception(
            "score must be 'acc', 'tpr', 'gm', 'f1', 'auc' or 'ap'")

    # For each individual classifier get its performance
    for i in range(num_classifiers):
        # Get the performance of classifier i
        prob = ensemble_matrix[i, :, :]
        if score == "auc" or score == "ap":
            val = performance_function(target, prob.T)
        else:
            pred = np.argmax(prob, axis=0)
            val = performance_function(target, pred)
        performances[i] = val

    measure = DataStore.DictDataStore(num_classifiers)

    # Calculate the denominator: the aggregation of all squared performances
    performances_2 = np.power(performances, 2)
    denominator = m_function(performances_2)

    # For each combination get the measure value
    for i in all_combs(range(num_classifiers)):
        v = np.zeros((num_classifiers, ))
        for j in i:
            v[j] = performances_2[j]
        numerator = m_function(v)
        measure.put(i, numerator / denominator)

    return measure
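# --- Hedged usage sketch (illustrative only) ---
# build_measure_m_aggregation expects m_function to aggregate a vector of squared
# performances into a single scalar. The aggregations below (arithmetic mean and a
# simple OWA operator biased towards the largest values) are assumed examples, not
# necessarily the aggregations used in the original experiments.
import numpy as np


def m_mean(values):
    """Arithmetic mean aggregation."""
    return float(np.mean(values))


def m_owa(values):
    """OWA aggregation with linearly decreasing weights on the sorted values."""
    sorted_vals = np.sort(values)[::-1]
    weights = np.arange(len(sorted_vals), 0, -1, dtype=float)
    weights /= weights.sum()
    return float(np.dot(weights, sorted_vals))

# Example (with ensemble_matrix and target as used above):
# measure = build_measure_m_aggregation(ensemble_matrix, target, m_mean, score="acc")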
def build_measure_additive(ensemble_matrix, target, score="acc"):
    """
    Function that builds the additive measure
    :param ensemble_matrix: A numpy array of num_classifiers by num_classes by num_instances
    :param target: An array with the real class
    :param score: The score to use for classifier performance calculation
    :return: The additive measure
    """
    # ensemble_matrix => num_classifiers, num_classes, num_instances
    num_classifiers, num_classes, num_instances = ensemble_matrix.shape

    performances = np.empty(num_classifiers)

    # Get the callable performance function
    if score == "acc":
        performance_function = accuracy
    elif score == "tpr":
        performance_function = tpr_mean
    elif score == "gm":
        performance_function = gm
    elif score == "f1":
        performance_function = get_f_measure_function(target)
    elif score == "auc":
        performance_function = get_auc_score_function(target)
    elif score == "ap":
        performance_function = get_ap_score_function(target)
    else:
        raise Exception(
            "score must be 'acc', 'tpr', 'gm', 'f1', 'auc' or 'ap'")

    # For each individual classifier get its performance
    for i in range(num_classifiers):
        # Get the performance of classifier i
        prob = ensemble_matrix[i, :, :]
        if score == "auc" or score == "ap":
            val = performance_function(target, prob.T)
        else:
            # prob => num_classes, num_instances
            pred = np.argmax(prob, axis=0)
            val = performance_function(target, pred)
        performances[i] = val

    # Shift each performance around the mean and squash the difference with tanh
    level_mean = performances.mean()
    y = performances - level_mean
    values = (1.0 / num_classifiers) + np.tanh(y * 100) / (2.0 * num_classifiers)

    measure = DataStore.DictDataStore(num_classifiers)

    # Each coalition gets the sum of its singleton values; normalize at the end
    for i in all_combs(range(num_classifiers)):
        value = 0.0
        for j in i:
            value += values[j]
        measure.put(i, value)
    measure.normalize()

    return measure
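# --- Worked numeric sketch of the additive singleton values (synthetic numbers) ---
# Each singleton gets the uniform weight 1/num_classifiers shifted by
# tanh(100 * (performance - mean)) / (2 * num_classifiers), so a classifier clearly
# above the mean approaches 1.5/num_classifiers and one clearly below approaches
# 0.5/num_classifiers.
import numpy as np

performances_example = np.array([0.70, 0.80, 0.90])
n_example = len(performances_example)
y_example = performances_example - performances_example.mean()
values_example = (1.0 / n_example) + np.tanh(y_example * 100) / (2.0 * n_example)
# values_example ~= [0.167, 0.333, 0.500]; coalition values are sums of these
# singleton values and are normalized afterwards so the full set maps to 1.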
def __init__(self, ensemble_matrix, target, score='acc', integral="choquet"):
    """
    :param ensemble_matrix: A numpy array of num_classifiers by num_classes by num_instances
    :param target: An array with the real class
    :param score: The performance function to use
    :param integral: The integral to use (choquet or sugeno)
    """
    # ensemble_matrix: (num_classifiers, num_classes, num_instances)
    num_classifiers, num_classes, num_instances = ensemble_matrix.shape

    # Calculate the strength for each classifier
    self.g = np.empty((num_classifiers, ))

    self.integral = integral
    if self.integral == "choquet":
        self.integral_function = self.choquet
    elif self.integral == "sugeno":
        self.integral_function = self.sugeno
    else:
        raise Exception("integral must be choquet or sugeno")

    if score == "acc":
        performance = accuracy
    elif score == "tpr":
        performance = tpr_mean
    elif score == "gm":
        performance = gm
    elif score == "f1":
        performance = get_f_measure_function(target)
    elif score == "auc":
        performance = get_auc_score_function(target)
    elif score == "ap":
        performance = get_ap_score_function(target)
    else:
        raise Exception("'score' must be acc, tpr, gm, f1, auc or ap")

    # Calculate performances
    for i in range(num_classifiers):
        if score == "auc" or score == "ap":
            self.g[i] = performance(target, ensemble_matrix[i, :, :].T)
        else:
            pred = np.argmax(ensemble_matrix[i, :, :], axis=0)
            self.g[i] = performance(target, pred)

    # Calculate the mean, and apply CPM
    gmean = np.mean(self.g)
    gdiff = self.g - gmean
    self.g = np.ones((num_classifiers, )) / float(num_classifiers)
    self.g = self.g + np.tanh(gdiff * 100) / (2.0 * num_classifiers)

    # Calculate lambda
    self.lmb = self.calculate_lambda(self.g)
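# --- Hedged sketch of calculate_lambda (the original implementation is not shown here) ---
# For a lambda fuzzy measure, lambda is the unique root (> -1, != 0) of
#     prod_i (1 + lmb * g_i) = 1 + lmb.
# A plain bisection solver under that standard definition is sketched below, assuming
# at least two densities g_i in (0, 1); the class above may solve the equation differently.
import numpy as np


def calculate_lambda_sketch(g, tol=1e-12, max_iter=200):
    """Solve prod(1 + lmb * g_i) = 1 + lmb for the Sugeno lambda measure."""
    g = np.asarray(g, dtype=float)

    def f(lmb):
        return np.prod(1.0 + lmb * g) - (1.0 + lmb)

    total = g.sum()
    if abs(total - 1.0) < tol:
        return 0.0
    if total > 1.0:
        # Densities over-cover the space: the root lies in (-1, 0)
        lo, hi = -1.0 + 1e-12, -1e-12
    else:
        # Densities under-cover the space: the root lies in (0, inf)
        lo, hi = 1e-12, 1.0
        while f(hi) < 0.0 and hi < 1e12:
            hi *= 2.0
    for _ in range(max_iter):
        mid = 0.5 * (lo + hi)
        if f(lo) * f(mid) <= 0.0:
            hi = mid
        else:
            lo = mid
    return 0.5 * (lo + hi)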
def __init__(self, ensemble_matrix, target, score="acc"): """ Constructor :param ensemble_matrix: A numpy array of num_classifiers by num_classes by num_instances :param target: The target (true class) of each example :param score: The score function (performance) to use for weight calculation Valid scores: 'acc', 'tpr', 'gm', 'f1', 'auc' and 'ap' """ # ensemble_matrix => num_classifiers, num_classes, num_instances num_classifiers, num_classes, num_instances = ensemble_matrix.shape # Create an empty array performances = np.empty((num_classifiers, )) # Get the callable function if score == "acc": performance = metrics.accuracy_score elif score == "tpr": performance = tpr_mean elif score == "gm": performance = gm elif score == "f1": performance = get_f_measure_function(target) elif score == "auc": performance = get_auc_score_function(target) elif score == "ap": performance = get_ap_score_function(target) else: raise Exception( "score must be 'acc', 'tpr', 'gm', 'f1', 'auc' or 'ap'") # For each classifier, get the performance for i in range(num_classifiers): # For auc and ap, the needed value is the probability of being of positive class if score == "auc" or score == "ap": val = performance(target, ensemble_matrix[i, :, :].T) else: # For the other metrics, the predicted class (max confidence class) pred = np.argmax(ensemble_matrix[i, :, :], axis=0) val = performance(target, pred) # Set the performance performances[i] = val # Store normalized accuracies self.weights = performances / np.sum(performances)
def __init__(self, ensemble_matrix, target, dynamic_measure_function, score, integral):
    """
    :param ensemble_matrix: A numpy array of num_classifiers by num_classes by num_instances
    :param target: An array with the real class
    :param dynamic_measure_function: The function that returns the measure values
    :param score: The score to use for classifier performance calculation
    :param integral: The integral to use (choquet or sugeno)
    """
    # Assertions
    assert integral.lower() in ["choquet", "sugeno"]
    assert score.lower() in ["acc", "tpr", "gm", "f1", "auc", "ap"]

    num_classifiers, num_classes, num_instances = ensemble_matrix.shape

    self.ensemble_matrix = ensemble_matrix
    self.target = target
    self.dynamic_measure_function = dynamic_measure_function
    self.integral = integral.lower()

    # Normalize the score name once so all later comparisons are consistent
    score = score.lower()

    # Get the callable score function
    if score == "acc":
        performance_function = accuracy
    elif score == "tpr":
        performance_function = tpr_mean
    elif score == "gm":
        performance_function = gm
    elif score == "f1":
        performance_function = get_f_measure_function(target)
    elif score == "auc":
        performance_function = get_auc_score_function(target)
    elif score == "ap":
        performance_function = get_ap_score_function(target)
    else:
        raise Exception(
            "score must be 'acc', 'tpr', 'gm', 'f1', 'auc' or 'ap'")

    # Calculate the confidence of each classifier
    self.confidences = np.empty((num_classifiers, ))
    for i in range(num_classifiers):
        prob = ensemble_matrix[i, :, :]
        if score == "auc" or score == "ap":
            self.confidences[i] = performance_function(target, prob.T)
        else:
            pred = np.argmax(prob, axis=0)
            self.confidences[i] = performance_function(target, pred)

    # Calculate the similarities
    self.similarities = compute_similarities(ensemble_matrix)

    # If the dynamic function is mhm, precompute its relative diversity and additive measure
    if self.dynamic_measure_function == dynamic_mhm:
        # Calculate the relative diversity
        self.relative_diversity = relative_diversity_dict(self.similarities)

        # Calculate the additive measure
        self.additive_measure = DataStore.DictDataStore(self.confidences.shape[0])
        for i in all_combs(range(self.confidences.shape[0])):
            if len(i) == 1:
                self.additive_measure.put(i, self.confidences[i[0]])
            else:
                v = 0.0
                for j in i:
                    v += self.additive_measure.get((j, ))
                self.additive_measure.put(i, v)
        self.additive_measure.normalize()
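# --- Hedged stand-in for relative_diversity_dict (hypothetical, for illustration only) ---
# The constructor above assumes a helper relative_diversity_dict(similarities) that maps
# every coalition of classifier indices to a diversity value. One plausible definition,
# used here purely as an assumed stand-in, is the mean pairwise dissimilarity inside the
# coalition minus the mean pairwise dissimilarity over all classifiers, with singletons
# set to 0. The original definition may differ.
import itertools

import numpy as np


def relative_diversity_dict_sketch(similarities):
    """Map each non-empty coalition (tuple of indices) to a relative diversity value."""
    num_classifiers = similarities.shape[0]
    dissimilarity = 1.0 - similarities
    all_pairs = list(itertools.combinations(range(num_classifiers), 2))
    global_mean = np.mean([dissimilarity[a, b] for a, b in all_pairs]) if all_pairs else 0.0
    diversity = {}
    for size in range(1, num_classifiers + 1):
        for coalition in itertools.combinations(range(num_classifiers), size):
            if size == 1:
                diversity[coalition] = 0.0
            else:
                pairs = itertools.combinations(coalition, 2)
                coalition_mean = np.mean([dissimilarity[a, b] for a, b in pairs])
                diversity[coalition] = coalition_mean - global_mean
    return diversity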
def build_measure(ensemble_matrix, target, score="acc"):
    """
    Function that builds the CPM measure
    :param ensemble_matrix: A numpy array of num_classifiers by num_classes by num_instances
    :param target: An array with the real class
    :param score: The score to use for classifier performance calculation
    :return: The CPM measure
    """
    # ensemble_matrix => num_classifiers, num_classes, num_instances
    num_classifiers, num_classes, num_instances = ensemble_matrix.shape

    # Store the performances
    performances = dict()
    # Store the mean of each level
    level_mean = dict()

    # Get the callable performance function
    if score == "acc":
        performance_function = accuracy
    elif score == "tpr":
        performance_function = tpr_mean
    elif score == "gm":
        performance_function = gm
    elif score == "f1":
        performance_function = get_f_measure_function(target)
    elif score == "auc":
        performance_function = get_auc_score_function(target)
    elif score == "ap":
        performance_function = get_ap_score_function(target)
    else:
        raise Exception(
            "score must be 'acc', 'tpr', 'gm', 'f1', 'auc' or 'ap'")

    # For each possible classifier combination
    for i in all_combs(range(num_classifiers)):
        classifiers_prob = ensemble_matrix[i, :, :]
        # Mean of the probabilities over the classifiers in the combination
        prob = np.mean(classifiers_prob, axis=0)
        if score == "auc" or score == "ap":
            val = performance_function(target, prob.T)
        else:
            # prob => num_classes, num_instances
            pred = np.argmax(prob, axis=0)
            val = performance_function(target, pred)

        # Store the performance and accumulate it for the level mean
        performances[i] = val
        if len(i) not in level_mean:
            level_mean[len(i)] = [0.0, 0.0]
        level_mean[len(i)][0] += val
        level_mean[len(i)][1] += 1.0

    # Calculate the mean per level
    for k in level_mean.keys():
        level_mean[k] = level_mean[k][0] / level_mean[k][1]

    # Calculate the measure
    measure = DataStore.DictDataStore(num_classifiers)

    # For each combination, the measure value is |A| / num_classifiers shifted by how far
    # its performance lies from the mean of its level
    for i in all_combs(range(num_classifiers)):
        y = performances[i] - level_mean[len(i)]
        value = (float(len(i)) / float(num_classifiers)) + np.tanh(
            y * 100) / (2.0 * num_classifiers)
        measure.put(i, value)

    return measure
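# --- Worked numeric sketch of a CPM coalition value (synthetic numbers) ---
# With num_classifiers = 4, a coalition of size 2 whose ensemble performance is 0.05 above
# the mean of all size-2 coalitions gets roughly 2/4 + tanh(5)/8 ~= 0.625, while one 0.05
# below that mean gets roughly 0.375. The full set always has y = 0 (it is the only
# coalition at its level), so its value is exactly 1 and no normalization is needed.
import numpy as np

cpm_value_example = (2.0 / 4.0) + np.tanh(0.05 * 100) / (2.0 * 4.0)  # ~0.625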
def build_global_mhm(ensemble_matrix, target, score, alpha=1.0):
    """
    Function to create the global mhm measure
    :param ensemble_matrix: A numpy array of num_classifiers by num_classes by num_instances
    :param target: An array with the real class
    :param score: The score to use for classifier performance calculation
    :param alpha: Alpha parameter that weights the relative diversity term (see the paper)
    :return: The global mhm measure
    """
    num_classifiers, num_classes, num_instances = ensemble_matrix.shape

    confidences = np.empty((num_classifiers, ))

    # For the additive measure
    additive_measure = DataStore.DictDataStore(num_classifiers)

    # Get the callable score function
    if score == "acc":
        performance_function = accuracy
    elif score == "tpr":
        performance_function = tpr_mean
    elif score == "gm":
        performance_function = gm
    elif score == "f1":
        performance_function = get_f_measure_function(target)
    elif score == "auc":
        performance_function = get_auc_score_function(target)
    elif score == "ap":
        performance_function = get_ap_score_function(target)
    else:
        raise Exception(
            "score must be 'acc', 'tpr', 'gm', 'f1', 'auc' or 'ap'")

    # For each individual classifier get its performance
    for i in range(num_classifiers):
        prob = ensemble_matrix[i, :, :]
        if score == "auc" or score == "ap":
            val = performance_function(target, prob.T)
        else:
            pred = np.argmax(prob, axis=0)
            val = performance_function(target, pred)
        confidences[i] = val

    # Calculate the additive measure
    for i in all_combs(range(num_classifiers)):
        if len(i) == 1:
            additive_measure.put(i, confidences[i[0]])
        else:
            v = 0.0
            for j in i:
                v += additive_measure.get((j, ))
            additive_measure.put(i, v)
    additive_measure.normalize()

    # Compute similarities and relative diversity
    similarities = compute_similarities(ensemble_matrix)
    relative_diversity = relative_diversity_dict(similarities)

    # Calculate the final measure: the additive part scaled by the coalition's relative diversity
    measure = DataStore.DictDataStore(num_classifiers)
    for i in all_combs(range(num_classifiers)):
        value = additive_measure.get(i) * (1 + alpha * relative_diversity[i])
        measure.put(i, value)

    measure.correct_monotonicity()
    measure.normalize()

    return measure
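# --- Hedged usage sketch (synthetic data, illustrative only) ---
# The measure builders above are typically fed class probabilities produced by the
# ensemble on a validation set; the resulting fuzzy measure then drives a
# Choquet/Sugeno integral at prediction time. Synthetic inputs of the expected shapes
# are sketched below; the call is commented out because it depends on the package
# helpers (DataStore, all_combs, compute_similarities, relative_diversity_dict).
import numpy as np

rng = np.random.default_rng(0)
num_classifiers_demo, num_classes_demo, num_instances_demo = 3, 2, 50
ensemble_matrix_demo = rng.random((num_classifiers_demo, num_classes_demo, num_instances_demo))
ensemble_matrix_demo /= ensemble_matrix_demo.sum(axis=1, keepdims=True)  # per-instance class probabilities
target_demo = rng.integers(0, num_classes_demo, size=num_instances_demo)

# measure = build_global_mhm(ensemble_matrix_demo, target_demo, score="acc", alpha=1.0)
# Larger alpha gives coalition diversity more influence relative to the purely
# additive (performance-based) part of the measure.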