Example #1
def build_measure_GISFM(ensemble_matrix, target, score):
    """
    Function to create the global ISFM measure
    :param ensemble_matrix: A numpy array of num_classifiers by num_classes by num_instances
    :param target: An array with the real class
    :param score: The score to use for classifier performance calculation
    :return: The normalized GISFM measure
    """
    num_classifiers, num_classes, num_instances = ensemble_matrix.shape
    similarities = compute_similarities(ensemble_matrix)
    # To store measure
    measure = DataStore.DictDataStore(num_classifiers)
    confidences = np.empty((num_classifiers, ))
    # Get the callable score function
    if score == "acc":
        performance_function = accuracy
    elif score == "tpr":
        performance_function = tpr_mean
    elif score == "gm":
        performance_function = gm
    elif score == "f1":
        performance_function = get_f_measure_function(target)
    elif score == "auc":
        performance_function = get_auc_score_function(target)
    elif score == "ap":
        performance_function = get_ap_score_function(target)
    else:
        raise ValueError(
            "score must be 'acc', 'tpr', 'gm', 'f1', 'auc' or 'ap'")
    # For each individual classifier get its performance
    for i in range(num_classifiers):
        # Get the performance of classifier i
        prob = ensemble_matrix[i, :, :]
        if score == "auc" or score == "ap":
            val = performance_function(target, prob.T)
        else:
            pred = np.argmax(prob, axis=0)
            val = performance_function(target, pred)
        confidences[i] = val
        measure.put((i, ), 0.0)
    # Get the order of confidences
    order = np.argsort(confidences)
    # Discount each confidence by the classifier's maximum similarity to any
    # more confident classifier (argsort is ascending, so order[i + 1:] holds
    # the higher-confidence classifiers)
    for i in range(len(order)):
        s = similarities[order[i], order[i + 1:]]
        if len(s) == 0:
            s = 0.0
        else:
            s = s.max()
        measure.put((order[i], ), confidences[order[i]] * (1 - s))

    # The measure of each larger coalition is the sum of its members'
    # singleton values
    for i in all_combs(range(num_classifiers)):
        if len(i) > 1:
            v = 0.0
            for j in i:
                v += measure.get((j, ))
            measure.put(i, v)

    measure.normalize()
    return measure
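
all_combs and DataStore.DictDataStore are module helpers that this page does not show. A minimal sketch of what all_combs presumably does, assuming it enumerates every non-empty classifier subset as a tuple (the subset keys used by the measure store above):

from itertools import combinations

def all_combs(items):
    # Hypothetical reconstruction: yield every non-empty subset of `items`
    # as a tuple, in increasing subset size
    items = list(items)
    for r in range(1, len(items) + 1):
        for comb in combinations(items, r):
            yield comb

# list(all_combs(range(3))) ->
# [(0,), (1,), (2,), (0, 1), (0, 2), (1, 2), (0, 1, 2)]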
Example #2
def build_measure_m_aggregation(ensemble_matrix,
                                target,
                                m_function,
                                score="acc"):
    """
    Returns the measure for the OIFM method
    :param ensemble_matrix: A numpy array of num_classifiers by num_classes by num_instances
    :param target: An array with the real class
    :param m_function: The function to use for measure calculation
    :param score: The score to use for classifier performance calculation
    :return: Measure
    """
    # ensemble_matrix => num_classifiers, num_classes, num_instances
    num_classifiers, num_classes, num_instances = ensemble_matrix.shape
    performances = np.empty((num_classifiers, ))
    if score == "acc":
        performance_function = accuracy
    elif score == "tpr":
        performance_function = tpr_mean
    elif score == "gm":
        performance_function = gm
    elif score == "f1":
        performance_function = get_f_measure_function(target)
    elif score == "auc":
        performance_function = get_auc_score_function(target)
    elif score == "ap":
        performance_function = get_ap_score_function(target)
    else:
        raise ValueError(
            "score must be 'acc', 'tpr', 'gm', 'f1', 'auc' or 'ap'")
    # For each individual classifier get its performance
    for i in range(num_classifiers):
        # Get the performance of classifier i
        prob = ensemble_matrix[i, :, :]
        if score == "auc" or score == "ap":
            val = performance_function(target, prob.T)
        else:
            pred = np.argmax(prob, axis=0)
            val = performance_function(target, pred)
        performances[i] = val

    measure = DataStore.DictDataStore(num_classifiers)
    # Calculate denominator
    performances_2 = np.power(performances, 2)
    denominator = m_function(performances_2)
    # For each combination get the measure value
    for i in all_combs(range(num_classifiers)):
        v = np.zeros((num_classifiers, ))
        for j in i:
            v[j] = performances_2[j]
        numerator = m_function(v)
        measure.put(i, numerator / denominator)

    return measure
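
A hypothetical call, assuming this module and its score helpers are importable. Any monotone aggregation over the squared performances can serve as m_function; np.mean is one plausible choice:

import numpy as np

# 3 classifiers, 2 classes, 100 instances of fake class probabilities,
# normalized over the class axis
rng = np.random.default_rng(0)
raw = rng.random((3, 2, 100))
ensemble_matrix = raw / raw.sum(axis=1, keepdims=True)
target = rng.integers(0, 2, size=100)
measure = build_measure_m_aggregation(ensemble_matrix, target,
                                      m_function=np.mean, score="acc")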
Example #3
def build_measure_additive(ensemble_matrix, target, score="acc"):
    """
    Function that builds the additive measure
    :param ensemble_matrix: A numpy array of num_classifiers by num_classes by num_instances
    :param target: An array with the real class
    :param score: The score to use for classifier performance calculation
    :return: The additive measure
    """
    # ensemble_matrix => num_classifiers, num_classes, num_instances
    num_classifiers, num_classes, num_instances = ensemble_matrix.shape
    performances = np.empty(num_classifiers)
    if score == "acc":
        performance_function = accuracy
    elif score == "tpr":
        performance_function = tpr_mean
    elif score == "gm":
        performance_function = gm
    elif score == "f1":
        performance_function = get_f_measure_function(target)
    elif score == "auc":
        performance_function = get_auc_score_function(target)
    elif score == "ap":
        performance_function = get_ap_score_function(target)
    else:
        raise ValueError(
            "score must be 'acc', 'tpr', 'gm', 'f1', 'auc' or 'ap'")
    # For each individual classifier get its performance
    for i in range(num_classifiers):
        # Get the performance of classifier i
        prob = ensemble_matrix[i, :, :]
        if score == "auc" or score == "ap":
            val = performance_function(target, prob.T)
        else:
            # prob => num_classes, num_instances
            pred = np.argmax(prob, axis=0)
            val = performance_function(target, pred)
        performances[i] = val

    # Shift the uniform 1 / n baseline up or down by at most 1 / (2n) per
    # classifier, depending on whether it beats the mean performance
    # (tanh(y * 100) saturates almost immediately to -1 or +1)
    level_mean = performances.mean()
    y = performances - level_mean
    values = (1.0 /
              num_classifiers) + np.tanh(y * 100) / (2.0 * num_classifiers)

    measure = DataStore.DictDataStore(num_classifiers)
    # The measure of each coalition is the sum of its members' values
    for i in all_combs(range(num_classifiers)):
        value = 0.0
        for j in i:
            value += values[j]
        measure.put(i, value)

    measure.normalize()
    return measure
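
Because of the factor 100, the tanh term saturates almost immediately, so each singleton value is effectively the uniform 1 / n baseline shifted by +1 / (2n) or -1 / (2n). A self-contained illustration with hypothetical accuracies:

import numpy as np

performances = np.array([0.9, 0.7, 0.5])    # hypothetical accuracies
n = len(performances)
y = performances - performances.mean()       # [0.2, 0.0, -0.2]
values = 1.0 / n + np.tanh(y * 100) / (2.0 * n)
print(values)   # ~[0.5, 0.3333, 0.1667]: above / at / below the mean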
Example #4
    def __init__(self,
                 ensemble_matrix,
                 target,
                 score='acc',
                 integral="choquet"):
        """

        :param ensemble_matrix: A numpy array of num_classifiers by num_classes by num_instances
        :param target: An array with the real class
        :param score: The performance function to use
        :param integral: The integral to use (choquet or sugeno)
        """
        # ensemble_matrix: (num_classifiers, num_classes, num_instances)
        num_classifiers, num_classes, num_instances = ensemble_matrix.shape
        # Calculate the strength for each classifier
        self.g = np.empty((num_classifiers, ))
        self.integral = integral
        if self.integral == "choquet":
            self.integral_function = self.choquet
        elif self.integral == "sugeno":
            self.integral_function = self.sugeno
        else:
            raise ValueError("integral must be 'choquet' or 'sugeno'")
        if score == "acc":
            performance = accuracy
        elif score == "tpr":
            performance = tpr_mean
        elif score == "gm":
            performance = gm
        elif score == "f1":
            performance = get_f_measure_function(target)
        elif score == "auc":
            performance = get_auc_score_function(target)
        elif score == "ap":
            performance = get_ap_score_function(target)
        else:
            raise ValueError(
                "score must be 'acc', 'tpr', 'gm', 'f1', 'auc' or 'ap'")
        # Calculate performances
        for i in range(num_classifiers):
            if score == "auc" or score == "ap":
                self.g[i] = performance(target, ensemble_matrix[i, :, :].T)
            else:
                pred = np.argmax(ensemble_matrix[i, :, :], axis=0)
                self.g[i] = performance(target, pred)

        # Centre the performances on their mean and apply the CPM transform
        gmean = np.mean(self.g)
        gdiff = self.g - gmean
        self.g = np.ones((num_classifiers, )) / float(num_classifiers)
        self.g = self.g + np.tanh(gdiff * 100) / (2.0 * num_classifiers)

        # Calculate lambda
        self.lmb = self.calculate_lambda(self.g)
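
calculate_lambda is not shown on this page. Assuming it solves the standard Sugeno lambda-measure identity prod_i(1 + lambda * g_i) = 1 + lambda, a minimal hypothetical sketch using scipy root finding (assumes at least two classifiers and densities g_i in (0, 1)):

import numpy as np
from scipy.optimize import brentq

def calculate_lambda(g, tol=1e-12):
    # Hypothetical reconstruction: find the unique non-trivial root
    # lmb > -1, lmb != 0 of prod(1 + lmb * g_i) = 1 + lmb
    if abs(g.sum() - 1.0) < tol:
        return 0.0                           # additive case: lambda = 0
    f = lambda lmb: np.prod(1.0 + lmb * g) - (1.0 + lmb)
    if g.sum() > 1.0:
        return brentq(f, -1.0 + tol, -tol)   # root lies in (-1, 0)
    hi = 1.0                                 # root lies in (0, inf):
    while f(hi) < 0.0:                       # grow the bracket until the
        hi *= 2.0                            # sign flips, then bisect
    return brentq(f, tol, hi)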
Example #5
    def __init__(self, ensemble_matrix, target, score="acc"):
        """
        Constructor
        :param ensemble_matrix: A numpy array of num_classifiers by num_classes by num_instances
        :param target: The target (true class) of each example
        :param score: The score function (performance) to use for weight calculation
        Valid scores: 'acc', 'tpr', 'gm', 'f1', 'auc' and 'ap'
        """
        # ensemble_matrix => num_classifiers, num_classes, num_instances
        num_classifiers, num_classes, num_instances = ensemble_matrix.shape
        # Create an empty array
        performances = np.empty((num_classifiers, ))
        # Get the callable function
        if score == "acc":
            performance = metrics.accuracy_score
        elif score == "tpr":
            performance = tpr_mean
        elif score == "gm":
            performance = gm
        elif score == "f1":
            performance = get_f_measure_function(target)
        elif score == "auc":
            performance = get_auc_score_function(target)
        elif score == "ap":
            performance = get_ap_score_function(target)
        else:
            raise ValueError(
                "score must be 'acc', 'tpr', 'gm', 'f1', 'auc' or 'ap'")
        # For each classifier, get the performance
        for i in range(num_classifiers):
            # For auc and ap, the score function needs the class probabilities
            if score == "auc" or score == "ap":
                val = performance(target, ensemble_matrix[i, :, :].T)
            else:
                # For the other metrics, the predicted class (max confidence class)
                pred = np.argmax(ensemble_matrix[i, :, :], axis=0)
                val = performance(target, pred)
            # Set the performance
            performances[i] = val

        # Store the normalized performances as weights (they sum to 1)
        self.weights = performances / np.sum(performances)
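
The resulting combiner is a plain performance-weighted vote: the weights are the per-classifier scores rescaled to sum to 1. A self-contained check with hypothetical numbers:

import numpy as np

performances = np.array([0.8, 0.6, 0.6])    # hypothetical scores
weights = performances / np.sum(performances)
print(weights)          # [0.4 0.3 0.3]
print(weights.sum())    # 1.0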
Example #6
    def __init__(self, ensemble_matrix, target, dynamic_measure_function,
                 score, integral):
        """

        :param ensemble_matrix: A numpy array of num_classifiers by num_classes by num_instances
        :param target: An array with the real class
        :param dynamic_measure_function: The function that returns the measure values
        :param score: The score to use for classifier performance calculation
        :param integral: The integral to use (choquet or sugeno)
        """
        # Assertions
        assert integral.lower() in ["choquet", "sugeno"]
        assert score.lower() in ["acc", "tpr", "gm", "f1", "auc", "ap"]
        # Normalize once: the comparisons below must all see the lower-cased
        # score, not just the first two branches
        score = score.lower()
        num_classifiers, num_classes, num_instances = ensemble_matrix.shape
        self.ensemble_matrix = ensemble_matrix
        self.target = target
        self.dynamic_measure_function = dynamic_measure_function
        self.integral = integral.lower()
        # Get callable score
        if score == "acc":
            performance_function = accuracy
        elif score == "tpr":
            performance_function = tpr_mean
        elif score == "gm":
            performance_function = gm
        elif score == "f1":
            performance_function = get_f_measure_function(target)
        elif score == "auc":
            performance_function = get_auc_score_function(target)
        elif score == "ap":
            performance_function = get_ap_score_function(target)
        else:
            raise ValueError(
                "score must be 'acc', 'tpr', 'gm', 'f1', 'auc' or 'ap'")
        # Calculate the confidence of each classifier
        self.confidences = np.empty((num_classifiers, ))
        for i in range(num_classifiers):
            prob = ensemble_matrix[i, :, :]
            if score == "auc" or score == "ap":
                self.confidences[i] = performance_function(target, prob.T)
            else:
                pred = np.argmax(prob, axis=0)
                self.confidences[i] = performance_function(target, pred)
        # Calculate the similarities
        self.similarities = compute_similarities(ensemble_matrix)
        # If the dynamic function is mhm
        if self.dynamic_measure_function == dynamic_mhm:
            # Calculate the relative diversity
            self.relative_diversity = relative_diversity_dict(
                self.similarities)
            # Calculate the additive measure
            self.additive_measure = DataStore.DictDataStore(
                self.confidences.shape[0])
            for i in all_combs(range(self.confidences.shape[0])):
                if len(i) == 1:
                    self.additive_measure.put(i, self.confidences[i[0]])
                else:
                    v = 0.0
                    for j in i:
                        v += self.additive_measure.get((j, ))
                    self.additive_measure.put(i, v)
            self.additive_measure.normalize()
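
In the mhm branch, the additive measure assigns each coalition the sum of its members' confidences, and normalize() presumably rescales so the grand coalition maps to 1. A self-contained illustration with hypothetical confidences:

import numpy as np

confidences = np.array([0.9, 0.6, 0.5])    # hypothetical values
total = confidences.sum()                  # 2.0, the grand coalition's sum
print(confidences[0] / total)                     # mu((0,))      = 0.45
print((confidences[0] + confidences[1]) / total)  # mu((0, 1))    = 0.75
print(total / total)                              # mu((0, 1, 2)) = 1.0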
Example #7
def build_measure(ensemble_matrix, target, score="acc"):
    """
    Function that builds the CPM measure
    :param ensemble_matrix: A numpy array of num_classifiers by num_classes by num_instances
    :param target: An array with the real class
    :param score: The score to use for classifier performance calculation
    :return: The CPM measure
    """
    # ensemble_matrix => num_classifiers, num_classes, num_instances
    num_classifiers, num_classes, num_instances = ensemble_matrix.shape
    # Store the performances
    performances = dict()
    # Store the mean of each level
    level_mean = dict()
    # Get callable performance function
    if score == "acc":
        performance_function = accuracy
    elif score == "tpr":
        performance_function = tpr_mean
    elif score == "gm":
        performance_function = gm
    elif score == "f1":
        performance_function = get_f_measure_function(target)
    elif score == "auc":
        performance_function = get_auc_score_function(target)
    elif score == "ap":
        performance_function = get_ap_score_function(target)
    else:
        raise ValueError(
            "score must be 'acc', 'tpr', 'gm', 'f1', 'auc' or 'ap'")
    # for each possible classifier combination
    for i in all_combs(range(num_classifiers)):
        classifiers_prob = ensemble_matrix[i, :, :]
        # Mean of probabilities
        prob = np.mean(classifiers_prob, axis=0)
        if score == "auc" or score == "ap":
            val = performance_function(target, prob.T)
        else:
            # prob => num_classes, num_instances
            pred = np.argmax(prob, axis=0)
            val = performance_function(target, pred)
        # Add performances and store for level mean calculation
        performances[i] = val
        if len(i) not in level_mean:
            level_mean[len(i)] = [0.0, 0.0]
        level_mean[len(i)][0] += val
        level_mean[len(i)][1] += 1.0

    # Calculate the mean per level
    for k in level_mean.keys():
        level_mean[k] = level_mean[k][0] / level_mean[k][1]

    # Calculate the measure
    measure = DataStore.DictDataStore(num_classifiers)
    # Each coalition's value starts from the additive baseline |A| / n and
    # shifts by at most 1 / (2n), depending on whether the coalition beats
    # the mean performance of same-size coalitions
    for i in all_combs(range(num_classifiers)):
        y = performances[i] - level_mean[len(i)]
        value = (float(len(i)) / float(num_classifiers)) + np.tanh(
            y * 100) / (2.0 * num_classifiers)
        measure.put(i, value)

    return measure
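
Unlike the builders above, this one scores every classifier subset by averaging the members' probabilities, so it performs 2^n - 1 evaluations for n classifiers. A hypothetical smoke test, assuming this module and its helpers are importable:

import numpy as np

rng = np.random.default_rng(1)
raw = rng.random((4, 3, 200))              # 4 classifiers, 3 classes
ensemble_matrix = raw / raw.sum(axis=1, keepdims=True)
target = rng.integers(0, 3, size=200)
measure = build_measure(ensemble_matrix, target, score="acc")
# the call above evaluated 2**4 - 1 = 15 subsets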
Example #8
def build_global_mhm(ensemble_matrix, target, score, alpha=1.0):
    """
    Function to create the global mhm measure
    :param ensemble_matrix: A numpy array of num_classifiers by num_classes by num_instances
    :param target: An array with the real class
    :param score: The score to use for classifier performance calculation
    :param alpha: The alpha parameter from the paper, weighting the diversity term
    :return: The normalized global MHM measure
    """
    num_classifiers, num_classes, num_instances = ensemble_matrix.shape
    confidences = np.empty((num_classifiers, ))
    # For additive measure
    additive_measure = DataStore.DictDataStore(num_classifiers)
    # Get callable score function
    if score == "acc":
        performance_function = accuracy
    elif score == "tpr":
        performance_function = tpr_mean
    elif score == "gm":
        performance_function = gm
    elif score == "f1":
        performance_function = get_f_measure_function(target)
    elif score == "auc":
        performance_function = get_auc_score_function(target)
    elif score == "ap":
        performance_function = get_ap_score_function(target)
    else:
        raise ValueError(
            "score must be 'acc', 'tpr', 'gm', 'f1', 'auc' or 'ap'")
    # For each individual classifier get its performance
    for i in range(num_classifiers):
        prob = ensemble_matrix[i, :, :]
        if score == "auc" or score == "ap":
            val = performance_function(target, prob.T)
        else:
            pred = np.argmax(prob, axis=0)
            val = performance_function(target, pred)
        confidences[i] = val

    # Calculate additive measure
    for i in all_combs(range(num_classifiers)):
        if len(i) == 1:
            additive_measure.put(i, confidences[i[0]])
        else:
            v = 0.0
            for j in i:
                v += additive_measure.get((j, ))
            additive_measure.put(i, v)
    additive_measure.normalize()

    # Compute similarities and relative diversity
    similarities = compute_similarities(ensemble_matrix)
    relative_diversity = relative_diversity_dict(similarities)

    # Calculate the final measure: boost each coalition's additive value by
    # its relative diversity
    measure = DataStore.DictDataStore(num_classifiers)
    for i in all_combs(range(num_classifiers)):
        value = additive_measure.get(i) * (1 + alpha * relative_diversity[i])
        measure.put(i, value)

    measure.correct_monotonicity()
    measure.normalize()
    return measure
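
correct_monotonicity is not shown on this page; multiplying by (1 + alpha * relative_diversity) can break monotonicity, so some repair pass is needed before normalization. A hypothetical sketch of such a pass over a dict keyed by sorted classifier tuples:

from itertools import combinations

def correct_monotonicity(values):
    # Hypothetical reconstruction: enforce mu(A) >= mu(B) for every
    # B subset of A by sweeping coalitions in increasing size and lifting
    # each value to at least the max over its (size - 1) sub-coalitions
    for key in sorted(values, key=len):
        if len(key) > 1:
            floor = max(values[sub]
                        for sub in combinations(key, len(key) - 1))
            values[key] = max(values[key], floor)
    return values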