Example #1
    def _obtain_weights(self, context, ensemble_name, information):
        """
        Obtain weights from the test pattern set
        :param context:
        :param ensemble_name:
        :param information:
        :return:
        """
        statistic_class = Statistics()
        self.weights = np.zeros(
            len(context["classifiers"][ensemble_name]["classifiers"]))
        outputs_kind = context["classifiers"][ensemble_name]["outputs_kind"]
        for i, classifier_name in enumerate(
                context["classifiers"][ensemble_name]["classifiers"]):
            if context["outputs_kind"] != "validation":
                # The validation outputs may not exist yet; only the
                # context[pattern_kind] outputs would have been built.
                information.build_real_outputs(context, classifier_name,
                                               "validation")
                information.discretize_outputs(context, classifier_name,
                                               "validation")

            statistic_class.goodness(
                context, classifier_name,
                information.info[classifier_name][outputs_kind]["validation"],
                context["patterns"].patterns[classifier_name]["validation"])

            self.weights[i] = statistic_class.measures[classifier_name]["E"]

        return statistic_class
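
The snippet stores each classifier's error measure "E" as its weight; downstream, ensembles commonly invert and normalize such errors so that better classifiers vote more. A minimal, illustrative sketch (the inversion and normalization are assumptions, not part of mullpy):

import numpy as np

def voting_weights(errors):
    # Lower error should mean higher weight: invert each error and
    # normalize so the weights sum to one.
    inverted = 1.0 / (np.asarray(errors, dtype=np.float64) + 1e-12)
    return inverted / inverted.sum()

# Example: three classifiers with validation errors of 10%, 20% and 40%.
print(voting_weights([0.10, 0.20, 0.40]))  # ~[0.571, 0.286, 0.143]
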
Example #2
    def instances_error(self, context, classifier_name):
        """
            Measure the error of the classifier giving a list of instances to check.
            """
        statistics_class = Statistics()
        self.info[classifier_name]["selection_errors"] = []
        pattern_kind = context["pattern_kind"]

        outputs_kind = context["outputs_kind"]

        if classifier_name in context["classifier_list"]:
            temporal_patterns = copy.deepcopy(context["patterns"].patterns[classifier_name][pattern_kind])
        else:
            original = self.info[classifier_name][outputs_kind][pattern_kind]
            original_pattern_ref = context["patterns"].patterns[classifier_name][pattern_kind]

        for counter, filter_list in enumerate(context["filter_list"]):
            # Temporarily overwrite context["patterns"], because
            # build_real_outputs and discretize_outputs read from it.
            if classifier_name in context["classifier_list"]:
                context["patterns"].modify_patterns_temporally(
                    classifier_name,
                    pattern_kind,
                    context["patterns"].filter_instances(classifier_name, pattern_kind, filter_list))

                self.build_real_outputs(context, classifier_name, pattern_kind)
                self.discretize_outputs(context, classifier_name, pattern_kind)
                ref_patterns = context["patterns"].patterns[classifier_name][pattern_kind]
            else:
                self.info[classifier_name][outputs_kind][pattern_kind] = \
                    [original[i] for i in range(len(original)) if i in filter_list]
                ref_patterns = [original_pattern_ref[i] for i in range(len(original_pattern_ref)) if i in filter_list]

            statistics_class.goodness(context, classifier_name,
                                      self.info[classifier_name][outputs_kind][pattern_kind],
                                      ref_patterns)

            if counter == 0:
                self.info[classifier_name]["selection_errors"].append(
                    [statistics_class.measures[classifier_name][x]['EFP'] for x in context["filter_component"]])
            else:
                self.info[classifier_name]["selection_errors"].append(
                    [statistics_class.measures[classifier_name][x]['EFN'] for x in context["filter_component"]])

            if classifier_name in context["classifier_list"]:
                # Restore the original patterns and rebuild the outputs.
                context["patterns"].modify_patterns_temporally(classifier_name, pattern_kind, temporal_patterns)
                self.build_real_outputs(context, classifier_name, pattern_kind)
                self.discretize_outputs(context, classifier_name, pattern_kind)
            else:
                self.info[classifier_name][outputs_kind][pattern_kind] = original
                # Local import, presumably to avoid a circular dependency.
                from mullpy.ensembles import Ensemble

                Ensemble(context, classifier_name, self, [pattern_kind])
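
The original[i] for i in range(...) if i in filter_list idiom above runs a linear membership test per element; converting the index list to a set once makes the whole filter effectively linear. A self-contained sketch (all names are illustrative):

def filter_by_index(items, keep_indices):
    # Convert once to a set so each membership test is O(1).
    keep = set(keep_indices)
    return [item for i, item in enumerate(items) if i in keep]

outputs = ['a', 'b', 'c', 'd', 'e']
print(filter_by_index(outputs, [0, 2, 4]))  # ['a', 'c', 'e']
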
Example #3
    def threshold_determination(self, context, classifier_name,
                                patterns_outputs):
        """
            With the discretized outputs for roc values, determine the best values for the threshold.
            """
        statistics_class = Statistics()
        #Aux structures
        threshold_list = AutoVivification()
        minimum_error = AutoVivification()

        for class_text in context["classifiers"][classifier_name][
                "classes_names"]:
            #Initialize the aux structures
            threshold_list[class_text] = []
            minimum_error[class_text] = float('inf')
            self.info[classifier_name][class_text]["threshold"][
                "medium"] = float('inf')
            self.info[classifier_name][class_text]["threshold"][
                "minimum"] = float('inf')
            self.info[classifier_name][class_text]["threshold"][
                "maximum"] = float('-inf')
        # For each value of threshold generated:
        for threshold in self.info[classifier_name]["roc_outputs"]:
            #Calculate the goodness of the classifier
            statistics_class.goodness(
                context, classifier_name,
                self.info[classifier_name]["roc_outputs"][threshold],
                patterns_outputs)
            for class_text in context["classifiers"][classifier_name][
                    "classes_names"]:
                error = 0.0
                for function in context["classifiers"][classifier_name][
                        "thresholds"]["metric"]:
                    getattr(statistics_class,
                            function)(classifier_name, context, self,
                                      "validation")
                    error += statistics_class.measures[classifier_name][
                        class_text][function]
                # A new global minimum: reset the list and save the threshold.
                if error < minimum_error[class_text]:
                    minimum_error[class_text] = error
                    threshold_list[class_text] = [threshold]
                # A tie in goodness: keep every threshold that attains the
                # minimum error.
                elif error == minimum_error[class_text]:
                    threshold_list[class_text].append(threshold)

                # Sanity check: at least one threshold must have been kept.
                if len(threshold_list[class_text]) == 0:
                    raise ValueError("There is no threshold selected")
        return threshold_list
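
The function initializes "minimum", "medium" and "maximum" threshold slots, but the code that fills them from threshold_list is not part of the snippet. A plausible sketch of that reduction (the key names follow the snippet; the midpoint rule is an assumption):

def reduce_threshold_list(threshold_list):
    # Collapse each class's list of equally good thresholds into the
    # minimum, the midpoint ("medium") and the maximum of the tied range.
    reduced = {}
    for class_text, thresholds in threshold_list.items():
        reduced[class_text] = {
            "minimum": min(thresholds),
            "medium": (min(thresholds) + max(thresholds)) / 2.0,
            "maximum": max(thresholds),
        }
    return reduced

print(reduce_threshold_list({"class_a": [0.4, 0.45, 0.5]}))
# {'class_a': {'minimum': 0.4, 'medium': 0.45, 'maximum': 0.5}}
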
Example #4
    def classes_error(self, context, classifier_name):
        """
        Measure the error of the classifier over groups of instances whose
        targets share the same number of positive classes.
        """
        self.info[classifier_name]["selection_errors"] = []

        statistics_class = Statistics()
        values = AutoVivification()
        pattern_kind = context["pattern_kind"]
        outputs_kind = context["outputs_kind"]

        if classifier_name in context["classifier_list"]:
            temporal_patterns = copy.deepcopy(context["patterns"].patterns[classifier_name][pattern_kind])
        else:
            original = self.info[classifier_name][outputs_kind][pattern_kind]
            original_pattern_ref = context["patterns"].patterns[classifier_name][pattern_kind]

        for i in range(1, len(context["classifiers"][classifier_name]["classes_names"])):
            # Build every distinct target vector that marks exactly i classes
            # as positive (1) and the rest as negative (-1).
            temp = [1] * i
            temp.extend([-1] * (len(context["classifiers"][classifier_name]["classes_names"]) - i))
            values[i] = [list(p) for p in set(permutations(temp))]

            if classifier_name in context["classifier_list"]:
                context["patterns"].modify_patterns_temporally(classifier_name, pattern_kind,
                                                               context["patterns"].filter_classes(classifier_name,
                                                                                                  pattern_kind,
                                                                                                  values[i]))
                self.build_real_outputs(context, classifier_name, pattern_kind)
                self.discretize_outputs(context, classifier_name, pattern_kind)
                ref_patterns = context["patterns"].patterns[classifier_name][pattern_kind]
            else:
                positions = [position for position, instance in enumerate(original_pattern_ref)
                             if instance[1] in values[i]]
                self.info[classifier_name][outputs_kind][pattern_kind] = \
                    [original[j] for j in range(len(original)) if j in positions]
                ref_patterns = [original_pattern_ref[j] for j in range(len(original_pattern_ref)) if j in positions]

            statistics_class.goodness(context, classifier_name,
                                      self.info[classifier_name][outputs_kind][pattern_kind],
                                      ref_patterns)
            self.info[classifier_name]["selection_errors"].append(statistics_class.measures[classifier_name]['E'])

            if classifier_name in context["classifier_list"]:
                # Restore the original patterns and rebuild the outputs.
                context["patterns"].modify_patterns_temporally(classifier_name, pattern_kind, temporal_patterns)
                self.build_real_outputs(context, classifier_name, pattern_kind)
                self.discretize_outputs(context, classifier_name, pattern_kind)
            else:
                self.info[classifier_name][outputs_kind][pattern_kind] = original
                # Local import, presumably to avoid a circular dependency.
                from mullpy.ensembles import Ensemble

                Ensemble(context, classifier_name, self, [pattern_kind])
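
A self-contained look at the sign-vector generation used above: itertools.permutations yields repeated orderings when the input has repeated elements, so wrapping it in set() removes the duplicates before the vectors are used for filtering:

from itertools import permutations

n_classes = 3
for i in range(1, n_classes):
    temp = [1] * i + [-1] * (n_classes - i)
    vectors = [list(p) for p in set(permutations(temp))]
    print(i, sorted(vectors))
# 1 [[-1, -1, 1], [-1, 1, -1], [1, -1, -1]]
# 2 [[-1, 1, 1], [1, -1, 1], [1, 1, -1]]
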
Example #5
    def build_roc(self, context, classifier_name, pattern_outputs):
        """
            Build the tpr and tnr for the ROC curve of the classifier given.
            """
        len_outputs = len(self.info[classifier_name]["roc_outputs"].keys())
        self.info[classifier_name]['tpr'] = np.zeros(len_outputs,
                                                     dtype=np.float32)
        self.info[classifier_name]['tnr'] = np.zeros(len_outputs,
                                                     dtype=np.float32)
        for component in context["classifiers"][classifier_name][
                "classes_names"]:
            self.info[classifier_name][component]['tpr'] = np.zeros(
                len_outputs, dtype=np.float32)
            self.info[classifier_name][component]['tnr'] = np.zeros(
                len_outputs, dtype=np.float32)
        statistics_class = Statistics()

        for i, threshold in enumerate(
                sorted(self.info[classifier_name]["roc_outputs"])):
            statistics_class.goodness(
                context, classifier_name,
                self.info[classifier_name]["roc_outputs"][threshold],
                pattern_outputs)
            statistics_class.tpr(classifier_name, context)
            statistics_class.tnr(classifier_name, context)
            # Store the per-class rates for this threshold; assuming tpr()
            # and tnr() populate statistics_class.measures the way the other
            # metric methods do, the arrays initialized above are filled here.
            for component in context["classifiers"][classifier_name]["classes_names"]:
                self.info[classifier_name][component]['tpr'][i] = \
                    statistics_class.measures[classifier_name][component]['tpr']
                self.info[classifier_name][component]['tnr'][i] = \
                    statistics_class.measures[classifier_name][component]['tnr']
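
Once the tpr and tnr arrays are filled, a common next step (not shown in the snippet) is computing the area under the ROC curve. A minimal sketch with numpy, assuming fpr = 1 - tnr:

import numpy as np

def roc_auc(tpr, tnr):
    # ROC plots tpr against fpr = 1 - tnr; integrate with the trapezoidal rule.
    fpr = 1.0 - np.asarray(tnr, dtype=np.float64)
    order = np.argsort(fpr)
    return np.trapz(np.asarray(tpr, dtype=np.float64)[order], fpr[order])

# A perfect classifier pins tpr at 1 for every fpr, giving AUC = 1.
print(roc_auc(tpr=[1.0, 1.0, 1.0], tnr=[1.0, 0.5, 0.0]))  # 1.0
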
Example #6
    def build_real_outputs(self, context, classifier_name, pattern_text):
        """
        Construct the array for the test and validation outputs in real values
        If the classifier works in a different range than zero-one the continuous outputs are projected linearly
        """
        values_list = context["classifiers"][classifier_name][
            "instance"].real_outputs(
                context, classifier_name,
                context["patterns"].patterns[classifier_name][pattern_text])
        transformed_list = []

        old_range = context["classifiers"][classifier_name]["patterns"]["range"]
        for classes_list in values_list:
            temp = np.zeros(len(classes_list), dtype=np.float32)
            for j, value in enumerate(classes_list):
                if old_range != [0, 1]:
                    # Project the output linearly from the classifier's
                    # range onto [0, 1].
                    value = Statistics().change_ranges(
                        value,
                        oldMin=old_range[0],
                        oldMax=old_range[1],
                        newMin=0,
                        newMax=1)
                temp[j] = value
            transformed_list.append(temp)
        self.info[classifier_name]["continuous_outputs"][
            pattern_text] = np.asarray(transformed_list)
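
The linear projection performed by change_ranges is the standard affine rescaling between intervals. A minimal sketch of that formula (the keyword names mirror the call above; the body is an assumption about the implementation):

def change_ranges(value, oldMin, oldMax, newMin, newMax):
    # Map value affinely from [oldMin, oldMax] onto [newMin, newMax].
    return (value - oldMin) * (newMax - newMin) / (oldMax - oldMin) + newMin

# A classifier emitting in [-1, 1] has its output 0.5 mapped to 0.75 in [0, 1].
print(change_ranges(0.5, oldMin=-1, oldMax=1, newMin=0, newMax=1))  # 0.75
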