    # These methods assume the module-level imports used throughout this file:
    # copy, numpy as np, itertools.permutations, and the local Statistics and
    # AutoVivification helpers.

    def build_real_outputs(self, context, classifier_name, pattern_text):
        """
        Build the array of real-valued outputs for the given pattern set
        (test or validation).

        If the classifier works in a range other than [0, 1], its continuous
        outputs are projected linearly onto [0, 1].
        """
        values_list = context["classifiers"][classifier_name][
            "instance"].real_outputs(
                context, classifier_name,
                context["patterns"].patterns[classifier_name][pattern_text])
        output_range = context["classifiers"][classifier_name]["patterns"][
            "range"]
        transformed_list = []
        for classes_list in values_list:
            temp = np.zeros(len(classes_list), dtype=np.float32)
            for j, value in enumerate(classes_list):
                # "is not" compares object identity and is always True against
                # a fresh list literal; the inequality test is what is meant.
                if output_range != [0, 1]:
                    value = Statistics().change_ranges(
                        value,
                        oldMin=output_range[0],
                        oldMax=output_range[1],
                        newMin=0,
                        newMax=1)
                temp[j] = value
            transformed_list.append(temp)
        self.info[classifier_name]["continuous_outputs"][
            pattern_text] = np.asarray(transformed_list)
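    # A minimal sketch of the linear projection assumed above. change_ranges
    # is the Statistics helper the code relies on; this hypothetical
    # standalone version shows the arithmetic it is expected to perform:
    #
    #     def rescale(value, old_min, old_max, new_min=0.0, new_max=1.0):
    #         """Map value from [old_min, old_max] onto [new_min, new_max]."""
    #         span = old_max - old_min
    #         return new_min + (value - old_min) * (new_max - new_min) / span
    #
    # e.g. rescale(0.0, -1.0, 1.0) == 0.5, so a tanh-style output of 0 lands
    # in the middle of the [0, 1] interval.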
    def build_roc(self, context, classifier_name, pattern_outputs):
        """
        Build the TPR and TNR arrays for the ROC curve of the given
        classifier, one entry per discretization threshold.
        """
        len_outputs = len(self.info[classifier_name]["roc_outputs"])
        self.info[classifier_name]['tpr'] = np.zeros(len_outputs,
                                                     dtype=np.float32)
        self.info[classifier_name]['tnr'] = np.zeros(len_outputs,
                                                     dtype=np.float32)
        for component in context["classifiers"][classifier_name][
                "classes_names"]:
            self.info[classifier_name][component]['tpr'] = np.zeros(
                len_outputs, dtype=np.float32)
            self.info[classifier_name][component]['tnr'] = np.zeros(
                len_outputs, dtype=np.float32)
        statistics_class = Statistics()
        for i, threshold in enumerate(
                sorted(self.info[classifier_name]["roc_outputs"])):
            statistics_class.goodness(
                context, classifier_name,
                self.info[classifier_name]["roc_outputs"][threshold],
                pattern_outputs)
            statistics_class.tpr(classifier_name, context)
            statistics_class.tnr(classifier_name, context)
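    # Illustrative only: the actual Statistics.tpr/tnr implementations are
    # not shown in this excerpt. From a binary confusion count the two rates
    # are conventionally
    #
    #     tpr = tp / (tp + fn)   # sensitivity: positives correctly flagged
    #     tnr = tn / (tn + fp)   # specificity: negatives correctly flagged
    #
    # so sweeping the threshold and recording the (1 - tnr, tpr) pairs traces
    # the ROC curve that build_roc prepares storage for above.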
    def _obtain_weights(self, context, ensemble_name, information):
        """
        Obtain one weight per member classifier from the validation
        pattern set.

        :param context: global configuration structure.
        :param ensemble_name: name of the ensemble whose members are weighted.
        :param information: object holding the classifiers' outputs.
        :return: the Statistics instance used to compute the measures.
        """
        statistic_class = Statistics()
        self.weights = np.zeros(
            len(context["classifiers"][ensemble_name]["classifiers"]))
        outputs_kind = context["classifiers"][ensemble_name]["outputs_kind"]
        for i, classifier_name in enumerate(
                context["classifiers"][ensemble_name]["classifiers"]):
            if context["outputs_kind"] != "validation":
                # The validation information may not exist yet: only the
                # context["pattern_kind"] outputs have been built so far.
                information.build_real_outputs(context, classifier_name,
                                               "validation")
                information.discretize_outputs(context, classifier_name,
                                               "validation")
            statistic_class.goodness(
                context, classifier_name,
                information.info[classifier_name][outputs_kind]["validation"],
                context["patterns"].patterns[classifier_name]["validation"])
            self.weights[i] = statistic_class.measures[classifier_name]["E"]
        return statistic_class
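    # A minimal sketch of how such per-member weights are typically applied
    # (hypothetical; whether "E" is an accuracy- or error-style measure, and
    # therefore whether it should be inverted first, depends on Statistics):
    #
    #     import numpy as np
    #
    #     def weighted_vote(member_outputs, weights):
    #         """Combine (n_members, n_classes) outputs into one vector."""
    #         w = np.asarray(weights, dtype=np.float64)
    #         w = w / w.sum()                      # normalize to sum to 1
    #         return w @ np.asarray(member_outputs)
    #
    #     combined = weighted_vote([[0.9, 0.1], [0.6, 0.4]], [0.8, 0.2])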
    def threshold_determination(self, context, classifier_name,
                                patterns_outputs):
        """
        Given the discretized outputs for each ROC threshold, determine the
        threshold values that minimize the error.
        """
        statistics_class = Statistics()
        # Aux structures
        threshold_list = AutoVivification()
        minimum_error = AutoVivification()
        for class_text in context["classifiers"][classifier_name][
                "classes_names"]:
            # Initialize the aux structures
            threshold_list[class_text] = []
            minimum_error[class_text] = float('inf')
            self.info[classifier_name][class_text]["threshold"][
                "medium"] = float('inf')
            self.info[classifier_name][class_text]["threshold"][
                "minimum"] = float('inf')
            self.info[classifier_name][class_text]["threshold"][
                "maximum"] = float('-inf')
        # For each generated threshold value
        for threshold in self.info[classifier_name]["roc_outputs"]:
            # Calculate the goodness of the classifier
            statistics_class.goodness(
                context, classifier_name,
                self.info[classifier_name]["roc_outputs"][threshold],
                patterns_outputs)
            for class_text in context["classifiers"][classifier_name][
                    "classes_names"]:
                error = 0.0
                for function in context["classifiers"][classifier_name][
                        "thresholds"]["metric"]:
                    getattr(statistics_class, function)(classifier_name,
                                                        context, self,
                                                        "validation")
                    error += statistics_class.measures[classifier_name][
                        class_text][function]
                if error < minimum_error[class_text]:
                    # A new global minimum: reset the list and keep only this
                    # threshold.
                    minimum_error[class_text] = error
                    threshold_list[class_text] = [threshold]
                elif error == minimum_error[class_text]:
                    # A tie in goodness: keep the whole range of thresholds
                    # that reach the minimum error. A plain "if" here would
                    # duplicate each newly found minimum, so "elif" is
                    # required.
                    threshold_list[class_text].append(threshold)
        # Determine the different kinds of thresholds; check every class, not
        # just the last one iterated.
        for class_text in context["classifiers"][classifier_name][
                "classes_names"]:
            if len(threshold_list[class_text]) == 0:
                raise ValueError("There is no threshold selected")
        return threshold_list
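    # The "minimum"/"medium"/"maximum" slots initialized above suggest how
    # the tie list is meant to be collapsed. A hypothetical sketch (the
    # actual reduction is not shown in this excerpt):
    #
    #     tied = sorted(threshold_list[class_text])
    #     info["minimum"] = tied[0]
    #     info["maximum"] = tied[-1]
    #     info["medium"] = tied[len(tied) // 2]  # or (tied[0] + tied[-1]) / 2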
    def instances_error(self, context, classifier_name):
        """
        Measure the error of the classifier over the lists of instances
        given in context["filter_list"].
        """
        statistics_class = Statistics()
        self.info[classifier_name]["selection_errors"] = []
        pattern_kind = context["pattern_kind"]
        outputs_kind = context["outputs_kind"]
        if classifier_name in context["classifier_list"]:
            temporal_patterns = copy.deepcopy(
                context["patterns"].patterns[classifier_name][pattern_kind])
        else:
            original = self.info[classifier_name][outputs_kind][pattern_kind]
            original_pattern_ref = context["patterns"].patterns[
                classifier_name][pattern_kind]
        for counter, filter_list in enumerate(context["filter_list"]):
            # Overwrite context["patterns"] temporarily, because
            # build_real_outputs and discretize_outputs read from it.
            if classifier_name in context["classifier_list"]:
                context["patterns"].modify_patterns_temporally(
                    classifier_name, pattern_kind,
                    context["patterns"].filter_instances(
                        classifier_name, pattern_kind, filter_list))
                self.build_real_outputs(context, classifier_name,
                                        pattern_kind)
                self.discretize_outputs(context, classifier_name,
                                        pattern_kind)
                ref_patterns = context["patterns"].patterns[classifier_name][
                    pattern_kind]
            else:
                self.info[classifier_name][outputs_kind][pattern_kind] = \
                    [original[i] for i in range(len(original))
                     if i in filter_list]
                ref_patterns = [
                    original_pattern_ref[i]
                    for i in range(len(original_pattern_ref))
                    if i in filter_list
                ]
            statistics_class.goodness(
                context, classifier_name,
                self.info[classifier_name][outputs_kind][pattern_kind],
                ref_patterns)
            # The first filter list collects false-positive errors; the
            # remaining ones collect false-negative errors.
            if counter == 0:
                self.info[classifier_name]["selection_errors"].append([
                    statistics_class.measures[classifier_name][x]['EFP']
                    for x in context["filter_component"]
                ])
            else:
                self.info[classifier_name]["selection_errors"].append([
                    statistics_class.measures[classifier_name][x]['EFN']
                    for x in context["filter_component"]
                ])
        # Restore the original patterns and outputs
        if classifier_name in context["classifier_list"]:
            context["patterns"].modify_patterns_temporally(
                classifier_name, pattern_kind, temporal_patterns)
            self.build_real_outputs(context, classifier_name, pattern_kind)
            self.discretize_outputs(context, classifier_name, pattern_kind)
        else:
            self.info[classifier_name][outputs_kind][
                pattern_kind] = original
            from mullpy.ensembles import Ensemble
            Ensemble(context, classifier_name, self, [pattern_kind])
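    # The overwrite/restore dance above is fragile if goodness() raises
    # midway. A hypothetical, more defensive variant of the same idea:
    #
    #     saved = copy.deepcopy(patterns[classifier_name][pattern_kind])
    #     try:
    #         patterns[classifier_name][pattern_kind] = filtered_subset
    #         ...  # rebuild the outputs and measure the error
    #     finally:
    #         patterns[classifier_name][pattern_kind] = saved  # always restore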
    def classes_error(self, context, classifier_name):
        """
        Measure the error of the classifier over subsets of patterns grouped
        by their class-membership vectors.
        """
        self.info[classifier_name]["selection_errors"] = []
        statistics_class = Statistics()
        values = AutoVivification()
        pattern_kind = context["pattern_kind"]
        outputs_kind = context["outputs_kind"]
        if classifier_name in context["classifier_list"]:
            temporal_patterns = copy.deepcopy(
                context["patterns"].patterns[classifier_name][pattern_kind])
        else:
            original = self.info[classifier_name][outputs_kind][pattern_kind]
            original_pattern_ref = context["patterns"].patterns[
                classifier_name][pattern_kind]
        for i in range(
                1,
                len(context["classifiers"][classifier_name]
                    ["classes_names"])):
            # Build every distinct membership vector with i ones and the rest
            # set to -1.
            temp = [1] * i
            temp.extend([-1] * (len(
                context["classifiers"][classifier_name]["classes_names"]) -
                i))
            values[i] = [temp]
            for new in permutations(temp):
                # permutations() yields tuples; convert back to lists so the
                # membership tests below compare like with like.
                new = list(new)
                if new not in values[i]:
                    values[i].append(new)
            if classifier_name in context["classifier_list"]:
                context["patterns"].modify_patterns_temporally(
                    classifier_name, pattern_kind,
                    context["patterns"].filter_classes(classifier_name,
                                                       pattern_kind,
                                                       values[i]))
                self.build_real_outputs(context, classifier_name,
                                        pattern_kind)
                self.discretize_outputs(context, classifier_name,
                                        pattern_kind)
                ref_patterns = context["patterns"].patterns[classifier_name][
                    pattern_kind]
            else:
                positions = [
                    position
                    for position, instance in enumerate(original_pattern_ref)
                    if instance[1] in values[i]
                ]
                # Use a distinct index name to avoid shadowing the outer loop
                # variable i.
                self.info[classifier_name][outputs_kind][pattern_kind] = \
                    [original[idx] for idx in range(len(original))
                     if idx in positions]
                ref_patterns = [
                    original_pattern_ref[idx]
                    for idx in range(len(original_pattern_ref))
                    if idx in positions
                ]
            statistics_class.goodness(
                context, classifier_name,
                self.info[classifier_name][outputs_kind][pattern_kind],
                ref_patterns)
            self.info[classifier_name]["selection_errors"].append(
                statistics_class.measures[classifier_name]['E'])
        # Restore the original patterns and outputs
        if classifier_name in context["classifier_list"]:
            context["patterns"].modify_patterns_temporally(
                classifier_name, pattern_kind, temporal_patterns)
            self.build_real_outputs(context, classifier_name, pattern_kind)
            self.discretize_outputs(context, classifier_name, pattern_kind)
        else:
            self.info[classifier_name][outputs_kind][
                pattern_kind] = original
            from mullpy.ensembles import Ensemble
            Ensemble(context, classifier_name, self, [pattern_kind])
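    # A quick illustration of the membership vectors generated above,
    # assuming three classes. For i = 1 the deduplicated permutations of
    # [1, -1, -1] are:
    #
    #     from itertools import permutations
    #     seen = []
    #     for p in permutations([1, -1, -1]):
    #         if list(p) not in seen:
    #             seen.append(list(p))
    #     # seen == [[1, -1, -1], [-1, 1, -1], [-1, -1, 1]]
    #
    # i.e. every one-vs-rest assignment of a single positive class.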