def train_ensemble(model, acoustic_iterator, linguistic_iterator, optimizer,
                   criterion, reg_ratio):
    model.train()
    epoch_loss = 0
    conf_mat = ConfusionMatrix(np.zeros((NUM_CLASSES, NUM_CLASSES)))
    assert len(acoustic_iterator) == len(linguistic_iterator)
    for acoustic_tuple, linguistic_tuple in zip(acoustic_iterator(),
                                                linguistic_iterator()):
        acoustic_batch = acoustic_tuple[0]
        labels = acoustic_tuple[1]
        linguistic_batch = linguistic_tuple[0]
        optimizer.zero_grad()
        predictions = model(acoustic_batch, linguistic_batch).squeeze(1)
        loss = criterion(predictions, labels)
        reg_loss = 0
        for param in model.parameters():
            reg_loss += param.norm(2)
        total_loss = loss + reg_ratio * reg_loss
        total_loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        conf_mat += ConfusionMatrix.from_predictions(predictions, labels)
    average_loss = epoch_loss / len(acoustic_iterator)
    return average_loss, conf_mat
def train(model, iterator, optimizer, criterion, reg_ratio):
    model.train()
    epoch_loss = 0
    conf_mat = ConfusionMatrix(np.zeros((NUM_CLASSES, NUM_CLASSES)))
    for batch, labels in iterator():
        optimizer.zero_grad()
        predictions = model(batch).squeeze(1)
        loss = criterion(predictions, labels)
        reg_loss = 0
        for param in model.parameters():
            reg_loss += param.norm(2)
        total_loss = loss + reg_ratio * reg_loss
        total_loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        conf_mat += ConfusionMatrix.from_predictions(predictions, labels)
    average_loss = epoch_loss / len(iterator)
    return average_loss, conf_mat
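# Aside (not part of the original code): the loop above penalizes the sum of
# *unsquared* L2 norms of the parameters. PyTorch's built-in `weight_decay`
# bakes a related but not identical squared-norm penalty into the optimizer
# update; a minimal, self-contained sketch with a placeholder model:
import torch
import torch.nn as nn

_demo_model = nn.Linear(4, 2)  # illustrative stand-in, not the project model
_demo_opt = torch.optim.SGD(_demo_model.parameters(), lr=1e-3,
                            weight_decay=1e-4)  # rough analogue of reg_ratio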
def evaluate_sense(gold_list, predicted_list):
    """Evaluate sense classifier

    The label 'no' is for the relations that are missed by the system
    because the arguments don't match any of the gold relations.
    """
    sense_alphabet = Alphabet()
    for relation in gold_list:
        sense_alphabet.add(relation['Sense'][0])
    sense_alphabet.add('no')
    sense_cm = ConfusionMatrix(sense_alphabet)
    gold_to_predicted_map, predicted_to_gold_map = \
        _link_gold_predicted(gold_list, predicted_list, spans_exact_matching)
    for i, gold_relation in enumerate(gold_list):
        if i in gold_to_predicted_map:
            predicted_sense = gold_to_predicted_map[i]['Sense'][0]
            if predicted_sense in gold_relation['Sense']:
                sense_cm.add(predicted_sense, predicted_sense)
            else:
                if not sense_cm.alphabet.has_label(predicted_sense):
                    predicted_sense = 'no'
                sense_cm.add(predicted_sense, gold_relation['Sense'][0])
        else:
            sense_cm.add('no', gold_relation['Sense'][0])
    for i, predicted_relation in enumerate(predicted_list):
        if i not in predicted_to_gold_map:
            predicted_sense = predicted_relation['Sense'][0]
            if not sense_cm.alphabet.has_label(predicted_sense):
                predicted_sense = 'no'
            sense_cm.add(predicted_sense, 'no')
    return sense_cm
def bootstrap_diff(df, ccp_estimator, rounds, sample_size):
    bootstrap_results = []
    for i in range(rounds):
        # Get first model parameters
        s1 = df.sample(sample_size, replace=True)
        bug_g = s1.groupby([classifier, concept],
                           as_index=False).agg({count: 'count'})
        bug_cm = ConfusionMatrix(g_df=bug_g, classifier=classifier,
                                 concept=concept, count=count)
        positive_rate = bug_cm.positive_rate()
        hit_rate = bug_cm.hit_rate()
        ccp = ccp_estimator.estimate_positives(hit_rate)
        ccp_diff = ccp - positive_rate  # Find difference in given points
        bootstrap_results.append([positive_rate, hit_rate, ccp, ccp_diff])
        if i % 100 == 0:
            print("finished " + str(i), datetime.datetime.now())
    results_df = pd.DataFrame(
        bootstrap_results,
        columns=['positive_rate', 'hit_rate', 'ccp', 'ccp_diff'])
    return results_df
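# Hypothetical call, assuming the module-level column names `classifier`,
# `concept`, and `count` used by bootstrap_diff() are defined and that
# `ccp_estimator` exposes estimate_positives(); a confidence interval can
# then be read off the bootstrap distribution:
results = bootstrap_diff(df, ccp_estimator, rounds=1000, sample_size=len(df))
low, high = results['ccp_diff'].quantile([0.025, 0.975])
print("95%% bootstrap CI for ccp - positive_rate: [%.3f, %.3f]" % (low, high))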
def fit_predict(self, Gtr, Ytr, Gt, Yt, grid_search, acc_param="F1Mean",
                RS=0, VERBOSE=False):
    """Learn the model on the training dataset and predict on the testing
    dataset.

    Input:
    - Gtr (array): training subset
    - Ytr (array): true labels of training subset
    - Gt (array): testing subset
    - Yt (array): true labels of testing subset
    - grid_search (dict): grid search for the CV
    - acc_param (str): accuracy parameter for the cross-validation
      (default "F1Mean", otherwise overall accuracy is used)
    - RS (int): random seed used for the stratified k-fold CV
    - VERBOSE (bool): verbose (default: False)

    Return:
    - confMatrix (ConfusionMatrix): confusion matrix issued from the
      classification
    - yp (array): vector of predicted labels of the testing subset
    - grid.best_params_ (dict): combination of parameters that gave the
      best results during the CV

    TODO: implement own scoring parameter
    """
    if acc_param == "F1Mean":
        score = 'f1_macro'
    else:
        score = 'accuracy'

    # Initialization of the stratified k-fold CV
    cv = StratifiedKFold(Ytr, n_folds=self.n_folds, random_state=RS)

    # Fit and predict with the parameter combinations from the grid search
    grid = GridSearchCV(self.pipe, param_grid=grid_search, scoring=score,
                        verbose=VERBOSE, cv=cv, n_jobs=3)
    grid.fit(Gtr, Ytr)
    model = grid.best_estimator_
    if VERBOSE:
        print(grid.best_score_)

    # Learn model
    model.fit(Gtr, Ytr)  # could use refit in version 0.19 of sklearn

    # Predict
    yp = model.predict(Gt)

    # Compute confusion matrix
    confMatrix = ConfusionMatrix()
    confMatrix.compute_confusion_matrix(yp, Yt)
    return confMatrix, yp, grid.best_params_
def test_storing_loading():
    """Test store_preds and load_preds"""
    # Create confusion matrices for random classifiers
    yactual = [2, 0, 2, 2, 0, 1, 1, 2, 2, 0, 1, 2]
    ypred1 = np.random.randint(3, size=12)
    cm1 = ConfusionMatrix(yactual, ypred1, "cls_1")
    ypred2 = np.random.randint(3, size=12)
    cm2 = ConfusionMatrix(yactual, ypred2, "cls_2")
    ypred3 = np.random.randint(3, size=12)
    cm3 = ConfusionMatrix(yactual, ypred3, "cls_3")
    ypred4 = np.random.randint(3, size=12)
    cm4 = ConfusionMatrix(yactual, ypred4, "cls_4")
    preds = [ypred1, ypred2, ypred3, ypred4]
    print("Preds before saving", preds)
    store_preds(preds, yactual, 1)
    new_preds, actual = load_preds(1)
    print("Preds after saving", new_preds, "Actual after saving", actual)
def evaluate_performance(df, classification_column, concept_column):
    g = df.groupby([classification_column, concept_column],
                   as_index=False).agg({'commit': 'count'})
    cm = ConfusionMatrix(g_df=g, classifier=classification_column,
                         concept=concept_column, count='commit')
    return cm.summarize()
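# For orientation only: a toy frame with the shape evaluate_performance()
# expects (one row per commit, a prediction column and a ground-truth
# column); the column names below are invented for the example.
import pandas as pd

demo_df = pd.DataFrame({
    'commit': ['a1', 'b2', 'c3', 'd4'],
    'pred':   [True, True, False, False],
    'label':  [True, False, False, True],
})
print(evaluate_performance(demo_df, 'pred', 'label'))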
def applyClassifiers(scst_pkl_path: str,
                     keras_pkl_path: str,
                     data_path: Optional[str] = None,
                     truth_path: Optional[str] = None,
                     start_second: Optional[float] = None,
                     end_second: Optional[float] = None) -> None:
    '''
    Apply both a ``SCSTClassifier`` and a ``TransientKerasClassifier`` to the
    data sequence specified by the inputs, and plot the results.

    :param scst_pkl_path: Full path to a pickle file containing a saved
        ``SCSTClassifier`` object.
    :param keras_pkl_path: Full path to a pickle file containing a saved
        ``TransientKerasClassifier`` object.
    :param data_path, truth_path, start_second, end_second: See
        ``data_utils.getPlotDataTuple``.
    '''
    # Define SCST classification parameters
    SIG_THRESH = 100
    NOISE_THRESH = 100

    # Load data and classifiers
    test_matrix, truth_array, start_second, title = getPlotDataTuple(
        truth_path, data_path, start_second, end_second)
    scst_classifier = SCSTClassifier.load(scst_pkl_path)
    keras_classifier = TransientKerasClassifier.load(keras_pkl_path)

    # Apply classifiers
    num_obs = test_matrix.shape[1]
    for test_idx in range(num_obs):
        scst_classifier.classify(test_matrix[:, test_idx], SIG_THRESH,
                                 NOISE_THRESH)
        keras_classifier.classify(test_matrix[:, test_idx])
        _printProgress(test_idx / float(num_obs), title)
    _printProgress(1.0, title)
    sys.stdout.write("\n")

    # Display results
    id_array_list = [scst_classifier.class_labels,
                     keras_classifier.class_labels]
    id_tag_list = ["SCST IDs", "Keras IDs"]
    if truth_array is not None:
        for classifier, name in zip([scst_classifier, keras_classifier],
                                    ["SCST", "Keras"]):
            conf_matrix = ConfusionMatrix(classifier.class_labels,
                                          truth_array, True, name)
            conf_matrix.display()
        id_array_list.append(truth_array)
        id_tag_list.append("Truth IDs")
    plotSequence(test_matrix, start_second, title, id_array_list, id_tag_list)
    plt.show()
def two_years_analysis(two_years_df, first_metric, second_metric, key):
    print()
    print("Co-change", first_metric, second_metric)
    g = two_years_df.groupby([first_metric, second_metric],
                             as_index=False).agg({key: 'count'})
    print(g)
    cm = ConfusionMatrix(g_df=g, classifier=first_metric,
                         concept=second_metric, count=key)
    print(cm.summarize())
    print()
    print("Samples", cm.samples())
    print("Both metrics increment match", cm.accuracy())
    print(second_metric, "improvement given", first_metric, "improvement",
          cm.precision(), "lift", cm.precision_lift())
    print(first_metric, "improvement given", second_metric, "improvement",
          cm.recall(), "lift",
          ifnull(safe_divide(ifnull(cm.recall()), cm.hit_rate())) - 1)
    print()
def run(self):
    """This is for assignment 6; the data is from 20 newsgroups."""
    model = BuildModel("../data/features")
    count_vectors = model.count_vectors()
    cm = ConfusionMatrix(labels=model.labels)
    mm = MultinomialMixture(20, count_vectors, n_iterations=4, verbose=True,
                            smoothing=True, confusion_matrix=cm,
                            document_types=model.document_types)
    mm.learn_parameters()
    cm.print_matrix()
def perfective_performance(df):
    perfective_g = df.groupby(['perfective_pred', 'Is_Perfective'],
                              as_index=False).agg({'commit': 'count'})
    perfective_cm = ConfusionMatrix(g_df=perfective_g,
                                    classifier='perfective_pred',
                                    concept='Is_Perfective',
                                    count='commit')
    print("perfective commit performance")
    print(perfective_cm.summarize())
    return perfective_cm
def test_summarize(classifier, concept, count, g_df, expected):
    cm = ConfusionMatrix(classifier, concept, count, g_df)
    actual = cm.summarize()
    assert expected == actual
def test_independent_prob(classifier, concept, count, g_df, expected):
    cm = ConfusionMatrix(classifier, concept, count, g_df)
    actual = cm.independent_prob()
    assert expected == actual
def corrective_performance(df):
    bug_g = df.groupby(['corrective_pred', 'Is_Corrective'],
                       as_index=False).agg({'commit': 'count'})
    bug_cm = ConfusionMatrix(g_df=bug_g, classifier='corrective_pred',
                             concept='Is_Corrective', count='commit')
    print("corrective commit performance")
    print(bug_cm.summarize())
    return bug_cm
def refactor_performance(df):
    refactor_g = df.groupby(['is_refactor_pred', 'Is_Refactor'],
                            as_index=False).agg({'commit': 'count'})
    refactor_cm = ConfusionMatrix(g_df=refactor_g,
                                  classifier='is_refactor_pred',
                                  concept='Is_Refactor', count='commit')
    print("refactor commit performance")
    print(refactor_cm.summarize())
    return refactor_cm
def adaptive_performance(df):
    adaptive_g = df.groupby(['adaptive_pred', 'Is_Adaptive'],
                            as_index=False).agg({'commit': 'count'})
    adaptive_cm = ConfusionMatrix(g_df=adaptive_g,
                                  classifier='adaptive_pred',
                                  concept='Is_Adaptive', count='commit')
    print("adaptive commit performance")
    print(adaptive_cm.summarize())
    return adaptive_cm
def features_confusion_matrix_analysis(two_years_df, first_metric,
                                       second_metric, keys):
    g = two_years_df.groupby([first_metric, second_metric],
                             as_index=False).agg({keys[0]: 'count'})
    cm = ConfusionMatrix(g_df=g, classifier=first_metric,
                         concept=second_metric, count=keys[0])
    return cm.summarize()
def leave_one_out(examples, k):
    conf_matr = ConfusionMatrix()
    for ex in examples:
        # disable only this example
        ex.active = False
        # run the k-Nearest-Neighbor algorithm
        rank_list = knn.knn(k, examples, ex)
        # check the voting for correctness
        outcome = knn.voting(rank_list)
        conf_matr.inc_according_to(outcome, ex.outcome)
        ex.active = True
    # return the computed confusion matrix
    return conf_matr
def compute_binary_eval_metric(gold_list, predicted_list, matching_fn):
    """Compute binary evaluation metric"""
    binary_alphabet = Alphabet()
    binary_alphabet.add('yes')
    binary_alphabet.add('no')
    cm = ConfusionMatrix(binary_alphabet)
    matched_predicted = [False for x in predicted_list]
    for gold_span in gold_list:
        found_match = False
        for i, predicted_span in enumerate(predicted_list):
            if matching_fn(gold_span, predicted_span) and \
                    not matched_predicted[i]:
                cm.add('yes', 'yes')
                matched_predicted[i] = True
                found_match = True
                break
        if not found_match:
            cm.add('no', 'yes')
    # Predicted span that does not match with any gold span
    for matched in matched_predicted:
        if not matched:
            cm.add('yes', 'no')
    return cm
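# Hedged sketch of a matching_fn and a call; spans here are plain
# (start, end) tuples, which is an assumption about the caller's data.
def exact_span_match(gold_span, predicted_span):
    return gold_span == predicted_span

demo_gold = [(0, 4), (7, 9)]
demo_pred = [(0, 4), (10, 12)]
demo_cm = compute_binary_eval_metric(demo_gold, demo_pred, exact_span_match)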
def eval_ensemble(ensemble_model, acoustic_model_iterator,
                  linguistic_model_iterator, criterion):
    epoch_losses = []
    conf_mat = ConfusionMatrix(np.zeros((NUM_CLASSES, NUM_CLASSES)))
    with torch.no_grad():
        for ((acoustic_batch, labels),
             (linguistic_batch, _)) in zip(acoustic_model_iterator(),
                                           linguistic_model_iterator()):
            predictions = ensemble_model(acoustic_batch, linguistic_batch)
            predictions = torch.Tensor(predictions)
            loss = criterion(predictions.float(), labels)
            epoch_losses.append(loss.item())
            conf_mat += ConfusionMatrix.from_predictions(predictions, labels)
    average_loss = sum(epoch_losses) / len(acoustic_model_iterator)
    return average_loss, conf_mat
def evaluate_sense(relation_pairs, valid_senses):
    sense_alphabet = Alphabet()
    for sense in valid_senses:
        sense_alphabet.add(sense)
    sense_alphabet.add(ConfusionMatrix.NEGATIVE_CLASS)
    sense_alphabet.growing = False

    sense_cm = ConfusionMatrix(sense_alphabet)
    for g_relation, p_relation in relation_pairs:
        assert g_relation is not None or p_relation is not None
        if g_relation is None:
            predicted_sense = p_relation['Sense'][0]
            sense_cm.add(predicted_sense, ConfusionMatrix.NEGATIVE_CLASS)
        elif p_relation is None:
            gold_sense = g_relation['Sense'][0]
            if gold_sense in valid_senses:
                sense_cm.add(ConfusionMatrix.NEGATIVE_CLASS, gold_sense)
        else:
            predicted_sense = p_relation['Sense'][0]
            gold_sense = g_relation['Sense'][0]
            if gold_sense in valid_senses:
                sense_cm.add(predicted_sense, gold_sense)
    return sense_cm
def adaptive_by_negation_performance(df):
    concept = 'Is_Adaptive'
    classifier_name = 'adaptive_by_negation_pred'
    adaptive_g = df.groupby([classifier_name, concept],
                            as_index=False).agg({'commit': 'count'})
    adaptive_cm = ConfusionMatrix(g_df=adaptive_g,
                                  classifier=classifier_name,
                                  concept=concept,
                                  count='commit')
    print("adaptive_by_negation commit performance")
    print(adaptive_cm.summarize())
    return adaptive_cm
def predict(
    self, dataset: Dataset
) -> dict[str, Union[str, list[str], float, ConfusionMatrix]]:
    """Predicts the class labels of the given test set, based on a
    previously fitted model.

    :param dataset: dataset of (example, label) pairs
    :return: dict with the tree's branches, the predicted labels, the
        accuracy, and the confusion matrix
    """
    prediction_params: dict[
        str, Union[str, list[str], float, ConfusionMatrix]] = {
            "branches": self.__branches
        }
    predicted_values: list[str] = []
    for example, label in dataset:
        # predict the example
        predicted_values.append(self.__label_example(example, self.__root))
    prediction_params["predictions"] = predicted_values
    prediction_params["accuracy"] = accuracy(dataset.label_sample,
                                             predicted_values)
    prediction_params["confusion_matrix"] = ConfusionMatrix(
        dataset.label_space, dataset.label_sample, predicted_values)
    return prediction_params
def setUp(self):
    label_names = ['banana', 'apple', 'orange']
    labels = np.array([
        [0, 1, 0],
        [1, 0, 0],
        [1, 0, 0],
        [0, 1, 0],
        [0, 0, 1],
        [0, 0, 1],
        [1, 0, 0],
        [0, 1, 0],
        [1, 0, 0],
        [0, 0, 1],
        [0, 0, 1],
    ])
    predictions = np.array([
        [0.9, 0.1, 0.0],
        [0.7, 0.1, 0.2],
        [0.4, 0.3, 0.3],
        [0.2, 0.6, 0.2],
        [0.5, 0.2, 0.3],
        [0.0, 0.0, 1.0],
        [0.2, 0.1, 0.7],
        [0.1, 0.8, 0.1],
        [0.3, 0.5, 0.2],
        [0.1, 0.0, 0.9],
        [0.0, 1.0, 0.0],
    ])
    self.confusion_matrix = ConfusionMatrix(predictions, labels,
                                            class_names=label_names)
def evaluate(model, iterator, criterion):
    model.eval()
    epoch_loss = 0
    conf_mat = ConfusionMatrix(np.zeros((NUM_CLASSES, NUM_CLASSES)))
    with torch.no_grad():
        for batch, labels in iterator():
            predictions = model(batch)
            loss = criterion(predictions.float(), labels)
            epoch_loss += loss.item()
            conf_mat += ConfusionMatrix.from_predictions(predictions, labels)
    average_loss = epoch_loss / len(iterator)
    return average_loss, conf_mat
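# Sketch of an epoch loop tying train() and evaluate() together. The model,
# the iterator factories `train_iterator`/`val_iterator`, and all
# hyperparameters below are placeholders, not part of the original project.
import torch
import torch.nn as nn

demo_model = nn.Linear(40, NUM_CLASSES)  # stand-in for the real model
demo_optimizer = torch.optim.Adam(demo_model.parameters(), lr=1e-3)
demo_criterion = nn.CrossEntropyLoss()
for epoch in range(3):
    tr_loss, tr_cm = train(demo_model, train_iterator, demo_optimizer,
                           demo_criterion, reg_ratio=1e-4)
    va_loss, va_cm = evaluate(demo_model, val_iterator, demo_criterion)
    print("epoch %d: train %.4f, val %.4f" % (epoch, tr_loss, va_loss))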
def evaluate(self, test_set):
    x = test_set.x
    x = self.preprocess_x(x)
    y = test_set.y
    y = np_utils.to_categorical(y, test_set.num_classes)
    y_pred = self.model.predict_proba(x, verbose=0)
    cf = ConfusionMatrix(y_pred, y)
    return cf
def make_confusion_matrix(results):
    true_positives = [r for r in results if r.tp]
    true_negatives = [r for r in results if r.tn]
    false_positives = [r for r in results if r.fp]
    false_negatives = [r for r in results if r.fn]
    return ConfusionMatrix(len(true_positives), len(false_positives),
                           len(true_negatives), len(false_negatives))
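# Hedged usage sketch: make_confusion_matrix() only needs boolean
# tp/tn/fp/fn attributes on each result, so a namedtuple stand-in suffices.
from collections import namedtuple

Result = namedtuple('Result', ['tp', 'tn', 'fp', 'fn'])
demo_results = [Result(True, False, False, False),
                Result(False, True, False, False),
                Result(False, False, True, False)]
demo_cm = make_confusion_matrix(demo_results)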
def make_pr_graph(entropies, correct, graph_name, title, mpl_figure=None):
    """Plot entropy as a PR curve predicting whether examples are correct
    or incorrect.
    """
    if mpl_figure is None:
        mpl_figure = mpl.figure()
    axes = mpl_figure.gca()
    assert len(entropies) == len(correct), (len(entropies), len(correct))
    pairs = sorted(zip(entropies, correct))  # sort by entropy
    max_entropy = float(pairs[-1][0])
    min_entropy = float(pairs[0][0])
    num_segments = 20.0
    segment_size = (max_entropy - min_entropy) / num_segments
    if segment_size == 0:
        segment_size = 0.01
    thresholds = na.arange(min_entropy, max_entropy + 1, segment_size)
    X_recall = []
    Y_precision = []
    for threshold in thresholds:
        predicted_correct = [(entropy, label) for (entropy, label) in pairs
                             if entropy < threshold]
        predicted_incorrect = [(entropy, label) for (entropy, label) in pairs
                               if entropy >= threshold]
        tp = len([1 for entropy, label in predicted_incorrect if not label])
        fp = len([1 for entropy, label in predicted_incorrect if label])
        tn = len([1 for entropy, label in predicted_correct if label])
        fn = len([1 for entropy, label in predicted_correct if not label])
        if tp == 0:
            continue
        cm = ConfusionMatrix(tp, fp, tn, fn)
        X_recall.append(cm.recall)
        Y_precision.append(cm.precision)
    global marker_i
    graph_name_paper = ENTROPY_METRIC_PAPER_NAMES[graph_name]
    axes.plot(X_recall, Y_precision, label=graph_name_paper,
              marker=markers[marker_i])
    marker_i = (marker_i + 1) % len(markers)
    axes.legend(loc='upper right')
    axes.set_xlabel("Recall")
    axes.set_ylabel("Precision")
    axes.set_ylim(0, 1.1)
    axes.set_xlim(0, 1.1)
    axes.set_title("Precision vs Recall")
    mpl_figure.savefig(title.replace(" ", "_") + ".eps")
    mpl.show()
def evaluate_sense(relation_pairs, valid_senses):
    sense_alphabet = Alphabet()
    for g_relation, _ in relation_pairs:
        if g_relation is not None:
            sense = g_relation["Sense"][0]
            if sense in valid_senses:
                sense_alphabet.add(sense)
    sense_alphabet.add(ConfusionMatrix.NEGATIVE_CLASS)
    sense_alphabet.growing = False

    sense_cm = ConfusionMatrix(sense_alphabet)
    for g_relation, p_relation in relation_pairs:
        assert g_relation is not None or p_relation is not None
        if g_relation is None:
            predicted_sense = p_relation["Sense"][0]
            sense_cm.add(predicted_sense, ConfusionMatrix.NEGATIVE_CLASS)
        elif p_relation is None:
            gold_sense = g_relation["Sense"][0]
            if gold_sense in valid_senses:
                sense_cm.add(ConfusionMatrix.NEGATIVE_CLASS, gold_sense)
        else:
            predicted_sense = p_relation["Sense"][0]
            gold_sense = g_relation["Sense"][0]
            if gold_sense in valid_senses:
                sense_cm.add(predicted_sense, gold_sense)
    return sense_cm
def train_and_test(self, train_set, test_set):
    self.classifier = self.classifier_class.train(train_set)
    predicted_polarities = [self.classify(document)
                            for (document, polarity) in test_set]
    actual_polarities = [polarity for (document, polarity) in test_set]
    return ConfusionMatrix(predicted_polarities, actual_polarities)
def evaluate_objects(model, corpus_fname, state_type):
    corpus = annotationIo.load(corpus_fname)
    state_cls = state_type_from_name(state_type)
    from g3.inference import nodeSearch
    taskPlanner = nodeSearch.BeamSearch(model)
    predictions = []
    phrases = set()
    for i, annotation in enumerate(corpus):
        start_state = state_cls.from_context(annotation.context)
        for esdc in annotation.esdcs:
            isCorrect = annotation.isGroundingCorrect(esdc)
            if isCorrect is not None:
                ggg = ggg_from_esdc(esdc)
                groundings = annotation.getGroundings(esdc)
                assert len(groundings) == 1
                grounding = groundings[0]
                prob = evaluate_ggg(ggg, grounding, start_state, taskPlanner)
                predicted_class = prob > 0.7
                predictions.append((predicted_class, isCorrect))
                phrases.add(esdc.text)
    tp = len([(p, l) for p, l in predictions if p and p == l])
    fp = len([(p, l) for p, l in predictions if p and p != l])
    tn = len([(p, l) for p, l in predictions if not p and p == l])
    fn = len([(p, l) for p, l in predictions if not p and p != l])
    cm = ConfusionMatrix(tp, fp, tn, fn)
    return cm
def __init__(self, raw_auc):
    if raw_auc is None:
        raise ValueError("Missing data for `raw_auc`.")
    self.AUC = raw_auc["AUC"]
    self.Gini = raw_auc["Gini"]
    self.confusion_matrices = ConfusionMatrix.read_cms(
        raw_auc["confusion_matrices"])
    # Two-dimensional tables
    self.thresholdsAndMetricScores = raw_auc["thresholdsAndMetricScores"]
    self.maxCriteriaAndMetricScores = raw_auc["maxCriteriaAndMetricScores"]
def tell_a_posteriori_feasibility(self, apos_feasibility):
    self._confusion_matrix = ConfusionMatrix(apos_feasibility)
    self._sp = self._confusion_matrix.success_probability()
    self._ppv = self._confusion_matrix.ppv()
    self._npv = self._confusion_matrix.npv()
    self._pending_apos_solutions = []

    # log all bindings
    self.logger.log()
    self._count_constraint_infeasibles = 0
    self._count_repaired = 0
def evaluate_sense(gold_list, predicted_list):
    """Evaluate sense classifier

    The label ConfusionMatrix.NEGATIVE_CLASS is for the relations that are
    missed by the system because the arguments don't match any of the gold
    relations.
    """
    print("In function: evaluate_sense")
    sense_alphabet = Alphabet()
    valid_senses = validator.identify_valid_senses(gold_list)
    for relation in gold_list:
        sense = relation['Sense'][0]
        if sense in valid_senses:
            sense_alphabet.add(sense)
    sense_alphabet.add(ConfusionMatrix.NEGATIVE_CLASS)
    sense_cm = ConfusionMatrix(sense_alphabet)
    gold_to_predicted_map, predicted_to_gold_map = \
        _link_gold_predicted(gold_list, predicted_list, spans_exact_matching)
    for i, gold_relation in enumerate(gold_list):
        gold_sense = gold_relation['Sense'][0]
        if gold_sense in valid_senses:
            if i in gold_to_predicted_map:
                predicted_sense = gold_to_predicted_map[i]['Sense'][0]
                if predicted_sense in gold_relation['Sense']:
                    sense_cm.add(predicted_sense, predicted_sense)
                else:
                    if not sense_cm.alphabet.has_label(predicted_sense):
                        predicted_sense = ConfusionMatrix.NEGATIVE_CLASS
                    sense_cm.add(predicted_sense, gold_sense)
            else:
                sense_cm.add(ConfusionMatrix.NEGATIVE_CLASS, gold_sense)
    for i, predicted_relation in enumerate(predicted_list):
        if i not in predicted_to_gold_map:
            predicted_sense = predicted_relation['Sense'][0]
            if not sense_cm.alphabet.has_label(predicted_sense):
                predicted_sense = ConfusionMatrix.NEGATIVE_CLASS
            sense_cm.add(predicted_sense, ConfusionMatrix.NEGATIVE_CLASS)
    return sense_cm
def compute_span_exact_match_metric(gold_list, predicted_list, verbose=False):
    """Compute binary evaluation metric"""
    binary_alphabet = Alphabet()
    binary_alphabet.add('yes')
    binary_alphabet.add('no')
    cm = ConfusionMatrix(binary_alphabet)
    matched_predicted = [False for x in predicted_list]
    predicted = defaultdict(list)
    for i, pspan in enumerate(predicted_list):
        predicted[pspan].append(i)
    empty_list = []
    for gold in gold_list:
        found_match = False
        indices = predicted.get(gold, empty_list)
        for i in indices:
            if not matched_predicted[i]:
                cm.add('yes', 'yes')
                matched_predicted[i] = True
                found_match = True
                break
        if not found_match:
            if verbose:
                print('Span:')
                print('<<<\t{:s}'.format(gold).encode(ENCODING))
                print()
            cm.add('no', 'yes')
    # Predicted span that does not match with any gold span
    for matched, pred in zip(matched_predicted, predicted_list):
        if not matched:
            if verbose:
                print('Span:')
                print('>>>\t{:s}'.format(pred).encode(ENCODING))
                print()
            cm.add('yes', 'no')
    return cm
def evaluate(gold_file, pred_file):
    with codecs.open(gold_file, encoding="utf-8") as fin_gold, \
            codecs.open(pred_file, encoding="utf-8") as fin_pred:
        dict_P_to_url_label = {}
        for line in fin_gold:
            P, url, label, _ = line.strip().split("\t")
            if P not in dict_P_to_url_label:
                dict_P_to_url_label[P] = set()
            dict_P_to_url_label[P].add((url.strip(), label))

        predict_set = set()
        for line in fin_pred:
            url, s, p, o, confidence = line.strip().split("\t")
            predict_set.add((url.strip(), p))

        alphabet = Alphabet()
        alphabet.add("0")
        alphabet.add("1")

        # Evaluation
        macro_p, macro_r, macro_f = 0, 0, 0
        N = 0
        for P in sorted(dict_P_to_url_label.keys()):
            confusionMatrix = ConfusionMatrix(alphabet)
            recall_error_cases = []
            precision_error_cases = []
            for url, label in dict_P_to_url_label[P]:
                pred = "0"
                if (url, P) in predict_set:
                    pred = "1"
                if label != pred:
                    if label == "1" and pred == "0":
                        recall_error_cases.append(
                            "%s\t%s->%s" % (url, label, pred))
                    if label == "0" and pred == "1":
                        precision_error_cases.append(
                            "%s\t%s->%s" % (url, label, pred))
                confusionMatrix.add(pred, label)
            print("==" * 40)
            print(P)
            print(confusionMatrix.print_out())
            p, r, f = confusionMatrix.get_prf("1")
            macro_p += p
            macro_r += r
            macro_f += f
            N += 1
            print("\n==>recall error cases:")
            print("\n".join(recall_error_cases))
            print("\n==>precision error cases:")
            print("\n".join(precision_error_cases))
        print("**" * 40)
        print("macro, P: %f; R: %f; F1: %f" % (macro_p / N, macro_r / N,
                                               macro_f / N))
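# Note (illustration, not from the original): the macro scores printed above
# are unweighted means over the N predicates. F1 averaged this way is the
# mean of per-predicate F1 values, which generally differs from the F1 of
# the averaged P and R:
per_predicate = [(0.8, 0.5), (0.4, 0.9)]    # hypothetical (P, R) pairs
f1_values = [2 * p * r / (p + r) for p, r in per_predicate]
macro_f1 = sum(f1_values) / len(f1_values)  # mean of per-predicate F1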
class ORIDSESSVC(EvolutionStrategy):

    description = \
        "Ori. Death Penalty Step Control Evolution Strategy (DSES) with SVC"
    description_short = "Ori. DSES with SVC"

    def __init__(self, mu, lambd, theta, pi, initial_sigma, delta,
                 tau0, tau1, initial_pos, beta, meta_model):
        super(ORIDSESSVC, self).__init__(mu, lambd)
        self._theta = theta
        self._pi = pi
        self._delta = delta
        self._infeasibles = 0
        self._init_pos = initial_pos
        self._init_sigma = initial_sigma
        self._tau0 = tau0
        self._tau1 = tau1

        # SVC metamodel
        self.meta_model = meta_model
        self.meta_model_trained = False
        self._beta = beta

        self._current_population = []
        self._valid_solutions = []
        self._pending_apos_solutions = []

        self.logger.add_const_binding('_theta', 'theta')
        self.logger.add_const_binding('_pi', 'pi')
        self.logger.add_const_binding('_tau0', 'tau0')
        self.logger.add_const_binding('_tau1', 'tau1')
        self.logger.add_binding('_delta', 'delta')
        self.logger.add_binding('_sp', 'successprob')
        self.logger.add_binding('_ppv', 'ppv')
        self.logger.add_binding('_npv', 'npv')

        # log constants
        self.logger.const_log()

        # initialize population
        self._initialize_population()

    # cPickle cannot serialize lambda functions
    def _mat_mutate_sig(self, sig):
        mutate_sig = lambda sigma: sigma * exp(self._tau1 * normal(0, 1))
        _lmutatesig = vectorize(mutate_sig)
        return _lmutatesig(sig)

    # cPickle cannot serialize lambda functions
    def _mat_mutate_pos(self, coord, sigma):
        mutate_pos = lambda coord, sigma: coord + normal(0, sigma)
        _lmutatepos = vectorize(mutate_pos)
        return _lmutatepos(coord, sigma)

    # cPickle cannot serialize lambda functions
    def _mat_reducer(self, x):
        reducer = lambda sigma: self._delta if sigma < self._delta else sigma
        _lmatreducer = vectorize(reducer)
        return _lmatreducer(x)

    def _initialize_population(self):
        init_pos, init_sigma = self._init_pos, self._init_sigma
        d = init_pos.size
        genpos = lambda pos, sigma: random.normal(pos, sigma)
        gensig = lambda sigma: sigma
        # initial (mu, lambda) population with pairing-selection
        # probability 1/mu; interval sizes are equal.
        s, i = 0.0, (1 / float(self._mu))
        while len(self._current_population) < self._mu:
            sigma = self._mat_mutate_sig(init_sigma)
            pos = self._mat_mutate_pos(init_pos, sigma)
            individual = matrix([pos.getA1(), sigma.getA1()])
            self._current_population.append((individual, s, s + i))
            s = s + i

    def _generate_individual(self):
        # selection of pairing: anti-proportional selection using
        # the intervals within [0, 1]
        parents = []
        while len(parents) < 2:
            x = random.random()
            for individual, start, end in self._current_population:
                if start <= x < end:
                    parents.append(individual)
        child = 0.5 * (parents[0] + parents[1])

        # mutation of sigma
        self._global_sigma_mutation = exp(self._tau0 * normal(0, 1))
        child[SIGMA] = self._mat_mutate_sig(child[SIGMA])
        child[SIGMA] = self._global_sigma_mutation * child[SIGMA]
        if self._infeasibles % self._pi == 0:
            self._delta *= self._theta

        # minimum step size
        child[SIGMA] = self._mat_reducer(child[SIGMA])

        # mutation of position with new step size
        child[POS] = self._mat_mutate_pos(child[POS], child[SIGMA])
        return child

    def ask_pending_solutions(self):
        """ask pending solutions; solutions which need a check for
        true feasibility"""
        individuals = []
        while len(individuals) < 1:
            if random.random() < self._beta and self.meta_model_trained:
                individual = self._generate_individual()
                if self.meta_model.check_feasibility(individual[POS]):
                    individuals.append(individual)
                    # append meta-feasible solution to a-posteriori pending
                    self._pending_apos_solutions.append((individual, True))
                else:
                    # append meta-infeasible solution to a-posteriori pending
                    self._pending_apos_solutions.append((individual, False))
            else:
                individual = self._generate_individual()
                individuals.append(individual)
        return individuals

    def tell_feasibility(self, feasibility_information):
        """tell feasibility; return True if there are no pending
        solutions, otherwise False"""
        for (child, feasibility) in feasibility_information:
            if feasibility:
                self._valid_solutions.append(child)
                self._infeasibles = 0
            else:
                self._count_constraint_infeasibles += 1
                # was `self._infeasibles += self._infeasibles`, which is a
                # no-op starting from 0; count one infeasible per child
                self._infeasibles += 1
                self.meta_model.add_infeasible(child[POS])
        if len(self._valid_solutions) < self._lambd:
            return False
        else:
            return True

    def ask_valid_solutions(self):
        return self._valid_solutions

    def ask_a_posteriori_solutions(self):
        return self._pending_apos_solutions

    def tell_fitness(self, fitnesses):
        # tuple-unpacking lambdas are Python 2 only; index instead
        fitness = lambda cf: cf[1]
        child = lambda cf: cf[0]
        position = lambda cf: cf[0][POS]

        sorted_fitnesses = sorted(fitnesses, key=fitness)
        sorted_children = list(map(child, sorted_fitnesses))
        selected_sorted_fitnesses = sorted_fitnesses[:self._mu]

        # update the meta model: self._valid_solutions sorted by fitness,
        # self._sliding_infeasibles unsorted
        sorted_feasibles = list(map(position, sorted_fitnesses))
        self.meta_model.add_sorted_feasibles(sorted_feasibles)
        self.meta_model_trained = self.meta_model.train()

        # update the selection probabilities according to
        # anti-proportional fitness
""" probabilities = [] s, a_prop_sum, sum_of_fitnesses = 0.0, 0.0, 0.0 for individual, fitness in selected_sorted_fitnesses: sum_of_fitnesses += fitness for individual, fitness in selected_sorted_fitnesses: a_prop_sum += 1.0 / (fitness / float(sum_of_fitnesses)) for individual, fitness in selected_sorted_fitnesses: p = (1.0 / (fitness / float(sum_of_fitnesses))) / a_prop_sum probabilities.append((individual, p)) probabilities.reverse() """ update the current population """ self._current_population = [] start = 0 for individual, prob in probabilities: self._current_population.append((individual, start, start + prob)) start = s + prob self._current_population.reverse() ### UPDATE FOR NEXT ITERATION self._valid_solutions = [] ### STATISTICS self._selected_children = self._current_population self._best_child, self._best_fitness = selected_sorted_fitnesses[0] self._worst_child, self._worst_fitness = selected_sorted_fitnesses[-1] self._mean_fitness = array(map(lambda (c,f) : f, selected_sorted_fitnesses)).mean() return self._best_child, self._best_fitness def tell_a_posteriori_feasibility(self, apos_feasibility): self._confusion_matrix = ConfusionMatrix(apos_feasibility) self._sp = self._confusion_matrix.success_probability() self._ppv = self._confusion_matrix.ppv() self._npv = self._confusion_matrix.npv() self._pending_apos_solutions = [] # log all bindings self.logger.log() self._count_constraint_infeasibles = 0 self._count_repaired = 0
def learn(n_vow, N_reservoir=100, leaky=True, classification=True, **kwargs):
    """Perform supervised learning on an ESN.

    n_vow: total number of vowels used
    N_reservoir: size of ESN
    leaky: boolean defining if a leaky ESN is to be used
    classification: boolean defining if sensory classification is performed
        instead of motor prediction
    kwargs: output_folder, regularization, logistic, leak_rate,
        spectral_radius, n_channels, n_vowel, n_samples, n_training,
        verbose, flow, rank
    """
    output_folder = kwargs['output_folder']
    regularization = kwargs['regularization']
    logistic = kwargs['logistic']
    leak_rate = kwargs['leak_rate']
    spectral_radius = kwargs['spectral_radius']
    n_channels = kwargs['n_channels']
    n_vow = kwargs['n_vowel']
    n_samples = kwargs['n_samples']
    n_training = kwargs['n_training']
    output = kwargs['verbose']
    flow = kwargs['flow']
    rank = kwargs['rank']

    training_set, test_set = get_training_and_test_sets(n_samples,
                                                        n_training, n_vow)
    if output:
        print('samples_test = ' + str(test_set))
        print('len(samples_train) = ' + str(len(training_set)))

    # number of classes is the total number of vowels + null class
    N_classes = n_vow + 1
    # input dimension is the number of used channels
    input_dim = n_channels

    if output:
        print('constructing reservoir')
    # construct individual nodes
    if leaky:
        # leaky reservoir with the appropriate number of input units and
        # the given number of reservoir units
        reservoir = Oger.nodes.LeakyReservoirNode(
            input_dim=input_dim, output_dim=N_reservoir, input_scaling=1.,
            spectral_radius=spectral_radius, leak_rate=leak_rate)
    else:
        # non-leaky reservoir with the appropriate number of input units
        # and the given number of reservoir units
        reservoir = Oger.nodes.ReservoirNode(
            input_dim=input_dim, output_dim=N_reservoir, input_scaling=1.)
    if logistic:
        readout = Oger.nodes.LogisticRegressionNode()
    else:
        # output units with the Ridge Regression training method
        readout = Oger.nodes.RidgeRegressionNode(regularization)

    # connect reservoir and output nodes
    flow = mdp.Flow([reservoir, readout])

    if output:
        print("Training...")
    # train flow with the input files provided by the file iterator
    flow.train([[], training_set])

    if output:
        print("Applying to testset...")
    losses = []     # discrete recognition variable for each test item
    ymean = []      # true class of each test item
    ytestmean = []  # class vote of the trained flow for each test item
    for i_sample in range(len(test_set)):
        if output:
            print('testing with sample ' + str(i_sample))
        # load xtest and ytarget as separate numpy arrays
        xtest = test_set[i_sample][0]
        ytarget = test_set[i_sample][1]
        # evaluate the trained output units' responses for the current item
        ytest = flow(xtest)
        # average each output neuron's response over time
        mean_sample_vote = mdp.numx.mean(ytest, axis=0)
        if output:
            print('mean_sample_vote = ' + str(mean_sample_vote))
        # average teacher signals over time
        target = mdp.numx.mean(ytarget, axis=0)
        if output:
            print('target = ' + str(target))
        # winner-take-all vote for the final classification
        argmax_vote = sp.argmax(mean_sample_vote)
        ytestmean.append(argmax_vote)
        # true class of the current test item
        argmax_target = sp.argmax(target)
        ymean.append(argmax_target)
        # loss_01 compares vote and true class: 0 if they match, 1 otherwise
        loss = Oger.utils.loss_01(mdp.numx.atleast_2d(argmax_vote),
                                  mdp.numx.atleast_2d(argmax_target))
        if output:
            print('loss = ' + str(loss))
        losses.append(loss)
        # destroy xtest, ytest, ytarget to free up memory
        xtest = None
        ytest = None
        ytarget = None

    # error rate is the average number of mismatches
    error = mdp.numx.mean(losses)
    if output:
        print('error = ' + str(error))
        print('ymean: ' + str(ymean))
        print('ytestmean: ' + str(ytestmean))

    # convert ytestmean and ymean lists to numpy arrays for the
    # confusion matrix
    ytestmean = np.array(ytestmean)
    ymean = np.array(ymean)
    # create confusion matrix from class votes and true classes
    confusion_matrix = ConfusionMatrix.from_data(N_classes, ytestmean, ymean)
    c_matrix = confusion_matrix.balance()  # normalize confusion matrix
    c_matrix = np.array(c_matrix)
    if output:
        print('confusion_matrix = ' + str(c_matrix))
    save_flow(flow, N_reservoir, leaky, rank, output_folder)
    # return current error rate and confusion matrix
    return error, c_matrix
def compute_binary_eval_metric(predicted_list, gold_list, binary_alphabet):
    cm = ConfusionMatrix(binary_alphabet)
    for (predicted_span, gold_span) in zip(predicted_list, gold_list):
        cm.add(predicted_span, gold_span)
    return cm
def evaluate_sense(gold_list, predicted_list, verbose=False):
    """Evaluate sense classifier

    The label ConfusionMatrix.NEGATIVE_CLASS is for the relations that are
    missed by the system because the arguments don't match any of the gold
    relations.
    """
    sense_alphabet = Alphabet()
    valid_senses = validator.identify_valid_senses(gold_list)
    for relation in gold_list:
        isense = relation['Sense'][0]
        if isense in valid_senses:
            sense_alphabet.add(isense)
    sense_alphabet.add(ConfusionMatrix.NEGATIVE_CLASS)

    sense_cm = ConfusionMatrix(sense_alphabet)
    gold_to_predicted_map, predicted_to_gold_map = \
        _link_gold_predicted(gold_list, predicted_list, spans_exact_matching)

    for i, gold_relation in enumerate(gold_list):
        gold_sense = gold_relation['Sense'][0]
        if gold_sense in valid_senses:
            if i in gold_to_predicted_map:
                predicted_sense = gold_to_predicted_map[i]['Sense'][0]
                if predicted_sense in gold_relation['Sense']:
                    sense_cm.add(predicted_sense, predicted_sense)
                else:
                    if not sense_cm.alphabet.has_label(predicted_sense):
                        predicted_sense = ConfusionMatrix.NEGATIVE_CLASS
                    if verbose:
                        print('Sense:')
                        print('<<<\t{:s}'.format(gold_sense).encode(ENCODING))
                        print('>>>\t{:s}'.format(predicted_sense).encode(
                            ENCODING))
                        print('Arg1:\t{:s}'.format(
                            gold_relation['Arg1']['RawText']).encode(
                                ENCODING))
                        print('Arg2:\t{:s}'.format(
                            gold_relation['Arg2']['RawText']).encode(
                                ENCODING))
                        print()
                    sense_cm.add(predicted_sense, gold_sense)
            else:
                if verbose:
                    print('Sense:')
                    print('<<<\t{:s}'.format(gold_sense).encode(ENCODING))
                    print('>>>\t{:s}'.format(
                        ConfusionMatrix.NEGATIVE_CLASS).encode(ENCODING))
                    print('Arg1:\t{:s}'.format(
                        gold_relation['Arg1']['RawText']).encode(ENCODING))
                    print('Arg2:\t{:s}'.format(
                        gold_relation['Arg2']['RawText']).encode(ENCODING))
                    print()
                sense_cm.add(ConfusionMatrix.NEGATIVE_CLASS, gold_sense)

    for i, predicted_relation in enumerate(predicted_list):
        if i not in predicted_to_gold_map:
            predicted_sense = predicted_relation['Sense'][0]
            if not sense_cm.alphabet.has_label(predicted_sense):
                predicted_sense = ConfusionMatrix.NEGATIVE_CLASS
            if verbose:
                # report the unmatched *predicted* relation (the original
                # printed stale gold_sense/gold_relation variables left
                # over from the previous loop)
                print('Sense:')
                print('<<<\t{:s}'.format(
                    ConfusionMatrix.NEGATIVE_CLASS).encode(ENCODING))
                print('>>>\t{:s}'.format(predicted_sense).encode(ENCODING))
                print('Arg1:\t{:s}'.format(
                    predicted_relation['Arg1']['RawText']).encode(ENCODING))
                print('Arg2:\t{:s}'.format(
                    predicted_relation['Arg2']['RawText']).encode(ENCODING))
                print()
            sense_cm.add(predicted_sense, ConfusionMatrix.NEGATIVE_CLASS)
    return sense_cm