# Shared imports assumed by these excerpts; each snippet below was taken from
# a different project, so only the names common to most of them appear here.
import time

import sklearn_crfsuite
from sklearn_crfsuite import metrics
from sklearn.model_selection import train_test_split


def train_full(data=None):
    data = data or get_tokenizer()
    train_sents, test_sents = train_test_split(data, test_size=0.2, shuffle=False)
    # Featurize the train split only, so no test sentence leaks into training.
    X_train = [sent2features(sent2tokens(s)) for s in train_sents]
    y_train = [sent2labels(s) for s in train_sents]
    X_test = [sent2features(sent2tokens(s)) for s in test_sents]
    y_test = [sent2labels(s) for s in test_sents]
    crf = sklearn_crfsuite.CRF(
        algorithm='lbfgs',
        c1=0.1,
        c2=0.1,
        max_iterations=100,
        all_possible_transitions=True,
        model_filename='models/model.bin'
    )
    crf.fit(X_train, y_train)
    start = time.time()
    y_pred = crf.predict(X_test)
    test_time = time.time() - start
    f1 = metrics.flat_f1_score(y_test, y_pred, average='weighted')
    print("F1:", f1)
    print("Test time:", test_time)
    print(metrics.flat_classification_report(y_test, y_pred, digits=3))
def train_test(data=None):
    train_sents, dev_sents, test_sents = data or get_tokenizer()
    X_train = [sent2features(sent2tokens(s)) for s in train_sents]
    y_train = [sent2labels(s) for s in train_sents]
    print(len(X_train), len(y_train))
    X_dev = [sent2features(sent2tokens(s)) for s in dev_sents]
    y_dev = [sent2labels(s) for s in dev_sents]
    X_test = [sent2features(sent2tokens(s)) for s in test_sents]
    y_test = [sent2labels(s) for s in test_sents]
    crf = sklearn_crfsuite.CRF(
        algorithm='lbfgs',
        c1=0.1,
        c2=0.1,
        max_iterations=100,
        all_possible_transitions=True,
        model_filename='model/model.bin'
    )
    # sklearn_crfsuite.CRF.fit accepts a held-out dev set for per-iteration scores.
    crf.fit(X_train, y_train, X_dev=X_dev, y_dev=y_dev)
    start = time.time()
    y_pred = crf.predict(X_test)
    test_time = time.time() - start
    F1 = metrics.flat_f1_score(y_test, y_pred, average='weighted')
    print("F1: ", F1)
    print("Test time: ", test_time)
    print(metrics.flat_classification_report(y_test, y_pred, digits=3))
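# ---------------------------------------------------------------------------
# Hedged sketch, not part of any snippet in this file: most excerpts assume
# project-specific helpers named sent2tokens / sent2features / sent2labels.
# A minimal stand-in for sentences stored as (token, label) pairs could look
# like the following; the real feature templates differ per project, and some
# snippets pass the raw sentence to sent2features instead of its tokens.
def sent2tokens(sent):
    return [token for token, label in sent]

def sent2labels(sent):
    return [label for token, label in sent]

def sent2features(tokens):
    # One {feature_name: value} dict per token, the input format CRFsuite expects.
    return [
        {
            'bias': 1.0,
            'word.lower()': tok.lower(),
            'word.istitle()': tok.istitle(),
            'word.isdigit()': tok.isdigit(),
            'BOS': i == 0,
            'EOS': i == len(tokens) - 1,
        }
        for i, tok in enumerate(tokens)
    ]
# ---------------------------------------------------------------------------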
def testing(crf, X_test, time_seq=None, y_test=None, save=0):
    # Avoid mutable default arguments; treat a missing y_test as "predict only".
    time_seq = time_seq or []
    if y_test:
        print("Results:")
        labels = list(crf.classes_)
        y_pred = crf.predict(X_test)
        # Sort so that B-/I- variants of the same entity type are adjacent.
        sorted_labels = [str(x) for x in sorted(labels, key=lambda name: (name[1:], name[0]))]
        print(metrics.flat_classification_report(y_test, y_pred, digits=3,
                                                 labels=sorted_labels))
        # plot_results(y_pred, X_test, time_seq, save)
        return metrics.flat_accuracy_score(y_test, y_pred)
    else:
        y_pred = crf.predict(X_test)
        plot_results(y_pred, X_test, time_seq, save)
        return y_pred
def train(self): """ 训练 """ self.initialize_model() x_train, y_train = self.corpus.generator() self.model.fit(x_train, y_train) labels = list(self.model.classes_) x_test, y_test = self.corpus.generator(train=False) y_predict = self.model.predict(x_test) metrics.flat_f1_score(y_test, y_predict, average='weighted', labels=labels) sorted_labels = sorted(labels, key=lambda name: (name[1:], name[0])) print(metrics.flat_classification_report(y_test, y_predict, labels=sorted_labels, digits=3)) self.save_model()
def _print_metrics(y_pred, y_true):
    labels = get_labels(y_true)
    print("Sequence accuracy: {:0.1%}".format(
        metrics.sequence_accuracy_score(y_true, y_pred)))
    print("Per-tag F1: {:0.3f}".format(
        metrics.flat_f1_score(y_true, y_pred, average='macro', labels=labels)))
    print("Per-tag classification report:\n{}".format(
        metrics.flat_classification_report(y_true, y_pred, labels=labels, digits=3)))
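# Hypothetical usage of _print_metrics on toy data (assumes get_labels returns
# the tag set of y_true; the example is illustrative, not from the source):
#
#   y_true = [['B-PER', 'I-PER', 'O'], ['B-LOC']]
#   y_pred = [['B-PER', 'O',     'O'], ['B-LOC']]
#   _print_metrics(y_pred, y_true)
#
# Only the second sequence is fully correct, so sequence accuracy is 50.0%,
# while the flat per-tag scores still credit the three correct tokens of four.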
def evaluate(self, predictions):
    all_predictions = []
    for pred_sent_tags in predictions:
        sentence_predictions = [self.corpus.idx2tag[np.argmax(tag)]
                                for tag in pred_sent_tags]
        all_predictions.append(sentence_predictions)
    all_true = []
    for sentence in self.corpus.vectorize_y_data(self.corpus.test_tags):
        sent_true = [self.corpus.idx2tag[np.argmax(tag)] for tag in sentence]
        all_true.append(sent_true)
    # Score everything except padding and the corpus-specific "outside" tag.
    labels = set(self.corpus.tag2idx.keys())
    labels.remove('PADDED')
    if self.corpus.corpus == 'comtravo':
        labels.remove('O-')
    else:
        labels.remove('O')
    print(flat_classification_report(all_true, all_predictions,
                                     labels=sorted(labels)))
def train(self): """训练""" self.initialize_model() x, y = self.corpus.generator() x_train, y_train = x[500:], y[500:] x_test, y_test = x[:500], y[:500] self.model.fit(x_train, y_train) labels = list(self.model.classes_) labels.remove('O') y_predict = self.model.predict(x_test) metrics.flat_f1_score(y_test, y_predict, average='weighted', labels=labels) sorted_labels = sorted(labels, key=lambda name: (name[1:], name[0])) print( metrics.flat_classification_report(y_test, y_predict, labels=sorted_labels, digits=3)) self.save_model()
def executeCRF(filename, RANDOM, path):
    """Execute CRF model"""
    # Prepare data
    getter = dt.Sentences(filename)  # instance of the class Sentences
    tool = dt.Tools()  # instance of the class Tools
    sentences = getter.getSentences()
    # Convert each sentence's token tuples into per-word feature dictionaries.
    X = [getter.sent2features(s) for s in sentences]
    # One list of tags per sentence.
    y = [getter.sent2labels(s) for s in sentences]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=RANDOM)  # split train and test
    # Training the CRF model
    crf = CRF(
        algorithm='lbfgs',  # gradient-based training with the L-BFGS method
        c1=0.1,
        c2=0.1,
        max_iterations=50,
        all_possible_transitions=True,
        # verbose=True,
    )
    model = crf.fit(X_train, y_train)
    labels = list(model.classes_)  # get classes
    labels.remove('O')  # drop the majority class O (other)
    n_labels = sorted(labels, key=lambda x: x.split("-")[1])  # sort by entity type
    # Evaluation
    y_pred = model.predict(X_test)
    metric1 = metrics.flat_classification_report(
        y_test, y_pred, labels=n_labels)  # individual tags evaluation
    metric2 = classification_report(y_test, y_pred)  # composed tags evaluation
    # Print results
    ln = '-' * 100
    print(metric1)
    print(ln)
    print(metric2)
def train(self, entity_type, train=TRAIN_FILE, test=DEV_FILE):
    print("Training " + entity_type + " CRF...")
    # Use context managers so the pickle files are closed after loading.
    with open(train, "rb") as train_file:
        train_sents = pickle.load(train_file)
    X_train = [self.sent2features(sent) for sent in train_sents]
    y_train = [self.sent2labels(sent, entity_type) for sent in train_sents]
    with open(test, "rb") as test_file:
        test_sents = pickle.load(test_file)
    X_test = [self.sent2features(sent) for sent in test_sents]
    y_test = [self.sent2labels(sent, entity_type) for sent in test_sents]
    crf = sklearn_crfsuite.CRF(
        algorithm='lbfgs',
        c1=0.1,
        c2=0.1,
        max_iterations=100,
        all_possible_transitions=True
    )
    crf.fit(X_train, y_train)
    labels = list(crf.classes_)
    labels.remove('O')
    y_pred = crf.predict(X_test)
    print("F1 Score:")
    print(metrics.flat_f1_score(y_test, y_pred, average='weighted', labels=labels))
    # Group B, I, L, U results for the same entity type together.
    sorted_labels = sorted(labels, key=lambda name: (name[1:], name[0]))
    print(metrics.flat_classification_report(y_test, y_pred,
                                             labels=sorted_labels, digits=3))
def go():
    lines = get_rhythm_annotation(sys.argv[1])
    # lines = get_meter_annotation(sys.argv[1])  # alternative annotation source
    X = [sent2features(sentence) for sentence in lines]
    y = [sent2labels(sentence) for sentence in lines]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1,
                                                        random_state=42)
    classifier = sklearn_crfsuite.CRF()
    classifier.fit(X_train, y_train)
    print('Train Size', len(X_train))
    print('Test Size', len(X_test))
    y_pred = classifier.predict(X_test)
    # Flatten the per-sentence label lists for the confusion matrix.
    flat_y_test = [item for sublist in y_test for item in sublist]
    flat_y_pred = [item for sublist in y_pred for item in sublist]
    print('Acc ', classifier.score(X_test, y_test))
    cm = ConfusionMatrix(flat_y_test, flat_y_pred)
    print(cm)
    # Show predictions for the last three annotated lines.
    outlines = lines[-3:]
    Xoutlines = [sent2features(line) for line in outlines]
    pred = classifier.predict(Xoutlines)
    print(list(zip(outlines, pred)))
    labels = list(classifier.classes_)
    # labels.remove(' ')
    sorted_labels = sorted(labels, key=lambda name: (name[1:], name[0]))
    print(metrics.flat_classification_report(y_test, y_pred,
                                             labels=sorted_labels, digits=3))
    joblib.dump(classifier, 'caesura.rhythm.model.joblib')
def execute(self, params, **kwargs):
    labels = list(self.marvin_model['crf'].classes_)
    labels.remove('O')
    y_pred = self.marvin_model['crf'].predict(self.marvin_dataset['X_test'])
    score = metrics.flat_f1_score(self.marvin_dataset['y_test'], y_pred,
                                  average='weighted', labels=labels)
    sorted_labels = sorted(labels, key=lambda name: (name[1:], name[0]))
    report = metrics.flat_classification_report(
        self.marvin_dataset['y_test'], y_pred, labels=sorted_labels, digits=3)
    self.marvin_metrics = {'score': score, 'report': report}
    print('Balanced F-score: ' + str(score))
    print('\nClassification Report: \n' + str(report))
def run(neg_ratio=0, val_ratio=0.05, data_dir='../../data/data_40/'):
    train_sents, val_sents = data_sampler(neg_ratio, val_ratio, data_dir)
    train_sents = get_sents(train_sents)
    val_sents = get_sents(val_sents)
    X_train = [sent2features(s) for s in train_sents]
    Y_train = [sent2labels(s) for s in train_sents]
    X_val = [sent2features(s) for s in val_sents]
    Y_val = [sent2labels(s) for s in val_sents]
    crf = CRF(algorithm='lbfgs', c1=0.1, c2=0.1, max_iterations=100,
              all_possible_transitions=False)
    crf.fit(X_train, Y_train)
    labels = list(crf.classes_)
    sorted_labels = sorted(labels, key=lambda name: (name[1:], name[0]))
    Y_pred = crf.predict(X_val)
    # Print the validation classification report.
    print('evaluating: neg ratio: {}, val ratio: {}'.format(neg_ratio, val_ratio))
    print(metrics.flat_classification_report(Y_val, Y_pred,
                                             labels=sorted_labels, digits=3))
    print("Evaluate: dev seg exact")
    p, r, f = seg_exact_match(
        Y_pred, Y_val,
        out + 'seg_' + str(neg_ratio) + 'neg',
        '../../data/val_segs/' + 'seg_' + str(neg_ratio) + 'neg')
    return crf
def print_classification_report(annotations, n_folds=10, model=None):
    """ Evaluate model, print classification report """
    if model is None:
        # FIXME: we're overfitting on hyperparameters - they should be chosen
        # using inner cross-validation, not set to fixed values beforehand.
        model = get_model(use_precise_form_types=True)
    annotations = [a for a in annotations if a.fields_annotated]
    form_types = formtype_model.get_realistic_form_labels(
        annotations=annotations,
        n_folds=n_folds,
        full_type_names=False
    )
    X, y = get_Xy(annotations=annotations, form_types=form_types,
                  full_type_names=True)
    cv = get_annotation_folds(annotations, n_folds=n_folds)
    y_pred = cross_val_predict(model, X, y, cv=cv, n_jobs=-1)
    all_labels = list(annotations[0].field_schema.types.keys())
    labels = sorted(set(flatten(y_pred)), key=lambda k: all_labels.index(k))
    print(flat_classification_report(y, y_pred, digits=2,
                                     labels=labels, target_names=labels))
    print("{:0.1f}% fields are classified correctly.".format(
        flat_accuracy_score(y, y_pred) * 100))
    print("All fields are classified correctly in {:0.1f}% forms.".format(
        sequence_accuracy_score(y, y_pred) * 100))
def print_classification_report(annotations, n_splits=10, model=None):
    """ Evaluate model, print classification report """
    if model is None:
        # FIXME: we're overfitting on hyperparameters - they should be chosen
        # using inner cross-validation, not set to fixed values beforehand.
        model = get_model(use_precise_form_types=True)
    annotations = [a for a in annotations if a.fields_annotated]
    form_types = formtype_model.get_realistic_form_labels(
        annotations=annotations,
        n_splits=n_splits,
        full_type_names=False
    )
    X, y = get_Xy(
        annotations=annotations,
        form_types=form_types,
        full_type_names=True,
    )
    # Group folds by domain so pages from one site never span train and test.
    group_kfold = GroupKFold(n_splits=n_splits)
    groups = [get_domain(ann.url) for ann in annotations]
    y_pred = cross_val_predict(model, X, y, cv=group_kfold, groups=groups,
                               n_jobs=-1)
    all_labels = list(annotations[0].field_schema.types.keys())
    labels = sorted(set(flatten(y_pred)), key=lambda k: all_labels.index(k))
    print(flat_classification_report(y, y_pred, digits=2,
                                     labels=labels, target_names=labels))
    print("{:0.1f}% fields are classified correctly.".format(
        flat_accuracy_score(y, y_pred) * 100))
    print("All fields are classified correctly in {:0.1f}% forms.".format(
        sequence_accuracy_score(y, y_pred) * 100))
def model_testing(Y_test, output_path, testing_start_date, testing_end_date,
                  chain_len):
    X_test = loadX(testing_start_date, testing_end_date)
    X_test = dataFillNA(X_test)  # fill NA values
    tmp_columns = X_test.columns.tolist()
    tmp_columns.remove('date')
    all_data = X_test.merge(Y_test, on='date', how='inner')
    X_test = all_data[tmp_columns]
    Y_test = all_data['Y']
    test_dates = all_data['date']
    del all_data
    gc.collect()
    # Turn point-wise rows into CRF chains of length chain_len.
    X_test = Xpoint2Set(X_test, chain_len)
    Y_test_pair = Ypoint2Set(Y_test, chain_len)
    with open(output_path + 'crf_model.pkl', 'rb') as tmp_fi:  # load model
        crf = pickle.load(tmp_fi)
    y_pred = crf.predict(X_test)
    # Evaluate on chain (pair) labels.
    labels = Y_test.astype('str').unique()
    print(metrics.flat_classification_report(Y_test_pair, y_pred,
                                             labels=labels, digits=3))
    # Evaluate on single-step labels reconstructed from the chains.
    y_pred_single = y_pred[0].copy()
    y_pred_single.pop(-1)
    y_pred_single.extend([tmp_y[1] for tmp_y in y_pred])
    # y_pred_single.insert(0, y_pred[0][0])
    y_real_single = Y_test.astype('str').tolist()
    prsc = precision_score(y_real_single, y_pred_single, labels=labels,
                           average='micro')
    print('%s to %s micro-averaged precision: %f'
          % (testing_start_date, testing_end_date, prsc))
    prediction = pd.DataFrame(test_dates)
    prediction.loc[:, 'predict'] = y_pred_single
    return prediction, prsc
def classify_dataframe(self):
    x_tests = list()
    y_tests = list()
    classes = list(self.classifier.classes_)
    logger.debug("Using Conditional Random Fields for Layout Classification")
    # Load the feature extractor class dynamically from the configured module.
    my_module = importlib.import_module(self.module_name)
    FeatureExtractor = getattr(my_module, "FeatureExtractor")
    f_extractor = FeatureExtractor()
    for file in self.read_all_train_files(self.test_path):
        file_df = pd.read_excel(open(file, mode='rb'), sheetname='Sheet1')
        file_texts = list()
        file_labels = list()
        for index, row in file_df.iterrows():
            file_texts.append(row[Tag.TEXT.value])
            file_labels.append(row[Tag.TAG.value])
        file_features = f_extractor.extract_features(file_texts, 7, 7)
        x_tests.append(file_features)
        y_tests.append(file_labels)
    y_predict = self.classifier.predict(x_tests)
    eval_score = metrics.flat_classification_report(y_tests, y_predict,
                                                    labels=classes, digits=3)
    print(eval_score)
    self.eval_score_writer(eval_score=eval_score)
    return None
def evaluatemodel(self, x_test, y_test, model_, myobj):  # Androw
    preds = model_.evaluate(x=x_test, y=y_test)
    mywriting = Writelogs()
    mywriting.writing("evaluate = " + str(preds[0]),
                      "Test Accuracy = " + str(preds[1]) + '\n')
    # Eval
    pred_cat = model_.predict(x_test)
    pred = np.argmax(pred_cat, axis=-1)
    y_te_true = np.argmax(y_test, -1)
    mywriting.writing("predict = " + str(preds[0]),
                      "Test Accuracy = " + str(preds[1]) + '\n')
    # Convert the indices back to tags
    pred_tag = [[myobj.idx2tag[i] for i in row] for row in pred]
    y_te_true_tag = [[myobj.idx2tag[i] for i in row] for row in y_te_true]
    report = flat_classification_report(y_pred=pred_tag, y_true=y_te_true_tag)
    print(report)
    mywriting.writing(report)
    return model_
def predict(self):
    dl = get_bert_data_loader_for_predict(
        os.path.join(self.data_path, "valid.csv"), self.learner)
    self.learner.load_model()
    preds = self.learner.predict(dl)
    pred_tokens, pred_labels = self.custom_bert_labels2tokens(
        dl, preds, fn=self.custom_voting_choicer)
    true_tokens, true_labels = self.custom_bert_labels2tokens(
        dl, [x.labels for x in dl.dataset], fn=self.custom_voting_choicer)
    assert pred_tokens == true_tokens
    tokens_report = flat_classification_report(
        true_labels, pred_labels, labels=self.learner.sup_labels, digits=3)
    logging.info('#' * 100)
    logging.info('Language: ' + self.target_language)
    logging.info('POS: ' + self.pos)
    logging.info('Fold: ' + str(self.fold))
    logging.info(tokens_report)
    logging.info('#' * 100)
def test_model(model_path, labels, x_test, y_test):
    """Test the model.

                       positive class                    negative class
    retrieved          true positive (TP):               false positive (FP):
                       predicted and real positive       predicted positive, real negative
    not retrieved      false negative (FN):              true negative (TN):
                       predicted negative, real positive predicted and real negative

    accuracy:  (TP + TN) / all
    precision: TP / (TP + FP) - how many retrieved items are truly positive
    recall:    TP / (TP + FN) - how many positive items are retrieved
    f1-score:  2 * precision * recall / (precision + recall)
               - harmonic mean of the two (2 / f1 = 1 / precision + 1 / recall)
    """
    model = joblib.load(model_path)
    y_predict = model.predict(x_test)
    # f1_score = metrics.f1_score(y_true=y_test, y_pred=y_predict, labels=labels)
    # Sort so that B-/I- tags of the same entity type are adjacent.
    sorted_labels = sorted(labels, key=lambda BIO_tag: (BIO_tag[1:], BIO_tag[0]))
    # "support" in the report is the number of occurrences of each label.
    report = metrics.flat_classification_report(y_true=y_test, y_pred=y_predict,
                                                labels=sorted_labels, digits=3)
    return report
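# Worked mini-example of the formulas in the docstring above (illustrative
# numbers only, not from the original code):
#   TP = 8, FP = 2, FN = 4, TN = 6
#   accuracy  = (TP + TN) / all     = 14 / 20 = 0.70
#   precision = TP / (TP + FP)      =  8 / 10 = 0.80
#   recall    = TP / (TP + FN)      =  8 / 12 ≈ 0.667
#   f1        = 2 * P * R / (P + R) ≈ 0.727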
def evaluate_rnn(y, preds):
    """Because the RNN sequences get clipped as necessary based on the
    `max_length` parameter, they have to be realigned to get a
    classification report. This method does that, building in the
    assumption that any clipped tokens are assigned an incorrect label.

    Parameters
    ----------
    y : list of list of labels
    preds : list of list of labels

    Both of these lists need to have the same length, but the sequences
    they contain can vary in length.
    """
    labels = sorted({c for ex in y for c in ex})
    new_preds = []
    for gold, pred in zip(y, preds):
        delta = len(gold) - len(pred)
        if delta > 0:
            # Make a *wrong* guess for these clipped tokens:
            pred += [random.choice(list(set(labels) - {label}))
                     for label in gold[-delta:]]
        new_preds.append(pred)
    labels = sorted({cls for ex in y for cls in ex} - {'OTHER'})
    data = {}
    data['classification_report'] = flat_classification_report(y, new_preds)
    data['f1_macro'] = flat_f1_score(y, new_preds, average='macro')
    data['f1_micro'] = flat_f1_score(y, new_preds, average='micro')
    data['f1'] = flat_f1_score(y, new_preds, average=None)
    data['precision_score'] = flat_precision_score(y, new_preds, average=None)
    data['recall_score'] = flat_recall_score(y, new_preds, average=None)
    data['accuracy'] = flat_accuracy_score(y, new_preds)
    data['sequence_accuracy_score'] = sequence_accuracy_score(y, new_preds)
    return data
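# Tiny illustration of the realignment above (hypothetical values): with
# gold = ['B', 'I', 'O', 'O'] and a clipped pred = ['B', 'I'], delta is 2,
# so two deliberately wrong labels are appended before scoring; clipped
# positions therefore always count against the model.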
def get_crf_metrics(y_pred, y_true, labels):
    token_acc_score = round(metrics.flat_accuracy_score(y_true, y_pred), 2)
    token_recall_score = round(
        metrics.flat_recall_score(y_true, y_pred, average='weighted', labels=labels), 2)
    token_f1_score = round(
        metrics.flat_f1_score(y_true, y_pred, average='weighted', labels=labels), 2)
    token_precision_score = round(
        metrics.flat_precision_score(y_true, y_pred, average='weighted', labels=labels), 2)
    report = metrics.flat_classification_report(y_true, y_pred, labels=labels,
                                                output_dict=True)
    report_df = pd.DataFrame(report).T
    report_df = report_df.round(2)
    # Assemble a 2x2 confusion matrix from the TP/TN/FP/FN counts.
    cm_dict = metrics.performance_measure(y_true, y_pred)
    cm = np.array([[cm_dict['TN'], cm_dict['FP']],
                   [cm_dict['FN'], cm_dict['TP']]])
    support = cm_dict['FN'] + cm_dict['TP']
    res_d = {
        'accuracy': token_acc_score,
        'recall': token_recall_score,
        'f1_score': token_f1_score,
        'precision': token_precision_score,
        'support': support,
        'cm': cm,
        'report': report_df
    }
    return res_d
def evaluate_model(crf, X_dev, y_dev, sub_task):
    """Evaluate the model

    Inputs:
        crf: trained CRF model
        X_dev: list of feature dicts for dev set
        y_dev: list of labels for dev set
    Returns:
        None (prints metrics)
    """
    # Get the labels we're evaluating
    labels = list(crf.classes_)
    # Ignore in-character dialogue and stage directions
    if sub_task:
        labels.remove('IN-CHA_')
        labels.remove('STAGE__')
    else:
        labels.remove('IN-CHARACTER_DIALOGUE')
        labels.remove('STAGE_DIRECTIONS')
    print("Predicting labels")
    y_pred = crf.predict(X_dev)
    # print(y_pred[:10])  # for debugging
    # print(y_dev[:10])   # for debugging
    print("Displaying accuracy")
    print(metrics.flat_f1_score(y_dev, y_pred, average='weighted', labels=labels))
    sorted_labels = sorted(labels, key=lambda name: (name[1:], name[0]))
    print("Displaying detailed metrics")
    print(metrics.flat_classification_report(y_dev, y_pred,
                                             labels=sorted_labels, digits=3))
def predict(test_file):
    # Prepare the data
    test_sents = list(nltk.corpus.conll2002.iob_sents(test_file))
    X_test = [sent2features(s) for s in test_sents]
    y_test = [sent2labels(s) for s in test_sents]
    # Predict; pickled models must be opened in binary mode.
    with open("crf_model.pkl", "rb") as f:
        crf = pickle.load(f)
    labels = list(crf.classes_)
    labels.remove('O')
    y_pred = crf.predict(X_test)
    print(X_test)
    print(y_pred)
    print('')
    # Evaluate the model
    print(metrics.flat_f1_score(y_test, y_pred, average='weighted', labels=labels))
    sorted_labels = sorted(labels, key=lambda name: (name[1:], name[0]))
    print(metrics.flat_classification_report(y_test, y_pred,
                                             labels=sorted_labels, digits=3))
def print_report_RS(model, y_test, y_pred, sorted_labels):
    """Print the classification report plus the strongest transition and
    state features of a fitted CRF.

    :param model: fitted CRF model
    :param y_test: true label sequences
    :param y_pred: predicted label sequences
    :param sorted_labels: labels in display order
    :type model: sklearn_crfsuite.CRF
    :type y_test: list
    :type y_pred: list
    :type sorted_labels: list
    """
    def print_transitions(trans_features):
        for (label_from, label_to), weight in trans_features:
            print("%-6s -> %-7s %0.6f" % (label_from, label_to, weight))

    print(metrics.flat_classification_report(y_test, y_pred,
                                             labels=sorted_labels, digits=3))
    print("Top likely transitions:")
    print_transitions(Counter(model.transition_features_).most_common(20))
    print("\nTop unlikely transitions:")
    print_transitions(Counter(model.transition_features_).most_common()[-20:])

    def print_state_features(state_features):
        for (attr, label), weight in state_features:
            print("%0.6f %-8s %s" % (weight, label, attr))

    print("Top positive:")
    print_state_features(Counter(model.state_features_).most_common(30))
    print("\nTop negative:")
    print_state_features(Counter(model.state_features_).most_common()[-30:])
def evaluatemodel(self, x_test, y_test, model_, myobj, cvscores):  # Androw
    preds = model_.evaluate(x=x_test, y=y_test)
    mywriting = Writelogs()
    mywriting.writing("evaluate = " + str(preds[0]),
                      "Test Accuracy = " + str(preds[1]) + '\n')
    # Eval
    pred_cat = model_.predict(x_test)
    pred = np.argmax(pred_cat, axis=-1)
    y_te_true = np.argmax(y_test, -1)
    mywriting.writing("predict = " + str(preds[0]),
                      "Test Accuracy = " + str(preds[1]) + '\n')
    # Convert the indices back to tags
    pred_tag = [[myobj.idx2tag[i] for i in row] for row in pred]
    y_te_true_tag = [[myobj.idx2tag[i] for i in row] for row in y_te_true]
    report = flat_classification_report(y_pred=pred_tag, y_true=y_te_true_tag)
    print(report)
    mywriting.writing(report)
    # Accumulate cross-validation scores.
    scores = model_.evaluate(x_test, y_test, verbose=0)
    print("%s: %.2f%%" % (model_.metrics_names[1], scores[1] * 100))
    cvscores.append(scores[1] * 100)
    print("%.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))
    mywriting.writing("model_.metrics_names scores",
                      str((model_.metrics_names[1], scores[1] * 100)))
    mywriting.writing("mean(cvscores) np.std(cvscores)",
                      str((np.mean(cvscores), np.std(cvscores))))
    return model_
def execute(self, inputs, outputs, labels):
    self.logger.info("building model")
    model = CRF(**self.model_dict)
    self.logger.info("fitting model")
    model.fit(inputs, outputs)
    self.logger.info("validating model")
    labels.remove('O')
    y_pred = model.predict(inputs)
    flat_f1_score = metrics.flat_f1_score(outputs, y_pred,
                                          average='weighted', labels=labels)
    self.logger.info('flat f1 score: {}'.format(flat_f1_score))
    validation = metrics.flat_classification_report(outputs, y_pred,
                                                    labels=labels, digits=4)
    self.logger.info('\n' + validation)
    return model
def trainCRFAndEvaluate(X_train, y_train, X_test, y_test, labels,
                        c1=0.1, c2=0.1, hyperparam_optim=False, n_cv=3, n_iter=10):
    # Replace missing first labels so CRFsuite never sees None.
    for i in range(len(y_train)):
        if y_train[i][0] is None:
            y_train[i][0] = 'none'
    if not hyperparam_optim:
        crf = sklearn_crfsuite.CRF(algorithm='lbfgs', c1=c1, c2=c2,
                                   max_iterations=100,
                                   all_possible_transitions=True)
        # Train the model
        crf.fit(X_train, y_train)
        # Create the predictions
        y_pred = crf.predict(X_test)
        for i in range(len(y_pred)):
            if y_pred[i][0] is None:
                y_pred[i][0] = 'none'
        print(metrics.flat_classification_report(y_test, y_pred,
                                                 labels=labels, digits=3))
        return crf
    else:
        # Define fixed parameters and parameters to search
        crf = sklearn_crfsuite.CRF(algorithm='lbfgs', max_iterations=100,
                                   all_possible_transitions=True)
        # Parameter search: use the same metric for evaluation
        f1_scorer = make_scorer(metrics.flat_f1_score,
                                labels=labels, average='weighted')
        params_space = {
            'c1': scipy.stats.expon(scale=0.5),
            'c2': scipy.stats.expon(scale=0.05),
        }
        rs = RandomizedSearchCV(crf, params_space, cv=n_cv, verbose=1,
                                n_jobs=-1, n_iter=n_iter, scoring=f1_scorer)
        start_time = time.time()
        rs.fit(X_train, y_train)
        print("Hyperparameter optimization took %s seconds to complete"
              % round(time.time() - start_time, 2))
        print('best params:', rs.best_params_)
        print('best CV score:', rs.best_score_)
        print('model size: {:0.2f}M'.format(rs.best_estimator_.size_ / 1000000))
        crf = rs.best_estimator_
        crf.fit(X_train, y_train)
        # Create the predictions
        y_pred = crf.predict(X_test)
        for i in range(len(y_pred)):
            if y_pred[i][0] is None:
                y_pred[i][0] = 'none'
        print(metrics.flat_classification_report(y_test, y_pred,
                                                 labels=labels, digits=3))
        return crf
    return out  # tail of an excerpted helper function


test_pred = model.predict(X_test, verbose=1)
pred_labels = pred2label(test_pred)
test_labels = pred2label(y_test)

# pip install seqeval
from seqeval.metrics import precision_score, recall_score, f1_score, classification_report

print("F1-score: {:.1%}".format(f1_score(test_labels, pred_labels)))

# pip install sklearn_crfsuite
from sklearn_crfsuite.metrics import flat_classification_report

report = flat_classification_report(y_pred=pred_labels, y_true=test_labels)
print(report)

# Per-tag confusion counts, initialised to zero.
TP = {}
TN = {}
FP = {}
FN = {}
for tag in tag2idx.keys():
    TP[tag] = 0
    TN[tag] = 0
    FP[tag] = 0
    FN[tag] = 0

def accumulate_score_by_tag(gt, pred):
    """
X_test = [sent2features(s) for s in sentences_test]
y_test = [sent2labels(s) for s in sentences_test]

# Load the pickled model from file
pkl_filename = sys.argv[1] + ".pkl"
with open(pkl_filename, 'rb') as file:
    crf = pickle.load(file)

# Predict on the test set.
y_pred = crf.predict(X_test)
f1_score = flat_f1_score(y_test, y_pred, average='weighted')
print(f1_score)
print(multilabel_confusion_matrix(sum(y_test, []), sum(y_pred, []), labels=labels))
report = flat_classification_report(y_test, y_pred)
print(report)

# Choose a random test sentence and show word, gold tag, and prediction.
i = np.random.randint(0, len(sentences_test) - 1)
print("{:15}||{:5}||{}".format("Word", "True", "Pred"))
print(30 * "=")
for ((w, r), original, pred) in zip(sentences_test[i], y_test[i], y_pred[i]):
    if w != 0:
        print("{:15}: {:5} {}".format(w, original, pred))
model = TimeDistributed(Dense(50, activation="relu"))(model)
out = Dense(6, activation='softmax')(model)
# Alternative: a CRF output layer instead of softmax.
# crf = CRF(n_tags + 1)
# out = crf(model)
model = Model(input, out)
model.compile(optimizer='adam', loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()
history = model.fit(X_train, numpy.array(y_train), batch_size=BATCH_SIZE,
                    epochs=EPOCHS, validation_split=0.2, verbose=2)

pred_cat = model.predict(X_test)
pred = numpy.argmax(pred_cat, axis=-1)
y_test_true = numpy.argmax(y_test, -1)

from sklearn_crfsuite.metrics import flat_classification_report

# Convert index sequences back to tag sequences before scoring.
pred_tag = [[idx2tag[i] for i in row] for row in pred]
y_test_true_tag = [[idx2tag[i] for i in row] for row in y_test_true]
report = flat_classification_report(y_pred=pred_tag, y_true=y_test_true_tag)
print(report)
          callbacks=[
              # keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, mode='auto'),
              keras.callbacks.TensorBoard(log_dir='./cnn-logs',
                                          histogram_freq=1, batch_size=128)
          ])

# Predict on the dev set, then trim each sequence back to its true length.
pred_y = model.predict([dev_x, dev_chars_x])
pred_id = []
dev_id = []
for pred_one_y, one_length, y in zip(pred_y, length, dev_y):
    pred_id.append([np.argmax(x) for x in pred_one_y[-one_length:]])
    dev_id.append([yy[0] for yy in y[-one_length:]])
labels, tag_names = process_data.get_labels_tags(chunk_tags)
report = flat_classification_report(y_pred=pred_id, y_true=dev_id,
                                    labels=labels, target_names=tag_names,
                                    digits=4)
print(report)
model.save('model/crf.h5')
with open('model/report-cnn-blstm.pkl', 'wb') as wd:
    pickle.dump(report, wd)
)
crf.fit(X_train, y_train)
crf_new.fit(X_train_new, y_train)
joblib.dump(crf, 'crf-suite-old.pkl', compress=9)
joblib.dump(crf_new, 'crf-suite-new.pkl', compress=9)
ner_new = joblib.load('crf-suite-new.pkl')
ner_old = joblib.load('crf-suite-old.pkl')
new_pred = ner_new.predict(X_test_new)
old_pred = ner_old.predict(X_test)
# Drop class 4 from the label map before reporting.
sorted_labels = definitions.KLASSES.copy()
del sorted_labels[4]
print("-----------------------------------------")
print(flat_classification_report(y_test, new_pred,
                                 labels=list(sorted_labels.values()), digits=3,
                                 target_names=list(sorted_labels.values())))
print(flat_classification_report(y_test, old_pred,
                                 labels=list(sorted_labels.values()), digits=3,
                                 target_names=list(sorted_labels.values())))
print("-----------------------------------------")
# Hold out the first 996 sentences for testing.
X_train = [sent2features(s) for s in data[996:]]
y_train = [sent2labels(s) for s in data[996:]]
X_test = [sent2features(s) for s in data[:996]]
y_test = [sent2labels(s) for s in data[:996]]

crf = sklearn_crfsuite.CRF(
    algorithm='lbfgs',
    c1=0.1,
    c2=0.1,
    max_iterations=20,
    all_possible_transitions=False,
)

if __name__ == '__main__':
    crf.fit(X_train, y_train)
    y_pred = crf.predict(X_test)
    # Flatten the sequence labels for the plain sklearn report.
    y_p, y_t = [], []
    for i in range(len(y_pred)):
        for j in range(len(y_pred[i])):
            y_p.append(y_pred[i][j])
            y_t.append(y_test[i][j])
    print(metrics.flat_classification_report(y_test, y_pred, labels=corpus.labels))
    print(classification_report(y_t, y_p))
# Use the same split for all three entities.
group_k_fold = GroupKFold(n_splits=5)
splits = list(group_k_fold.split(data['feats'], data['Material'],
                                 data['filenames']))

# Step 4: Run CRF classifier
crf = CRF(c1=0.1, c2=0.1, all_possible_transitions=True)
pred = {}
for ent in ENTITIES:
    pred[ent] = cross_val_predict(crf, data['feats'], data[ent], cv=splits)
    # Report scores directly on I and B tags;
    # disregard 'O' because it is by far the most frequent class.
    print('\n' + ent + ':\n')
    print(flat_classification_report(data[ent], pred[ent], digits=3,
                                     labels=('B', 'I')))

# Step 5: Convert CRF prediction to IOB tags
pred_iob_dir = '_train/iob'
pred_to_iob(pred, data['filenames'], true_iob_dir, pred_iob_dir)

# Step 6: Convert predicted IOB tags to predicted Brat annotations
txt_dir = join(DATA_DIR, 'train')
brat_dir = '_train/brat'
iob_to_brat(pred_iob_dir, txt_dir, brat_dir)

# Step 7: Evaluate
calculateMeasures(txt_dir, brat_dir, 'rel')
def evaluation(model, dev_data):
    model.eval()
    index2word = {v: k for k, v in model.vocab.items()}
    index2slot = {v: k for k, v in model.slot_vocab.items()}
    preds = []
    labels = []
    hits = 0
    len_slot = 0
    # Dump real and predicted slot labels (and their ids) for inspection.
    fp = open("data/rea-labels", "w", encoding='utf-8')
    fr = open("data/pre-labels", "w", encoding='utf-8')
    ff1 = open("data/labels-tokens1", "w", encoding='utf-8')
    ff2 = open("data/labels-tokens2", "w", encoding='utf-8')
    current = []
    with torch.no_grad():
        for i, batch in enumerate(data_loader(dev_data, 32, True)):
            h, c, slot, intent = pad_to_batch(batch, model.vocab, model.slot_vocab)
            h = [hh.to(device) for hh in h]
            c = c.to(device)
            slot = slot.to(device)
            for s in c:
                a = [index2word[i] for i in s.tolist()]
                b = ' '.join(a).replace('<pad>', '').strip()
                current.append(b)
            for s in slot:
                for i in s.tolist():
                    ff1.write(str(i) + ' ')
                ff1.write('\n')
                a = [index2slot[i] for i in s.tolist()]
                len_slot = len(a)
                b = ' '.join(a)
                fp.write(b + '\n')
            intent = intent.to(device)
            slot_p, intent_p = model(h, c)
            l = slot_p.max(1)[1]
            # Re-chunk the flat predictions into per-utterance slot sequences.
            n = len_slot
            m = [l[i:i + n] for i in range(0, len(l), n)]
            for s in m:
                for i in s.tolist():
                    ff2.write(str(i) + ' ')
                ff2.write('\n')
                a = [index2slot[i] for i in s.tolist()]
                b = ' '.join(a)
                fr.write(b + '\n')
            preds.extend([index2slot[i] for i in slot_p.max(1)[1].tolist()])
            labels.extend([index2slot[i] for i in slot.view(-1).tolist()])
            hits += torch.eq(intent_p.max(1)[1], intent.view(-1)).sum().item()
    fp.close()
    fr.close()
    ff1.close()
    ff2.close()
    print(hits / len(dev_data))
    sorted_labels = sorted(list(set(labels) - {'O', '<pad>'}),
                           key=lambda name: (name[1:], name[0]))
    # sklearn_crfsuite.metrics functions flatten their inputs,
    # so wrap each tag in its own one-element sequence.
    preds = [[y] for y in preds]
    labels = [[y] for y in labels]
    print(metrics.flat_classification_report(labels, preds,
                                             labels=sorted_labels, digits=3))
crf.fit(X_train, y_train)
labels = list(crf.classes_)
# labels.remove('O')
print(labels)
y_pred = crf.predict(X_test)
print(metrics.flat_f1_score(y_test, y_pred, average='weighted', labels=labels))
# Group B-/I- variants of the same entity type together.
sorted_labels = sorted(labels, key=lambda name: (name[1:], name[0]))
print(metrics.flat_classification_report(y_test, y_pred,
                                         labels=sorted_labels, digits=3))

import sys
sys.exit()

# The eli5 inspection below is kept for reference but never reached.
print(eli5.format_as_text(eli5.explain_weights(crf, top=30)))
'''
eli5.show_weights(crf, top=5, show=['transition_features'])
eli5.show_weights(crf, top=10, targets=['O', 'B-ORG', 'I-ORG'])
eli5.show_weights(crf, top=10, feature_re='^word\.is',
                  horizontal_layout=False, show=['targets'])
expl = eli5.explain_weights(crf, top=5, targets=['O', 'B-LOC', 'I-LOC'])
print(eli5.format_as_text(expl))
'''
X = v.fit_transform(all_features)
print(X.shape)

crf = sklearn_crfsuite.CRF(
    algorithm='lbfgs',
    c1=0.1,
    c2=0.1,
    max_iterations=100,
    all_possible_transitions=True
)
crf.fit(X_train, y_train)
print(crf)
print(">>>>>>>>")
labels = list(crf.classes_)
y_pred = crf.predict(X_test)
print(y_pred)
print("<<<<<<<<<<<")
x = metrics.flat_f1_score(y_test, y_pred, average='weighted', labels=labels)
print(x)
print("<<<<<<<<")
sorted_labels = sorted(labels, key=lambda name: (name[1:], name[0]))
print(metrics.flat_classification_report(y_test, y_pred,
                                         labels=sorted_labels, digits=3))