def eval(self, sentence_result, y_data, progress=False):

        slot_result, domain_result = list(zip(*y_data))

        y_pred, y_pred_target = self.predict(sentence_result,
                                             progress=progress)
        y_test = slot_result
        y_target = np.array([[x] for x in domain_result])
        y_pred_target = np.array([[x] for x in y_pred_target])

        # print(y_target.shape)
        # print(y_pred_target.shape)

        return OrderedDict((
            ('accuracy', metrics.flat_accuracy_score(y_test, y_pred)),
            ('precision',
             metrics.flat_precision_score(y_test, y_pred, average='weighted')),
            ('recall',
             metrics.flat_recall_score(y_test, y_pred, average='weighted')),
            ('f1', metrics.flat_f1_score(y_test, y_pred, average='weighted')),
            ('softmax_accuracy',
             metrics.flat_accuracy_score(y_target, y_pred_target)),
            ('softmax_precision',
             metrics.flat_precision_score(y_target,
                                          y_pred_target,
                                          average='weighted')),
            ('softmax_recall',
             metrics.flat_recall_score(y_target,
                                       y_pred_target,
                                       average='weighted')),
            ('softmax_f1',
             metrics.flat_f1_score(y_target, y_pred_target,
                                   average='weighted')),
        ))
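All of the flat_* helpers used throughout these examples come from sklearn_crfsuite.metrics; they flatten the per-sentence label sequences and delegate to the corresponding scikit-learn metric. A minimal self-contained sketch with made-up slot labels (not data from the example above):

from sklearn_crfsuite import metrics

# Made-up gold and predicted label sequences for two sentences.
y_test = [['O', 'B-city', 'O'], ['B-date', 'I-date']]
y_pred = [['O', 'B-city', 'O'], ['B-date', 'O']]

# 4 of the 5 flattened tokens match, so flat accuracy is 0.8.
print(metrics.flat_accuracy_score(y_test, y_pred))
print(metrics.flat_f1_score(y_test, y_pred, average='weighted'))
print(metrics.flat_precision_score(y_test, y_pred, average='weighted'))
print(metrics.flat_recall_score(y_test, y_pred, average='weighted'))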
def test_training(storage, capsys):

    annotations = list(
        a for a in storage.iter_annotations(simplify_form_types=True, simplify_field_types=True) if a.fields_annotated
    )[:300]

    crf = train(
        annotations=annotations,
        use_precise_form_types=False,
        optimize_hyperparameters_iters=10,
        full_form_type_names=False,
        full_field_type_names=False,
    )

    out, err = capsys.readouterr()

    assert "Training on 300 forms" in out
    assert "realistic form types" in out
    assert "Best hyperparameters" in out

    assert 0.0 < crf.c1 < 1.5
    assert 0.0 < crf.c2 < 0.9
    assert (crf.c1, crf.c2) != _REALISTIC_C1_C2
    assert (crf.c1, crf.c2) != _PRECISE_C1_C2

    form_types = np.asarray([a.type for a in annotations])
    X, y = get_Xy(annotations, form_types, full_type_names=False)
    y_pred = crf.predict(X)
    score = flat_accuracy_score(y, y_pred)
    assert 0.9 < score < 1.0  # overfitting FTW!

    field_schema = storage.get_field_schema()
    short_names = set(field_schema.types_inv.keys())
    assert set(crf.classes_).issubset(short_names)
Example #3
    def predict(self, data, y=None, tag=None):
        if (y is not None) and (tag is not None):
            # Tagging each word in data to its corresponding tag
            t = tagger(X=data)
            tagged_data_ = t.fit(X=t.tag(), y=y, tag=tag)

            # Generates features required for conditional random field
            f = Features(X=tagged_data_, num_words=self.num_features)
            x_test, y_test = f.get

            # Gets trained model from finalized_model.sav
            loaded_model = pickle.load(open('finalized_model.sav', 'rb'))

            # prediction on test data
            result = loaded_model.predict(x_test)

            #printing classification report and Accuracy
            print('\n\n Classification Report: \n',
                  flat_classification_report(y_test, result))
            print('Accuracy:', flat_accuracy_score(y_test, result))

        elif (y is None) and (tag is None):
            # data is tagged with list of tuples (token, POS tag, lemmatized word, other tag)
            t = tagger(X=data)
            tagged_data_ = t.tag()

            # Generates features required for conditional random field
            f = Features(X=tagged_data_, num_words=self.num_features)
            x_test, _ = f.get

            # Gets trained model from finalized_model.sav
            loaded_model = pickle.load(open('finalized_model.sav', 'rb'))

            # prediction on test data
            result = loaded_model.predict(x_test)

#         # tokenizing test data
#         final=pd.DataFrame()
#         final['description'] = [re.findall('[A-Za-z0-9]+',i) for i in data]
#         final['result']=result
#         def func(df,tag):
#             mainlist=[]
#             for i in range(len(df)):
#                 sublist=[]
#                 desc=df['result'].iloc[i]
#                 for j in range(len(desc)):
#                     if(tag==desc[j]):
#                         sublist.append(df['description'].iloc[i][j])
#                 if(len(sublist)!=0):
#                     mainlist.append(' '.join(sublist))
#                 else:
#                     mainlist.append("not assigned")
#             return mainlist
#         products=func(final,'P')
#         issues=func(final,'I')
#         finalresult=pd.DataFrame()
#         finalresult['Products']=products
#         finalresult['Issues']=issues

        return result
def running_metrics(p):
    intent_predictions, slot_predictions = p.predictions
    intent_labels, slot_labels = p.label_ids

    slot_predictions = np.argmax(slot_predictions, axis=2)
    intent_predictions = np.argmax(intent_predictions, axis=1)

    slot_predictions_clean = [[
        p for (p, l) in zip(prediction, label) if l != -100
    ] for prediction, label in zip(slot_predictions, slot_labels)]
    slot_labels_clean = [[
        l for (p, l) in zip(prediction, label) if l != -100
    ] for prediction, label in zip(slot_predictions, slot_labels)]
    intent_f1 = f1_score(intent_labels, intent_predictions, average="macro")
    intent_accuracy = accuracy_score(intent_labels, intent_predictions)
    flat_acc = seq_metrics.flat_accuracy_score(slot_labels_clean,
                                               slot_predictions_clean)
    flat_f1 = seq_metrics.flat_f1_score(slot_labels_clean,
                                        slot_predictions_clean,
                                        average="macro")
    slt_f1_weighted = seq_metrics.flat_f1_score(slot_labels_clean,
                                                slot_predictions_clean,
                                                average="weighted")
    return {
        "flat slot accuracy": flat_acc,
        "flat slot f1": flat_f1,
        "weighted slot f1": slt_f1_weighted,
        "intent f1": intent_f1,
        "intent accuracy": intent_accuracy,
    }
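The -100 filtering above follows the common convention of marking padded or otherwise ignored token positions with -100 so they are excluded from scoring. A minimal self-contained sketch of the same cleanup on made-up arrays (the shapes and label ids are invented for illustration):

import numpy as np
from sklearn_crfsuite import metrics as seq_metrics

# Two sequences of length 4; -100 marks positions to ignore.
slot_labels = np.array([[3, 5, -100, -100],
                        [2, -100, 4, 1]])
# Random logits over 6 slot classes for each position.
slot_logits = np.random.default_rng(0).normal(size=(2, 4, 6))
slot_predictions = np.argmax(slot_logits, axis=2)

slot_predictions_clean = [[
    p for (p, l) in zip(prediction, label) if l != -100
] for prediction, label in zip(slot_predictions, slot_labels)]
slot_labels_clean = [[
    l for (p, l) in zip(prediction, label) if l != -100
] for prediction, label in zip(slot_predictions, slot_labels)]

# Only the non-ignored positions (2 in the first sequence, 3 in the second)
# survive, so the flat metrics see 5 tokens in total.
print(seq_metrics.flat_accuracy_score(slot_labels_clean, slot_predictions_clean))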
Example #5
def train(train_file, test_file, min_freq, model_file):
    '''Train a CRF tagger on CoNLL-formatted data'''
    # Read in initial training data
    conll_data_train = read_conll_data(train_file)
    train_sents = [[line[0] for line in doc] for doc in conll_data_train]
    train_labels = [[line[2] for line in doc] for doc in conll_data_train]

    # Featurize and create instance from list of sentences
    feat_sent_train = build_dataset(train_sents)
    print("Training on {0} inst".format(len(feat_sent_train)))

    # Train and test loop for parameter settings
    # Create and train CRF model
    # For different parameter options, see:
    # https://sklearn-crfsuite.readthedocs.io/en/latest/_modules/sklearn_crfsuite/estimator.html
    model = CRF(min_freq=min_freq)
    model.fit(feat_sent_train, train_labels)

    # Test the model on held out test set if wanted
    if test_file:
        conll_data_test = read_conll_data(test_file)
        test_sents = [[line[0] for line in doc] for doc in conll_data_test]
        test_labels = [[line[2] for line in doc] for doc in conll_data_test]
        feat_sent_test = build_dataset(test_sents)
        # Predicting and printing accuracy
        pred = model.predict(feat_sent_test)
        acc = metrics.flat_accuracy_score(test_labels, pred)
        print("Accuracy: {0}%".format(float(round(acc, 3)) * 100))
    # Save model to disk if wanted
    if model_file:
        print("Saving model to {0}".format(model_file))
        joblib.dump(model, model_file)
Example #6
    def train1(self, data, y, tag):
        #tagged_data = a.fit(a.tag(),y,tag)
        # Build features in the format the conditional random field expects
        feaobj = Features(data, self.num_features)
        x_train, y_train = feaobj.get
        print("labelled data")
        # Build the conditional random field model
        crf = CRF(algorithm='lbfgs',
                  c1=0.1,
                  c2=0.1,
                  max_iterations=100,
                  all_possible_transitions=False)
        print(crf)
        crf.fit(x_train, y_train)

        # Saving the model which is trained
        filename = 'finalized_model.sav'
        pickle.dump(crf, open(filename, 'wb'))

        # Prediction on train
        pred = crf.predict(x_train)

        # printing classification report and Accuracy
        print('\n \n Prediction On Trained Data:\n \n',
              flat_classification_report(y_train, pred))
        print('Accuracy:', flat_accuracy_score(y_train, pred))
Example #7
	def cross_validate(self):
		kfold = KFold(n_splits=3)
		for train_ids, test_ids in kfold.split(self.sentences):
			X_train = [self.sent2features(self.sentences[i][0]) for i in train_ids]
			y_train = [self.sent2labels(self.sentences[i][0], self.sentences[i][1]) for i in train_ids]

			crf = sklearn_crfsuite.CRF(
				algorithm='lbfgs',
				c1=0.1,
				c2=0.2,
				max_iterations=100,
				all_possible_transitions=True
			)
			crf.fit(X_train, y_train)

			labels = list(crf.classes_)

			X_test = [self.sent2features(self.sentences[i][0]) for i in test_ids]
			y_test = [self.sent2labels(self.sentences[i][0], self.sentences[i][1]) for i in test_ids]
			y_pred = crf.predict(X_test)

			for idx, id in enumerate(test_ids):
				print(self.sentences[id][0])
				print(self.sentences[id][1])
				print(y_pred[idx])
				print(y_test[idx])

			# print(metrics.flat_f1_score(y_test, y_pred, average='weighted', labels=labels))
			print(metrics.flat_accuracy_score(y_test, y_pred))
Example #8
def test_training(storage, capsys):
    annotations = (a for a in storage.iter_annotations(
        simplify_form_types=True,
        simplify_field_types=True,
    ) if a.fields_annotated)
    annotations = list(itertools.islice(annotations, 0, 300))

    crf = train(annotations=annotations,
                use_precise_form_types=False,
                optimize_hyperparameters_iters=2,
                optimize_hyperparameters_folds=2,
                optimize_hyperparameters_jobs=-1,
                full_form_type_names=False,
                full_field_type_names=False)

    out, err = capsys.readouterr()

    assert 'Training on 300 forms' in out
    assert 'realistic form types' in out
    assert 'Best hyperparameters' in out

    assert 0.0 < crf.c1 < 2.5
    assert 0.0 < crf.c2 < 0.9
    assert (crf.c1, crf.c2) != _REALISTIC_C1_C2
    assert (crf.c1, crf.c2) != _PRECISE_C1_C2

    form_types = np.asarray([a.type for a in annotations])
    X, y = get_Xy(annotations, form_types, full_type_names=False)
    y_pred = crf.predict(X)
    score = flat_accuracy_score(y, y_pred)
    assert 0.9 < score < 1.0  # overfitting FTW!

    field_schema = storage.get_field_schema()
    short_names = set(field_schema.types_inv.keys())
    assert set(crf.classes_).issubset(short_names)
def crf_tag():
    brown_tagged_sents = brown.tagged_sents(categories='news')
    #print(brown_tagged_sents[0])
    train_len = int(len(brown_tagged_sents) * 0.9)
    training_sentences = brown_tagged_sents[:train_len]
    test_sentences = brown_tagged_sents[train_len:]

    X_train, y_train = transform_to_dataset(training_sentences)
    X_test, y_test = transform_to_dataset(test_sentences)

    #print(len(X_train))
    #print(len(X_test))
    print(X_train[0])
    print(y_train[0])

    model = CRF()
    model.fit(X_train, y_train)

    raw_sent = ['I', 'am', 'a', 'student']
    sent_feat = [
        feature_extract(raw_sent, index) for index in range(len(raw_sent))
    ]
    print(list(zip(raw_sent, model.predict([sent_feat])[0])))
    y_pred = model.predict(X_test)
    print(metrics.flat_accuracy_score(y_test, y_pred))
Example #10
    def train(self, model_name, tagged_sentences):
        # Split the dataset for training and testing
        cutoff = int(.75 * len(tagged_sentences))
        training_sentences = tagged_sentences[:cutoff]
        test_sentences = tagged_sentences[cutoff:]

        X_train, y_train = transform_to_dataset(training_sentences)
        X_test, y_test = transform_to_dataset(test_sentences)
        print(len(X_train))
        print(len(X_test))


        print("Training Started........")
        print("it will take time according to your dataset size..")
        model = CRF()
        model.fit(X_train, y_train)
        print("Training Finished!")
        
        print("Evaluating with Test Data...")
        y_pred = model.predict(X_test)
        print("Accuracy is: ")
        print(metrics.flat_accuracy_score(y_test, y_pred))
        
        pickle.dump(model, open(model_name, 'wb'))
        print("Model Saved!")
Example #11
    def evaluate(self, test=None):
        '''Evaluate the tagger on the test data and return accuracy (%).'''
        t0 = t = time()
        self.logger.info('started evaluation')
        #
        if test:
            self.test = Data(test, sent_cls=self.sent_cls)
            t0, t = t, time()
            self.logger.info('{:.2f}'.format(t - t0) +
                             's extracted test features')
            self.logger.info('processed ' + str(self.test.num_sents) +
                             ' sentences')
        #
        if not self.test:
            self.logger.error('cannot evaluate without the test data')
            return
        #
        y_true = self.test.labels
        y_pred = self.tagger.predict(self.test.features)
        t0, t = t, time()
        self.logger.info('{:.2f}'.format(t - t0) + 's generated predictions')

        #
        accuracy = 100 * flat_accuracy_score(y_true, y_pred)
        self.logger.info('Accuracy  : ' + '{:.2f}'.format(accuracy))
        t0, t = t, time()
        self.logger.info('{:.2f}'.format(t - t0) + 's evaluated test data')

        return accuracy
Example #12
def evaluate_rnn(y, preds):
    """Because the RNN sequences get clipped as necessary based
    on the `max_length` parameter, they have to be realigned to
    get a classification report. This method does that, building
    in the assumption that any clipped tokens are assigned an
    incorrect label.

    Parameters
    ----------
    y : list of list of labels
    preds : list of list of labels

    Both of these lists need to have the same length, but the
    sequences they contain can vary in length.
    """
    labels = sorted({c for ex in y for c in ex})
    new_preds = []
    for gold, pred in zip(y, preds):
        delta = len(gold) - len(pred)
        if delta > 0:
            # Make a *wrong* guess for these clipped tokens:
            pred += [random.choice(list(set(labels)-{label}))
                     for label in gold[-delta: ]]
        new_preds.append(pred)
    labels = sorted({cls for ex in y for cls in ex} - {'OTHER'})
    data = {}
    data['classification_report'] = flat_classification_report(y, new_preds)
    data['f1_macro'] = flat_f1_score(y, new_preds, average='macro')
    data['f1_micro'] = flat_f1_score(y, new_preds, average='micro')
    data['f1'] = flat_f1_score(y, new_preds, average=None)
    data['precision_score'] = flat_precision_score(y, new_preds, average=None)
    data['recall_score'] = flat_recall_score(y, new_preds, average=None)
    data['accuracy'] = flat_accuracy_score(y, new_preds)
    data['sequence_accuracy_score'] = sequence_accuracy_score(y, new_preds)
    return data
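A usage sketch for the function above, with made-up gold and predicted sequences (it assumes the module-level imports the function already relies on, i.e. random and the flat_* / sequence_accuracy_score metrics):

# The second prediction was clipped to two tokens, so evaluate_rnn pads it
# with a deliberately wrong guess before scoring.
y = [['B-LOC', 'O', 'O'], ['B-PER', 'I-PER', 'O']]
preds = [['B-LOC', 'O', 'O'], ['B-PER', 'I-PER']]
report = evaluate_rnn(y, preds)
print(report['accuracy'])                 # 5 of 6 tokens correct
print(report['sequence_accuracy_score'])  # only the first sequence is fully correct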
Example #13
    def score(self, X, y):
        """
        Return accuracy score computed for sequence items.

        For other metrics check :mod:`sklearn_crfsuite.metrics`.
        """
        y_pred = self.predict(X)
        return flat_accuracy_score(y, y_pred)
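Because score uses flat accuracy, a CRF wrapped this way plugs into scikit-learn model selection, and the other flat metrics can be wrapped with make_scorer for the same purpose. A minimal sketch of the usual hyperparameter-search pattern (assuming compatible sklearn-crfsuite/scikit-learn versions; X_train and y_train are placeholder feature/label sequences, not data from these examples):

import scipy.stats
import sklearn_crfsuite
from sklearn.metrics import make_scorer
from sklearn.model_selection import RandomizedSearchCV
from sklearn_crfsuite import metrics

crf = sklearn_crfsuite.CRF(algorithm='lbfgs', max_iterations=100,
                           all_possible_transitions=True)
params_space = {
    'c1': scipy.stats.expon(scale=0.5),   # L1 regularization
    'c2': scipy.stats.expon(scale=0.05),  # L2 regularization
}
f1_scorer = make_scorer(metrics.flat_f1_score, average='weighted')
search = RandomizedSearchCV(crf, params_space, cv=3, n_iter=10,
                            scoring=f1_scorer, verbose=1)
# search.fit(X_train, y_train); best model in search.best_estimator_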
Example #15
    def calculate_overall_accuracy_and_f1_score_per_pos(
            self, print_results=False):
        hmm_tagger = HMMTagger()

        for i in self.train_sizes:
            hmm_tagger.train_tagger(self.train_data[:i])
            train_tags, train_pred = hmm_tagger.predict(self.train_data[:i])
            dev_tags, dev_pred = hmm_tagger.predict(self.dev_data)

            labels = []
            for sentence_tags in train_tags:
                for tag in sentence_tags:
                    labels.append(tag)
            labels = list(set(labels))

            accuracy_score_train = metrics.flat_accuracy_score(
                train_tags, train_pred)
            self.train_overall_accuracy.append(accuracy_score_train)
            accuracy_score_dev = metrics.flat_accuracy_score(
                dev_tags, dev_pred)
            self.dev_overall_accuracy.append(accuracy_score_dev)

            f1_score_train = metrics.flat_classification_report(train_tags,
                                                                train_pred,
                                                                labels=labels)
            self.calculate_f1_stats(f1_score_train)
            # self.classification_report_csv(f1_score_train)
            f1_score_dev = metrics.flat_classification_report(dev_tags,
                                                              dev_pred,
                                                              labels=labels)
            self.calculate_f1_stats(f1_score_dev, False)
            # self.classification_report_csv(f1_score_dev, False)

            if print_results:
                print('The overall accuracy on Train data for train size = ' +
                      str(i) + ' is = ' + str(accuracy_score_train))
                print('The overall accuracy on DEV data for train size = ' +
                      str(i) + ' is = ' + str(accuracy_score_dev))
                print('Report')
                print('The overall accuracy on Train data for train size = ' +
                      str(i) + ' is = ' + f1_score_train)
                print('The overall accuracy on DEV data for train size = ' +
                      str(i) + ' is = ' + f1_score_dev)
                print(
                    '--------------------------------------------------------------------------------------'
                )
Example #16
    def evaluate(self, x, y):
        y_pred = self.model.predict(x)
        print(metrics.flat_accuracy_score(y, y_pred))

        count = 0
        for i in range(len(y_pred)):
            if np.array_equal(y_pred[i], y[i]):
                count += 1

        print("Acc:", count / len(y))
Example #17
    def calculate_overall_accuracy_and_f1_score_per_pos(
            self, crf_hyperparameters, print_results=False):
        crf_pos_model = CrfPosTagger()

        for i in self.train_sizes:
            my_model = crf_pos_model.trainCRF(self.data_features[:i],
                                              self.data_target[:i],
                                              crf_hyperparameters)
            train_pred = my_model.predict(self.data_features[:i])
            dev_pred = my_model.predict(self.dev_features)
            labels = list(my_model.classes_)

            accuracy_score_train = metrics.flat_accuracy_score(
                self.data_target[:i], train_pred)
            self.train_overall_accuracy.append(accuracy_score_train)
            accuracy_score_dev = metrics.flat_accuracy_score(
                self.dev_target, dev_pred)
            self.dev_overall_accuracy.append(accuracy_score_dev)

            f1_score_train = metrics.flat_classification_report(
                self.data_target[:i], train_pred, labels=labels)
            self.calculate_f1_stats(f1_score_train)
            # self.classification_report_csv(f1_score_train)
            f1_score_dev = metrics.flat_classification_report(self.dev_target,
                                                              dev_pred,
                                                              labels=labels)
            self.calculate_f1_stats(f1_score_dev, False)
            # self.classification_report_csv(f1_score_dev, False)

            if print_results:
                print('The overall accuracy on Train data for train size = ' +
                      str(i) + ' is = ' + str(accuracy_score_train))
                print('The overall accuracy on DEV data for train size = ' +
                      str(i) + ' is = ' + str(accuracy_score_dev))
                print('Report')
                print('The overall accuracy on Train data for train size = ' +
                      str(i) + ' is = ' + f1_score_train)
                print('The overall accuracy on DEV data for train size = ' +
                      str(i) + ' is = ' + f1_score_dev)
                print(
                    '--------------------------------------------------------------------------------------'
                )
def testing(crf,X_test,time_seq=[],y_test=[],save=0):
	if y_test:
		print("Results:")
		labels = list(crf.classes_)
		y_pred = crf.predict(X_test)
		sorted_labels = [str(x) for x in sorted(labels,key=lambda name: (name[1:], name[0]))]
		print(metrics.flat_classification_report(y_test, y_pred, digits=3, labels=sorted_labels))
		# plot_results(y_pred,X_test,time_seq,save)
		return metrics.flat_accuracy_score(y_test, y_pred) # *** , labels=sorted_labels)
	else:
		y_pred = crf.predict(X_test)
		plot_results(y_pred,X_test,time_seq,save)
		return y_pred
Example #19
def evaluate(dataset_name, data_iter, model, full_report=False):
  
  model.eval()
  total_corrects, avg_loss = 0, 0
  for batch in data_iter:
    text, target = batch.Phrase, batch.Sentiment


    output = model(text)
    
    loss = F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
    pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
    

    correct = pred.eq(target.view_as(pred)).sum().item()
    
    avg_loss += loss
    
    total_corrects += correct

  size = len(data_iter.dataset)
  avg_loss /= size
  accuracy = 100.0 * total_corrects/size
  print('  Evaluation on {} - loss: {:.6f}  acc: {:.4f}%({}/{})'.format(dataset_name,
                                                                     avg_loss, 
                                                                     accuracy, 
                                                                     total_corrects, 
                                                                     size))

  # Wrap each scalar label/prediction in a one-element sequence of plain ints
  # so the sequence-level flat_* metrics can be applied to this task.
  targetList = [[t.item()] for t in target]
  predList = [[p[0]] for p in pred.tolist()]
  

  if full_report:
    print(sklearn_crfsuite.metrics.flat_classification_report(targetList, predList, labels=[0,1,2,3,4]))
    print("accuracy_score", flat_accuracy_score(targetList, predList))

    print("precision_score", flat_precision_score(targetList, predList, average='weighted'))
    print("recall_score", flat_recall_score(targetList, predList, average='weighted'))
    print("f1_score", flat_f1_score(targetList, predList, average='weighted'))
  return accuracy
Example #20
    def eval(self, sentence_result, slot_result):
        """Evaluate prediction results."""
        y_pred = self.predict(sentence_result)
        y_test = slot_result
        return {
            'precision':
            metrics.flat_precision_score(y_test, y_pred, average='weighted'),
            'recall':
            metrics.flat_recall_score(y_test, y_pred, average='weighted'),
            'f1':
            metrics.flat_f1_score(y_test, y_pred, average='weighted'),
            'accuracy':
            metrics.flat_accuracy_score(y_test, y_pred),
        }
Example #21
    def tag(self, test=None, save=None):
        '''
        Tag the test data; if save is not given, write to stdout.
        '''
        t0 = t = time()
        self.logger.info('started tagging')
        #
        if test:
            self.test = Data(test, sent_cls=self.sent_cls)
            t0, t = t, time()
            self.logger.info('{:.2f}'.format(t - t0) +
                             's extracted test features')
            self.logger.info('processed ' + str(self.test.num_sents) +
                             ' sentences')
        #
        if not self.test:
            self.logger.error('cannot tag without the test data')
            return
        #
        y_pred = self.tagger.predict(self.test.features)
        t0, t = t, time()
        self.logger.info('{:.2f}'.format(t - t0) + 's generated predictions')

        # print accuracy, if given data contains labels
        accuracy = 0.0
        y_true = self.test.labels
        if [tag for s_true in y_true for tag in s_true if tag != '_']:
            accuracy = 100 * flat_accuracy_score(y_true, y_pred)
            self.logger.info('Accuracy  : ' + '{:.2f}'.format(accuracy))
            t0, t = t, time()
            self.logger.info('{:.2f}'.format(t - t0) + 's tagged test data')

        # set ccg categories of the sentences
        self.test.update_tags(y_pred)
        #
        if not save:
            for sent in self.test.sentences:
                print(sent)
        else:
            with open(save, 'w', encoding='utf-8') as f:
                for sent in self.test.sentences:
                    f.write(str(sent) + '\n')
            t0, t = t, time()
            self.logger.info('{:.2f}'.format(t - t0) + 's saved as ' + save)

        return accuracy
Example #22
def test_model(
    model: sklearn_crfsuite.CRF,
    test_path: typing.Union[str, Path],
    out_file: typing.Optional[typing.TextIO] = None,
):
    """Print an accuracy report for a model to a file"""
    try:
        import conllu
    except ImportError as e:
        _LOGGER.fatal("conllu package is required for testing")
        _LOGGER.fatal("pip install 'conllu>=4.4'")
        raise e

    _LOGGER.debug("Loading test file (%s)", test_path)
    with open(test_path, "r") as test_file:
        test_sents = conllu.parse(test_file.read())

    _LOGGER.debug("Getting features for %s test sentence(s)", len(test_sents))
    x_test = [sent2features(s) for s in test_sents]
    y_test = [sent2labels(s) for s in test_sents]

    labels = list(model.classes_)

    y_pred = model.predict(x_test)
    print(
        "F1 score on the test set = {}".format(
            metrics.flat_f1_score(y_test,
                                  y_pred,
                                  average="weighted",
                                  labels=labels)),
        file=out_file,
    )
    print(
        "Accuracy on the test set = {}".format(
            metrics.flat_accuracy_score(y_test, y_pred)),
        file=out_file,
    )

    sorted_labels = sorted(labels, key=lambda name: (name[1:], name[0]))
    print(
        "Test set classification report: {}".format(
            metrics.flat_classification_report(y_test,
                                               y_pred,
                                               labels=sorted_labels,
                                               digits=3)),
        file=out_file,
    )
Example #23
def train_crf_pos(corpus, corpus_name):

    # Required corpus structure:
    # [[(w1,t1), (w2,t2),...(wn,tn)], [(w1,t1)(w2,t2),...(wm,tm)],...]

    #feat_all = {} # common features (baseline set)
    #feat_en = {} # extra features for English
    #features = {**feat_all, **feat_en}
    train_frac = 0.9  # fraction of data for the training set
    split_idx = int(train_frac * len(corpus))

    # Extract the features and separate labels from features
    X = [get_crf_features([pair[0] for pair in sent]) for sent in corpus]
    y = [[pair[1] for pair in sent] for sent in corpus]

    # Create the training and the test sets
    X_train = X[:split_idx]
    y_train = y[:split_idx]
    X_test = X[split_idx:]
    y_test = y[split_idx:]

    # Create the CRF model
    model = CRF(
        algorithm='lbfgs',  # gradient descent using the L-BFGS method
        c1=0.1,  # coeff. for L1 regularization
        c2=0.1,  # coeff. for L2 regularization
        max_iterations=100,
    )

    # Train the model
    model.fit(X_train, y_train)

    # Save the model
    with open(os.path.join('data', 'models', corpus_name + '_crf.pkl'),
              'wb') as f:
        pickle.dump(model, f, 4)

    # Evaluate the model
    y_pred = model.predict(X_test)
    print("Test accuracy: %.4f" % metrics.flat_accuracy_score(y_test, y_pred))

    return model
Example #24
def evaluate_rnn(y, preds):
    """ Evaluate the RNN performance using various metrics.

  Parameters
  ----------
  y: list of list of labels
  preds: list of list of labels

  Both of these lists need to have the same length, but the
  sequences they contain can vary in length.

  Returns
  -------
  data: dict
  """

    labels = sorted({c for ex in y for c in ex})
    new_preds = []
    for gold, pred in zip(y, preds):
        delta = len(gold) - len(pred)
        if delta > 0:
            # Make a *wrong* guess for these clipped tokens:
            pred += [
                random.choice(list(set(labels) - {label}))
                for label in gold[-delta:]
            ]
        new_preds.append(pred)
    labels = sorted({cls for ex in y for cls in ex} - {"OTHER"})
    data = {}
    data["classification_report"] = flat_classification_report(y,
                                                               new_preds,
                                                               digits=3)
    data["f1_macro"] = flat_f1_score(y, new_preds, average="macro")
    data["f1_micro"] = flat_f1_score(y, new_preds, average="micro")
    data["f1"] = flat_f1_score(y, new_preds, average=None)
    data["precision_score"] = flat_precision_score(y, new_preds, average=None)
    data["recall_score"] = flat_recall_score(y, new_preds, average=None)
    data["accuracy"] = flat_accuracy_score(y, new_preds)
    data["sequence_accuracy_score"] = sequence_accuracy_score(y, new_preds)

    return data
def print_classification_report(annotations, n_splits=10, model=None):
    """ Evaluate model, print classification report """
    if model is None:
        # FIXME: we're overfitting on hyperparameters - they should be chosen
        # using inner cross-validation, not set to fixed values beforehand.
        model = get_model(use_precise_form_types=True)

    annotations = [a for a in annotations if a.fields_annotated]
    form_types = formtype_model.get_realistic_form_labels(
        annotations=annotations,
        n_splits=n_splits,
        full_type_names=False
    )

    X, y = get_Xy(
        annotations=annotations,
        form_types=form_types,
        full_type_names=True,
    )
    group_kfold = GroupKFold(n_splits=n_splits)
    groups = [get_domain(ann.url) for ann in annotations]
    y_pred = cross_val_predict(model, X, y, cv=group_kfold, groups=groups,
                               n_jobs=-1)

    all_labels = list(annotations[0].field_schema.types.keys())
    labels = sorted(set(flatten(y_pred)), key=lambda k: all_labels.index(k))
    print(flat_classification_report(y, y_pred, digits=2,
                                     labels=labels, target_names=labels))

    print("{:0.1f}% fields are classified correctly.".format(
        flat_accuracy_score(y, y_pred) * 100
    ))
    print("All fields are classified correctly in {:0.1f}% forms.".format(
        sequence_accuracy_score(y, y_pred) * 100
    ))
def print_classification_report(annotations, n_folds=10, model=None):
    """ Evaluate model, print classification report """
    if model is None:
        # FIXME: we're overfitting on hyperparameters - they should be chosen
        # using inner cross-validation, not set to fixed values beforehand.
        model = get_model(use_precise_form_types=True)

    annotations = [a for a in annotations if a.fields_annotated]
    form_types = formtype_model.get_realistic_form_labels(
        annotations=annotations, n_folds=n_folds, full_type_names=False
    )

    X, y = get_Xy(annotations=annotations, form_types=form_types, full_type_names=True)
    cv = get_annotation_folds(annotations, n_folds=n_folds)
    y_pred = cross_val_predict(model, X, y, cv=cv, n_jobs=-1)

    all_labels = list(annotations[0].field_schema.types.keys())
    labels = sorted(set(flatten(y_pred)), key=lambda k: all_labels.index(k))
    print(flat_classification_report(y, y_pred, digits=2, labels=labels, target_names=labels))

    print("{:0.1f}% fields are classified correctly.".format(flat_accuracy_score(y, y_pred) * 100))
    print("All fields are classified correctly in {:0.1f}% forms.".format(sequence_accuracy_score(y, y_pred) * 100))
Example #27
def get_crf_metrics(y_pred, y_true, labels):
    token_acc_score = round(metrics.flat_accuracy_score(y_true, y_pred), 2)
    token_recall_score = round(
        metrics.flat_recall_score(y_true,
                                  y_pred,
                                  average='weighted',
                                  labels=labels), 2)
    token_f1_score = round(
        metrics.flat_f1_score(y_true,
                              y_pred,
                              average='weighted',
                              labels=labels), 2)
    token_precision_score = round(
        metrics.flat_precision_score(y_true,
                                     y_pred,
                                     average='weighted',
                                     labels=labels), 2)
    report = metrics.flat_classification_report(y_true,
                                                y_pred,
                                                labels=labels,
                                                output_dict=True)
    report_df = pd.DataFrame(report).T
    report_df = report_df.round(2)
    cm_dict = metrics.performance_measure(y_true, y_pred)
    cm = np.array([[cm_dict['TN'], cm_dict['FP']],
                   [cm_dict['FN'], cm_dict['TP']]])
    support = cm_dict['FN'] + cm_dict['TP']
    res_d = {
        'accuracy': token_acc_score,
        'recall': token_recall_score,
        'f1_score': token_f1_score,
        'precision': token_precision_score,
        'support': support,
        'cm': cm,
        'report': report_df
    }
    return res_d
Example #28
def crfs(tagged_sentences):
    def features(sentence, index):
        """ sentence: [w1, w2, ...], index: the index of the word """
        return {
            'word': sentence[index],
            'is_first': index == 0,
            'is_last': index == len(sentence) - 1,
            'is_capitalized': sentence[index][0].upper() == sentence[index][0],
            'is_all_caps': sentence[index].upper() == sentence[index],
            'is_all_lower': sentence[index].lower() == sentence[index],
            'prefix-1': sentence[index][0],
            'prefix-2': sentence[index][:2],
            'prefix-3': sentence[index][:3],
            'suffix-1': sentence[index][-1],
            'suffix-2': sentence[index][-2:],
            'suffix-3': sentence[index][-3:],
            'prev_word': '' if index == 0 else sentence[index - 1],
            'next_word':
            '' if index == len(sentence) - 1 else sentence[index + 1],
            'has_hyphen': '-' in sentence[index],
            'is_numeric': sentence[index].isdigit(),
            'capitals_inside':
            sentence[index][1:].lower() != sentence[index][1:]
        }

    # Split the dataset for training and testing
    cutoff = int(.75 * len(tagged_sentences))
    training_sentences = tagged_sentences[:cutoff]
    test_sentences = tagged_sentences[cutoff:]

    def transform_to_dataset(tagged_sentences):
        X, y = [], []

        for tagged in tagged_sentences:
            X.append([
                features(untag(tagged), index) for index in range(len(tagged))
            ])
            y.append([tag for _, tag in tagged])

        return X, y

    X_train, y_train = transform_to_dataset(training_sentences)
    X_test, y_test = transform_to_dataset(test_sentences)

    print(len(X_train))
    print(len(X_test))
    print(X_train[0])
    print(y_train[0])

    model = CRF()
    model.fit(X_train, y_train)

    sentence = ['I', 'am', 'Bob', '!']

    def pos_tag(sentence):
        sentence_features = [
            features(sentence, index) for index in range(len(sentence))
        ]
        return list(zip(sentence, model.predict([sentence_features])[0]))

    print(pos_tag(
        sentence))  # [('I', 'PRP'), ('am', 'VBP'), ('Bob', 'NNP'), ('!', '.')]

    y_pred = model.predict(X_test)
    print("CRFs Accuracy", metrics.flat_accuracy_score(y_test, y_pred))

    return 0
def test_flat_accuracy():
    score = metrics.flat_accuracy_score(y1, y2)
    assert score == 3 / 5
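The y1 and y2 fixtures are defined elsewhere in the test module; purely as a hypothetical illustration of what the assertion checks, any pair of label-sequence lists whose flattened tokens agree in 3 of 5 positions would pass:

from sklearn_crfsuite import metrics

# Hypothetical sequences (not the actual y1/y2 fixtures):
# flattened, 3 of the 5 tokens match, so the score is 3/5.
y_true = [['a', 'a'], ['b', 'b', 'c']]
y_pred = [['a', 'b'], ['b', 'b', 'b']]
assert metrics.flat_accuracy_score(y_true, y_pred) == 3 / 5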
trainer.set_params({
    'feature.possible_transitions': True
})

# Provide a file name as a parameter to the train function, such that
# the model will be saved to the file when training is finished
trainer.train('crf.model')
tagger = pycrfsuite.Tagger()
tagger.open('crf.model')
y_pred = [tagger.tag(xseq) for xseq in X_test]
y_pred1 = [tagger.tag(xseq) for xseq in X_test1]
#print(flat_accuracy_score(Y_test,y_pred))
'''
# Let's take a look at a random sample in the testing set
for i in range(len(X_test)):
    for x, y in zip(y_pred[i], [x[1].split("=")[1] for x in X_test[i]]):
        print("%s (%s)" % (y, x))
'''
# Create a mapping of labels to indices
labels = {"NonDrug": 1, "Drug": 0}

# Convert the sequences of tags into a 1-dimensional array
predictions = np.array([labels[tag] for row in y_pred for tag in row])
truths = np.array([labels[tag] for row in Y_test for tag in row])
print(flat_accuracy_score(Y_test, y_pred))

# Print out the classification report
print(
    classification_report(truths,
                          predictions,
                          target_names=["NonDrug", "Drug"]))
Example #31
    print("=======================")
    print("Load trained model ...")
    model = pickle.load(open("./models/" + MODEL_NAME, "rb"))
    print("Done!!!")

    predict = model.predict(X_test)

    print("=======================")
    print("Testing ....")
    print(len(y_test), len(predict))

    avg_count = 0
    print(predict[0])
    for i in range(len(y_test)):
        acc = evaluate(predict[i], y_test[i])
        # print(acc)
        avg_count += acc

    # print(score)

    print("Avg acc:", avg_count / float(len(y_test)))
    print(model.classes_)
    print("Accuracy\t:", metrics.flat_accuracy_score(y_test, predict))
    print("Precision\t:",
          metrics.flat_precision_score(y_test, predict, average=None))
    print("Recall\t:",
          len(metrics.flat_recall_score(y_test, predict, average=None)))
    print("F1\t:", metrics.flat_f1_score(y_test, predict, average=None))

    print("Done!!!")
Example #33
                             all_possible_transitions=True)

            if VERBOSE == "full":
                print("[Info][Model=Classes][MAX_ITER=" + str(MAX_ITER) +
                      "] Learning...")

            # print(len(features_train), len(target_train), set(target_train))
            model = model.fit([features_train], [target_train])

            if VERBOSE == "full":
                print("[Info][Model=Classes][MAX_ITER=" + str(MAX_ITER) +
                      "] Testing")

            labels = list(model.classes_)
            y_pred = model.predict([features_test])
            v = crfsMetrics.flat_accuracy_score(y_pred, [target_test])
            if first:
                scrs = []
                first = False
            else:
                scrs = joblib.load(
                    "/home/lsablayr/stageM1/debates/step2_M1/learning/scrs")
            scrs.append([c1, c2, MAX_ITER, v])
            joblib.dump(scrs, "scrs", pickle.HIGHEST_PROTOCOL, compress=True)
            del scrs
            if v > max_scr:
                max_scr = v
                iter_max = MAX_ITER
                c1_max = c1
                c2_max = c2
                joblib.dump(model, "modelCRF.save")
Example #34
    def eval(self, pred_tags, gold_tags):
        if self.model is None:
            raise ValueError("No trained model")
        print(self.model.classes_)
        print("Acc =", metrics.flat_accuracy_score(pred_tags, gold_tags))
Example #35
def train(c1, c2, MAX_ITER):
    global targets
    global features
    global MAX_ITER_MAX
    global MAX_ITER_MIN
    global VERBOSE
    global TEST_PERCENT
    global first
    global iter_max
    global c1_max
    global c2_max
    global max_scr

    print("(" + str(c1) + "," + str(c2) + "," + str(MAX_ITER) + ")")
    if VERBOSE == "min":
        print(
            '\033[1A' + "[Info][Model=Crf][MAX_ITER=" + str(MAX_ITER) +
            "] Learning test :",
            round(
                float(MAX_ITER - MAX_ITER_MIN) /
                float(MAX_ITER_MAX - MAX_ITER_MIN) * 100.0, 2), "%")
    if VERBOSE == "full":
        print(
            "[Info][Model=Crf][MAX_ITER=" + str(MAX_ITER) +
            "]================= NB ITER :", MAX_ITER,
            "======================================")
    # split the dataset
    # features_train, features_test, target_train, target_test = modelSelect.train_test_split(features, targets_trans, test_size=TEST_PERCENT)
    sss = modelSelect.StratifiedShuffleSplit(n_splits=2,
                                             test_size=TEST_PERCENT)
    features_train, features_test, target_train, target_test = [], [], [], []
    for train_i, test_i in sss.split(f, targets):
        for i in train_i:
            features_train.append(features[i])
            target_train.append(targets[i])

        for i in test_i:
            features_test.append(features[i])
            target_test.append(targets[i])

    model = crfs.CRF(algorithm='lbfgs',
                     c1=c1,
                     c2=c2,
                     max_iterations=MAX_ITER,
                     all_possible_transitions=True)

    if VERBOSE == "full":
        print("[Info][Model=Classes][MAX_ITER=" + str(MAX_ITER) +
              "] Learning...")

    # print(len(features_train), len(target_train), set(target_train))
    model = model.fit([features_train], [target_train])

    if VERBOSE == "full":
        print("[Info][Model=Classes][MAX_ITER=" + str(MAX_ITER) + "] Testing")

    labels = list(model.classes_)
    y_pred = model.predict([features_test])
    v = crfsMetrics.flat_accuracy_score(y_pred, [target_test])
    if first:
        scrs = []
        first = False
    else:
        scrs = joblib.load(
            "/home/lsablayr/stageM1/debates/step2_M1/learning/scrs.gz")
    scrs.append([c1, c2, MAX_ITER, v])
    joblib.dump(scrs,
                "/home/lsablayr/stageM1/debates/step2_M1/learning/scrs.gz",
                ('gzip', 3), pickle.HIGHEST_PROTOCOL)
    del scrs
    if v > max_scr:
        max_scr = v
        iter_max = MAX_ITER
        c1_max = c1
        c2_max = c2
        joblib.dump(model, "modelCRF.save")
        print("New best accuracy for crf model with c1 =", c1, "c2 =", c2,
              "MAX_ITER =", MAX_ITER, "score :", v)
    if VERBOSE == "full":
        print(
            "[Info][Model=Classes][MAX_ITER=" + str(MAX_ITER) +
            "] Mean test accuracy:", v)