def test_ensure_safe(self):
    """Fit and predict on inputs made only of empty sequences.

    Each (X, y) pair is first passed through ``_ensure_safe``; a fresh
    ``CRF`` is then fitted on the result and asked for one prediction.
    There is deliberately no assertion: the test passes as long as the
    calls complete.
    """
    single_empty = ([[]], [[]])
    double_empty = ([[], []], [[], []])

    # We don't assert anything here but it segfault otherwise
    for raw_x, raw_y in (single_empty, double_empty):
        safe_x, safe_y = _ensure_safe(raw_x, raw_y)
        fitted = CRF().fit(safe_x, safe_y)
        fitted.predict_single([""])
# CRF estimator shared by the cross-validation run and the final fit below.
crf = CRF(
    algorithm='lbfgs',
    c1=0.1,
    c2=0.1,
    max_iterations=100,
    all_possible_transitions=True,
)

# Out-of-fold predictions (cv=5) and the classification report built from them.
pred = cross_val_predict(estimator=crf, X=X, y=y, cv=5)
report = flat_classification_report(y_pred=pred, y_true=y)

# Fit on the complete data set so `crf` can be used for prediction below.
crf.fit(X, y)

import spacy
import en_core_web_sm

nlp = en_core_web_sm.load()


def pos_tagger(sent):
    """Run ``nlp`` over *sent* and return a list of ``(text, tag_)`` tuples,
    one per token, in token order."""
    return [(tok.text, tok.tag_) for tok in nlp(sent)]


print(report)
# NOTE(review): the return value of score() is discarded here; it only shows
# up in an interactive session/notebook -- confirm whether it should be printed.
crf.score(X, y)
x = crf.predict_single(
    sent2features(pos_tagger("Jim bought 300 shares of Acme Corp. in 2006"))
)
print(x)
import configparser

from sklearn_crfsuite import CRF

from Sents2Features import string2Features

# Settings come from config.ini; the 'Names' section provides 'model_name'.
config = configparser.ConfigParser()
config.read('config.ini')
names = config['Names']

# No fit() call follows: the estimator is pointed at the file named by
# names['model_name'] -- presumably an already-trained model; verify it exists.
crf = CRF(
    algorithm='lbfgs',
    c1=0.0001,
    c2=0.0001,
    max_iterations=100,
    all_possible_transitions=False,
    model_filename=names['model_name'],
)

# Convert one sentence to features, label it and print the predicted labels.
features = string2Features(
    'Ég kenni stærðfræði í Háskóla Íslands öll virk kvöld')
print(crf.predict_single(features))
class Model:
    """Wrapper around a ``CRF`` sequence labeller with a small grid search.

    The wrapped estimator is exposed as ``self.model``; the constructor
    arguments are remembered so that ``hyper_param_tune`` can rebuild the
    estimator with different ``c1``/``c2`` values.
    """

    # Candidate values tried for the CRF ``c1`` and ``c2`` parameters by
    # ``hyper_param_tune`` (every combination is trained once).
    C1_GRID = (0.01, 0.05, 0.1)
    C2_GRID = (0.01, 0.05, 0.1)

    def __init__(self,
                 algo: str = 'lbfgs',
                 min_freq: int = 0,
                 all_states: bool = False,
                 max_iter: int = 100,
                 epsilon: float = 1e-5,
                 delta: float = 1e-5):
        """
        :param algo: optimization algorithm (lbfgs, l2sgd, ap, pa, arow)
        :param min_freq: cut-off threshold for the occurrence frequency of
            a feature; rarer features are ignored
        :param all_states: if True, also generate state features for
            attribute/label combinations missing from the training data
        :param max_iter: maximum number of training iterations
        :param epsilon: convergence condition of the optimizer (passed to
            CRF as ``epsilon``; it is not a learning rate)
        :param delta: threshold of the stopping criterion (passed to CRF
            as ``delta``)
        """
        self._algo = algo
        self._min_freq = min_freq
        self._all_states = all_states
        self._max_iter = max_iter
        self._epsilon = epsilon
        self._delta = delta
        self.model = self._build_crf()

    def _build_crf(self, **extra: Any):
        """Create a CRF from the stored settings plus any *extra* keywords."""
        return CRF(algorithm=self._algo,
                   min_freq=self._min_freq,
                   all_possible_states=self._all_states,
                   max_iterations=self._max_iter,
                   epsilon=self._epsilon,
                   delta=self._delta,
                   **extra)

    @staticmethod
    def _entity_labels(classes) -> List[str]:
        """Return *classes* as a list without the outside label ``'O'``."""
        # Filtering (instead of list.remove) keeps this safe when the
        # training data contained no 'O' label at all.
        return [label for label in classes if label != 'O']

    def train(self,
              features: List[List[Dict[str, Any]]],
              labels: List[List[str]]):
        """
        train CRF model using dataset features and labels
        :param features: features of sentences
        :param labels: labels of sentences
        :return:
        """
        self.model.fit(features, labels)

    def predict(self, features: List[Dict[str, Any]]) -> List[str]:
        """
        predict NE labels of a sentence
        :param features: features of a sentence
        :return: labels of a sentence
        """
        return self.model.predict_single(features)

    def predict_all(self, features: List[List[Dict[str, Any]]])\
            -> List[List[str]]:
        """
        predict NE labels of sentences
        :param features: features of sentences
        :return: labels of sentences
        """
        return self.model.predict(features)

    def label_types(self) -> List[str]:
        """
        get label types of dataset

        Every label except ``'O'`` has its first two characters stripped
        (presumably a ``B-``/``I-`` style prefix) and the distinct
        remainders are returned in sorted order.
        :return: label types of dataset
        """
        entity_labels = self._entity_labels(self.model.classes_)
        return sorted({label[2:] for label in entity_labels})

    def hyper_param_tune(self,
                         train_features: List[List[Dict[str, Any]]],
                         train_labels: List[List[str]],
                         dev_features: List[List[Dict[str, Any]]],
                         dev_labels: List[List[str]]) -> None:
        """
        execute hyper parameter tuning with grid search

        A CRF is trained for every (c1, c2) pair from ``C1_GRID`` x
        ``C2_GRID`` and scored on the dev set with the weighted F1 over
        all labels except ``'O'``.  ``self.model`` ends up as the best
        scoring candidate (the first one wins on ties).
        :param train_features: features of training sentences
        :param train_labels: labels of training sentences
        :param dev_features: features of development sentences
        :param dev_labels: labels of development sentences
        :return: None; the result is stored in ``self.model``
        """
        previous_model = self.model
        best_score = None
        best_model = None
        try:
            for c1_ in self.C1_GRID:
                for c2_ in self.C2_GRID:
                    # train()/predict_all() operate on self.model, so the
                    # candidate has to be installed there while evaluated.
                    self.model = self._build_crf(c1=c1_, c2=c2_)
                    self.train(train_features, train_labels)
                    predicted = self.predict_all(dev_features)
                    labels = self._entity_labels(self.model.classes_)
                    f1_score = flat_f1_score(dev_labels, predicted,
                                             average='weighted',
                                             labels=labels)
                    # Accept the first candidate unconditionally so that a
                    # run where every score is 0.0 still yields a model.
                    if best_model is None or f1_score > best_score:
                        best_score = f1_score
                        best_model = self.model
        finally:
            # Never leave a half-evaluated candidate (or None) behind:
            # fall back to the model that was active before the search.
            if best_model is not None:
                self.model = best_model
            else:
                self.model = previous_model