Example #1
def write_to_CoNLL(mdl_file_name, sentence2features, test_sentences, write_path):
    X_test_local = []
    cond_rand_mdl = CRF(algorithm='lbfgs',
                        c1=0.0001,
                        c2=0.0001,
                        max_iterations=100,
                        all_possible_transitions=False,
                        model_filename=mdl_file_name)
    if mdl_file_name[-1] == '2':
        old_crf = CRF(algorithm='lbfgs',
                      c1=0.0001,
                      c2=0.0001,
                      max_iterations=100,
                      all_possible_transitions=False,
                      model_filename=mdl_file_name[:-1] + '1')
        X_test_local = [sent2features_second_guess(s, sentence2features, old_crf) for s in test_sentences]
    else:
        X_test_local = [sentence2features(s) for s in test_sentences]
    predictions = cond_rand_mdl.predict(X_test_local)
    with open(write_path, 'a') as f:
        for i in range(0, len(predictions)):
            sent = test_sentences[i]
            preds = predictions[i]
            for j in range(0, len(sent)):
                str_to_write = '{}\t{}\n'.format(sent[j][0], preds[j])
                f.write(str_to_write)
            f.write('\n')
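Note that write_to_CoNLL never calls fit(): in sklearn_crfsuite, a CRF constructed with model_filename lazily opens that file with a pycrfsuite.Tagger, so predict() runs against a previously trained model. A minimal sketch of that behavior, assuming a trained model already exists at the (hypothetical) path below:

from sklearn_crfsuite import CRF

crf = CRF(model_filename='crf_model_1')  # hypothetical path to an already trained model file
# No fit() call is needed; the tagger is loaded from disk on first use:
# predictions = crf.predict(X_test)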
Example #2
    def __init__(self):
        self.arg_pos_clf = ArgumentPositionClassifier()
        self.ss_model = CRF(algorithm='lbfgs',
                            c1=0.1,
                            c2=0.1,
                            max_iterations=100,
                            all_possible_transitions=True)
        self.ps_model = CRF(algorithm='lbfgs',
                            c1=0.1,
                            c2=0.1,
                            max_iterations=100,
                            all_possible_transitions=True)
Example #3
def train(file_path: str):
    """
    Training CRF model from a given ``file_path``
    """
    addresses = []
    with jsonlines.open(file_path) as reader:
        for obj in reader:
            addresses.append(obj)
    addresses_train, addresses_val = train_test_split(addresses,
                                                      test_size=0.25,
                                                      random_state=42)

    X_train, y_train = addresses_to_features(addresses_train)
    X_val, y_val = addresses_to_features(addresses_val)

    crf = CRF(c1=0.2,
              c2=0.2,
              max_iterations=100,
              all_possible_transitions=True)
    crf.fit(X_train, y_train)

    # prediction score on validation set
    y_pred = crf.predict(X_val)
    metrics.flat_f1_score(y_val,
                          y_pred,
                          average='weighted',
                          labels=[l for l in LABELS if l != 'O'])
    return crf
Example #4
def train(file_path: str, model_path: str = None):
    """
    Training CRF model from a given ``file_path``
    """
    addresses = read_file(file_path)
    addresses_train, addresses_val = train_test_split(addresses,
                                                      test_size=0.25,
                                                      random_state=42)

    X_train, y_train = addresses_to_features(addresses_train)
    X_val, y_val = addresses_to_features(addresses_val)

    crf = CRF(c1=0.2,
              c2=0.2,
              max_iterations=100,
              all_possible_transitions=True)
    crf.fit(X_train, y_train)

    # prediction score on validation set
    y_pred = crf.predict(X_val)
    f1_score = metrics.flat_f1_score(y_val,
                                     y_pred,
                                     average="weighted",
                                     labels=[l for l in LABELS if l != "O"])
    print("Flat F1-Score on validation set = {}".format(f1_score))

    if model_path:
        joblib.dump(crf, model_path)
        print("Save model to {}".format(model_path))

    return crf
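A hypothetical invocation of the trainer above (the file names are placeholders, not paths from the original project):

crf = train("addresses.jsonl", model_path="address_crf.joblib")
# The dumped model can later be restored with joblib:
# crf = joblib.load("address_crf.joblib")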
Example #5
def main():

    df=pd.read_csv(args.input)
    tagged_sentence=Preparing_tagged_data(df)
    df=df[['ID','FORM','XPOSTAG']]
    #printing details
    printing_details(tagged_sentence)
    
    train_set, test_set = train_test_split(tagged_sentence,test_size=0.05,random_state=7)
    
    #print("Number of Sentences in Training Data ",len(train_set))
    #print("Number of Sentences in Testing Data ",len(test_set))
    X_train,y_train=prepareData(train_set)
    X_test,y_test=prepareData(test_set)
    
    crf = CRF(algorithm='l2sgd',
              c2=0.1,
              max_iterations=1000,
              all_possible_transitions=True)
    
    crf.fit(X_train, y_train)
    print(crf)
    
    print("Saving Model .....")
    # Save the Model to file in the current working directory
    Pkl_Filename = args.output
    with open(Pkl_Filename, 'wb') as file:
        pickle.dump(crf, file)
        
    print("Model Saved at "+ Pkl_Filename)
    print()    
    print("Checking the Algoritham's Performance \n")
    TestData(crf, X_train,y_train,X_test,y_test)
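Restoring the model pickled by main() might look like the following sketch; the path stands in for whatever was passed as args.output:

import pickle

with open("crf_model.pkl", "rb") as f:  # placeholder for args.output
    crf = pickle.load(f)
# crf.predict(...) now tags new, identically featurized sentences.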
Example #6
    def train(self, inputfile, features_names_list, annotation_column):
        """
        This function fits a classification model as specified on training data

        :param inputfile: path to inputfile containing the training data
        :param features_names_list: list of indications of all feature columns that should be used
        :param annotation_column: indication of column with annotations
        :type inputfile: string
        :type features_names_list: list
        :type annotation_column: string
        """

        # initialize the right model
        if self.modelname == 'logreg':
            self.model = LogisticRegression()

        elif self.modelname == 'naivebayes':
            self.model = BernoulliNB()

        elif self.modelname == 'svm':
            self.model = LinearSVC()

        elif self.modelname == 'crf':
            self.model = CRF(algorithm='lbfgs', c1=0.1, c2=0.1, max_iterations=100, all_possible_transitions=True)

        # store features_names_list as class attribute
        self.features_names_list = features_names_list

        # get training features and labels
        train_features = self.get_features(inputfile)
        train_targets = self.get_labels(inputfile, annotation_column)

        # fit the model
        self.model.fit(train_features, train_targets)
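Hypothetical usage of the train() method above; "Classifier" stands in for the enclosing class, which this snippet does not show, and the file name and column names are placeholders:

clf = Classifier(modelname='crf')
clf.train('train.conll', ['token', 'pos', 'chunk'], 'gold_label')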
Example #7
def train_crf(labelled_files, save=True, eval=True):
    x, y, _ = format_labelled_data(labelled_files)

    crf = CRF(algorithm='lbfgs',
              c1=0.1,
              c2=0.1,
              max_iterations=100,
              all_possible_transitions=False)

    if eval:
        x_train, x_test, y_train, y_test = train_test_split(x,
                                                            y,
                                                            test_size=0.1,
                                                            random_state=42)
        crf.fit(x_train, y_train)
        pred = crf.predict(x_test)
        report = classification_report(y_test, pred)
        print("Test Results:\n")
        line(60)
        print(report)
        line(60)
        log_results(y_test, pred)
        line(60)

    else:
        crf.fit(x, y)

    if save:
        save_crf(crf)

    return crf
Example #8
def test_attributes(xseq, yseq):
    crf = CRF()
    assert crf.tagger_ is None
    assert crf.size_ is None
    assert crf.classes_ is None
    assert crf.num_attributes_ is None
    assert crf.attributes_ is None
    assert crf.state_features_ is None
    assert crf.transition_features_ is None

    crf.fit([xseq] * 20, [yseq] * 20)

    assert crf.tagger_ is not None
    assert crf.size_ > 1000
    assert set(crf.classes_) == {'sunny', 'rainy'}

    assert crf.num_attributes_ > 0
    assert len(crf.attributes_) == crf.num_attributes_
    assert all(crf.attributes_)
    assert 'clean' in crf.attributes_

    assert len(crf.state_features_) > 0
    assert all(isinstance(c, float) for c in crf.state_features_.values())
    assert all(attr in crf.attributes_ and label in crf.classes_
               for (attr, label) in crf.state_features_.keys()), crf.state_features_

    assert len(crf.transition_features_) > 0
    assert all(isinstance(c, float) for c in crf.transition_features_.values())
    assert all(label_from in crf.classes_ and label_to in crf.classes_
               for (label_from, label_to) in crf.transition_features_.keys()), crf.transition_features_
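For reference, a self-contained toy run showing the shapes asserted above: state_features_ maps (attribute, label) pairs to weights and transition_features_ maps (label_from, label_to) pairs to weights. The weather data is illustrative only:

import sklearn_crfsuite

xseq = [{'walk': 1.0}, {'shop': 1.0, 'clean': 1.0}]
yseq = ['sunny', 'rainy']
crf = sklearn_crfsuite.CRF()
crf.fit([xseq] * 20, [yseq] * 20)
print(crf.classes_)                    # e.g. ['sunny', 'rainy']
print(list(crf.state_features_)[:3])   # keys are (attribute, label) tuples
print(list(crf.transition_features_))  # keys are (label_from, label_to) tuples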
Example #9
def entity_crf_train(my_subjects):
    for i in range(0, len(X)):
        for j in range(0, len(X[i])):
            if 'sub' in X[i][j]:
                subj = my_subjects[np.random.randint(len(my_subjects))]
                subj = subj.split()
                X[i] = X[i][:j] + subj + X[i][j + 1:]
                y[i] = y[i][:j] + ['subject'] * len(subj) + y[i][j + 1:]
        X[i] = X[i][0:10]
        y[i] = y[i][0:10]

    crf = CRF(c1=0.1,
              c2=0.01,
              max_iterations=200,
              all_possible_transitions=True)

    print(".....Training entity extraction model.....")
    crf.fit(X, y)
    print(".....Trained entity extraction model.....")

    working_directory = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
    with open(working_directory + '/full_model/crf_model.pkl',
              'wb') as pickle_file:
        pickle.dump(crf, pickle_file, protocol=pickle.HIGHEST_PROTOCOL)
    with open(working_directory + '/full_model/subjects.pkl',
              'wb') as pickle_file:
        pickle.dump(my_subjects, pickle_file, protocol=pickle.HIGHEST_PROTOCOL)
Example #10
    def __init__(self, algo: str = 'lbfgs', min_freq: int = 0,
                 all_states: bool = False, max_iter: int = 100,
                 epsilon: float = 1e-5, delta: float = 1e-5):
        """

        :param algo: optimization algorithm (lbfgs, l2sgd, ap, pa, arow)
        :param min_freq: threshold of ignoring feature
        :param all_states: if True, consider combinations
                           of missing features and labels
        :param max_iter: max iteration size
        :param epsilon: convergence tolerance
        :param delta: stop training threshold
        """

        self._algo = algo
        self._min_freq = min_freq
        self._all_states = all_states
        self._max_iter = max_iter
        self._epsilon = epsilon
        self._delta = delta
        self.model = CRF(algorithm=algo,
                         min_freq=min_freq,
                         all_possible_states=all_states,
                         max_iterations=max_iter,
                         epsilon=epsilon,
                         delta=delta)
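Hypothetical usage of the wrapper above; "CRFTagger" stands in for the enclosing class, whose real name is not shown in this listing:

tagger = CRFTagger(algo='l2sgd', max_iter=200, delta=1e-6)
# tagger.model is a ready-to-fit sklearn_crfsuite.CRF:
# tagger.model.fit(X_train, y_train)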
Example #11
def test_execute(report_mocked, score_mocked, mocked_params):

    data_source = {"X_test": ['1', '2'], "y_test": ['3', '4']}

    feature_mocked = ('O', 'feature_mocked')
    label_mocked = ('O', 'label___mocked')

    crf_mocked = CRF(algorithm='lbfgs',
                     c1=0.10789964607864502,
                     c2=0.082422264927260847,
                     max_iterations=100,
                     all_possible_transitions=True).fit(
                         feature_mocked, label_mocked)

    model_mocked = {"crf": crf_mocked}

    ac = MetricsEvaluator(model=model_mocked, dataset=data_source)
    ac.execute(params=mocked_params)

    report_mocked.assert_called_once_with(
        ['3', '4'], [['O'], ['O']],
        digits=3,
        labels=['_', 'a', 'b', 'c', 'd', 'e', 'k', 'l', 'm', 'o'])
    score_mocked.assert_called_once_with(
        ['3', '4'], [['O'], ['O']],
        average='weighted',
        labels=['l', 'a', 'b', 'e', '_', 'm', 'o', 'c', 'k', 'd'])
Example #12
def training_crf(training_cue, data, dataset):

    getter = get_frase(data)
    frases = getter.get_frase

    get_negaciones(data)

    X = [sent2features(f, training_cue) for f in frases]
    y = [sent2labels(f, training_cue) for f in frases]

    crf = CRF(algorithm='lbfgs',
              c1=0.1,
              c2=0.1,
              max_iterations=100,
              all_possible_transitions=True,
              verbose=True)

    pred = cross_val_predict(estimator=crf, X=X, y=y, cv=5)

    crf.fit(X, y)

    if training_cue == 'cue':
        model_filename = os.getcwd() + '/models/' + dataset + '/crf_cue_model.pkl'
    else:
        model_filename = os.getcwd() + '/models/' + dataset + '/crf_sco_model.pkl'

    with open(model_filename, 'wb') as file_model:
        pickle.dump(crf, file_model)

    return (y, pred, crf)
Example #13
def test_sklearn_crfsuite(xseq, yseq):
    crf = CRF(c1=0.0, c2=0.1, max_iterations=50)
    crf.fit([xseq], [yseq])

    expl = explain_weights(crf)
    text, html = format_as_all(expl, crf)

    assert "y='sunny' top features" in text
    assert "y='rainy' top features" in text
    assert "Transition features" in text
    assert "sunny   -0.130    0.696" in text
    assert u'+0.124  солнце:не светит' in text

    html_nospaces = html.replace(' ', '').replace("\n", '')
    assert u'солнце:не светит' in html
    assert '<th>rainy</th><th>sunny</th>' in html_nospaces

    try:
        from eli5 import format_as_dataframe, format_as_dataframes
    except ImportError:
        pass
    else:
        from .test_formatters_as_dataframe import check_targets_dataframe
        df_dict = format_as_dataframes(expl)
        check_targets_dataframe(df_dict['targets'], expl)
        df_transition = df_dict['transition_features']
        transition = expl.transition_features
        print(df_transition)
        assert list(transition.class_names) == ['rainy', 'sunny']
        assert np.isclose(df_transition['rainy']['rainy'], transition.coef[0, 0])
        assert np.isclose(df_transition['sunny']['rainy'], transition.coef[0, 1])
        assert np.isclose(df_transition['rainy']['sunny'], transition.coef[1, 0])
Example #14
    def fit(self, train_data: Iterable[str], labels: Iterable[Iterable[str]]):
        """

        :param train_data:
        :param labels: labels in BIO or BILOU notation
        :return:
        """

        crf_dataset = self.__create_dataset(train_data, labels)

        features = [
            self.__convert_idata_to_features(message_data)
            for message_data in crf_dataset
        ]

        labels = [
            self.__extract_labels_from_data(message_data)
            for message_data in crf_dataset
        ]

        self.__crf_model = CRF(
            algorithm='lbfgs',
            c1=self.__CONFIG['L1_c'],
            c2=self.__CONFIG['L2_c'],
            max_iterations=self.__CONFIG['max_iterations'],
            all_possible_transitions=True,
        )

        self.__crf_model.fit(features, labels)

        return self
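A hypothetical call of the fit() method above, with one tokenized message and labels in BIO notation ("extractor" and the label set are placeholders):

extractor.fit(train_data=["book a flight to paris"],
              labels=[["O", "O", "O", "O", "B-city"]])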
Example #15
    def __init__(self, data):
        #super(BiLSTM_CRF, self).__init__()
        print "build batched lstmcrf..."

        ## add two more label for downlayer lstm, use original label size for CRF
        #label_size = data.label_alphabet_size
        self.label_alphabet=data.label_alphabet
        self.word_alphabet=data.word_alphabet
        #self.label_alphabet_size += 2
        self.crf = CRF(
            algorithm='lbfgs',
            c1=0.1,
            c2=0.1,
            max_iterations=50,
            all_possible_transitions=True
        )
        self.reformulator = Reformulator(data)
        self.useReformulator = False
        self.loss_function = nn.NLLLoss()
        self.topk=50
        self.X_train=[]
        self.Y_train=[]
        self.tag_mask_list=[]
        self.instances=[]
        self.scores_refs=[]
        self.tag_mask=None
Example #16
    def build(sequences, labels, **kwargs):
        """
        Builds a sequence classifier from x/y pairs

        :param sequences: A list of sequences, with each member of the sequence
                   represented as features
        :type sequences: list of list of dict
        :param labels: The corresponding labels for each sequence
        :type labels: list of list of str
        :param kwargs: arguments to override the defaults given to the
                       underlying CRF
        :return: A trained sequence classifier based on the provided training
                 data
        :rtype: SequenceClassifier
        """
        params = {
            'algorithm': DEFAULT_ALGORITHM,
            'c1': DEFAULT_C1,
            'c2': DEFAULT_C2,
            'max_iterations': DEFAULT_MAX_ITERATIONS,
            'all_possible_transitions': DEFAULT_ALL_POSSIBLE_TRANSITIONS
        }

        if kwargs:
            params.update(kwargs)

        model = CRF(**params)
        model.fit(sequences, labels)
        return SequenceClassifier(model)
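A toy call of the builder above, assuming build() is reachable as shown (e.g. as a @staticmethod) and the DEFAULT_* constants are defined in the surrounding module; the data follows the documented list-of-list-of-dict / list-of-list-of-str format:

sequences = [[{'w': 'the'}, {'w': 'cat'}], [{'w': 'a'}, {'w': 'dog'}]]
labels = [['DET', 'NOUN'], ['DET', 'NOUN']]
clf = build(sequences, labels, max_iterations=20)  # kwargs override the CRF defaults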
Example #17
    def cv_eval(sentence_result,
                slot_result,
                cv=5,
                max_iterations=100,
                c1=0.17,
                c2=0.01):
        """用cv验证模型"""
        x_train = sentences_to_features(sentence_result)
        y_train = slot_result
        f1_score = make_scorer(metrics.flat_f1_score, average='weighted')

        crf = CRF(algorithm='lbfgs',
                  c1=c1,
                  c2=c2,
                  max_iterations=max_iterations,
                  all_possible_transitions=True,
                  verbose=True)

        cv_result = cross_validate(crf,
                                   x_train,
                                   y_train,
                                   scoring=f1_score,
                                   cv=cv,
                                   verbose=10)

        for k, v in cv_result.items():
            print(k)
            print(np.mean(v))
            print(v)
Example #18
def main(path_train, path_test, path_pred, path_crf, take_first, dev_size):
    print("loading train corpus..")
    _, X_raw, y = load_corpus(path_train, take_first=take_first)
    print("extracting features from train corpus..")
    fe = TaggerFeatureExtractor()
    X = fe.fit_transform(tqdm(X_raw))
    print("training..")
    crf = CRF(algorithm='ap', verbose=True, max_iterations=10)
    if dev_size:
        X, X_dev, y, y_dev = train_test_split(X, y, test_size=dev_size)
    else:
        X_dev, y_dev = None, None
    crf.fit(X, y, X_dev, y_dev)

    print("saving..")
    joblib.dump({'fe': fe, 'crf': crf}, path_crf, compress=2)

    print("loading test corpus..")
    corpus, X_test_raw, y_test = load_corpus(path_test)
    print("extracting features from test corpus..")
    X_test = fe.transform(X_test_raw)
    print("predicting..")
    y_pred = crf.predict(tqdm(X_test))

    print("saving results..")
    sents_pred = y_pred_to_sents_pred(corpus, y_pred)
    conll.write_sents(sents_pred, path_pred)
Example #19
    def __init__(self):
        super().__init__()
        self.crf = CRF(algorithm='lbfgs',
                       c1=0.1,
                       c2=0.1,
                       max_iterations=100,
                       all_possible_transitions=True)
Example #20
def crf_tag():
    brown_tagged_sents = brown.tagged_sents(categories='news')
    #print(brown_tagged_sents[0])
    train_len = int(len(brown_tagged_sents) * 0.9)
    training_sentences = brown_tagged_sents[:train_len]
    test_sentences = brown_tagged_sents[train_len:]

    X_train, y_train = transform_to_dataset(training_sentences)
    X_test, y_test = transform_to_dataset(test_sentences)

    #print(len(X_train))
    #print(len(X_test))
    print(X_train[0])
    print(y_train[0])

    model = CRF()
    model.fit(X_train, y_train)

    raw_sent = ['I', 'am', 'a', 'student']
    sent_feat = [
        feature_extract(raw_sent, index) for index in range(len(raw_sent))
    ]
    print(list(zip(raw_sent, model.predict([sent_feat])[0])))
    y_pred = model.predict(X_test)
    print(metrics.flat_accuracy_score(y_test, y_pred))
Example #21
def test_crf(xseq, yseq, algorithm):
    crf = CRF(algorithm=algorithm)
    crf.fit([xseq], [yseq])

    y_pred = crf.predict([xseq])
    if algorithm != 'ap':  # Averaged Perceptron is regularized too much
        assert y_pred == [yseq]
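One plausible way the algorithm argument above could be supplied is pytest parametrization over the crfsuite training algorithms (an assumption; the project's actual fixtures are not shown in this listing):

import pytest

@pytest.mark.parametrize('algorithm', ['lbfgs', 'l2sgd', 'ap', 'pa', 'arow'])
def test_crf(xseq, yseq, algorithm):
    ...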
Example #22
def test_crf(train_file,test_file,model_name=""):
    valores = []
    data=pandas.read_csv(train_file,sep="\t",header=None)
    X_dataset=fromListToTuple(data.iloc[:,[0,1,2,3]].values)
    useful_features=[True,True]
    X_train,y_train=prepareData([X_dataset],'train',useful_features)
    data2=pandas.read_csv(test_file,sep="\t",header=None)
    X_test=fromListToTuple(data2.iloc[:,[0,1,2]].values)
    X_teste,y_teste=prepareData([X_test],'predict',useful_features)
    crf = CRF(
            algorithm='lbfgs',
            c1=0.0625,
            c2=0.5,
            max_iterations=100,
            all_possible_transitions=False,
            all_possible_states=True,
            verbose=True
        )
    crf.fit(X_train, y_train)
    if(model_name!=""):
        save_model(model_name + ".pickle",crf)
    # The test data was already read and featurized above, so predict directly.
    y_pred=crf.predict(X_teste)
    resultados = []
    for index,elem in enumerate(y_pred[0]):
        resultados.append(str(y_pred[0][index]))
    return resultados
Example #23
    def __init__(self, model_file_path):
        self.model_file_path = path.abspath(path.expanduser(model_file_path))
        self.model = CRF(algorithm='l2sgd',
                         c2=0.1,
                         max_iterations=1000,
                         all_possible_transitions=True,
                         model_filename=self.model_file_path)
Example #24
    def __init__(
        self,
        hyper_params: Dict[str, float] = None,
        model_path: str = None,
    ):
        if model_path:
            self.load_model(model_path=model_path)
        else:
            hp = hyper_params or {}
            algorithm = hp.get("algorithm", "lbfgs")
            c1 = hp.get("c1", 0.1)
            c2 = hp.get("c2", 0.1)
            max_iters = hp.get("max_iterations", 100)
            apt = hp.get("all_possible_transitions", True)

            self.fe = FeatureExtractor()

            self.crf = CRF(
                algorithm=algorithm,
                c1=c1,
                c2=c2,
                max_iterations=max_iters,
                all_possible_transitions=apt,
            )
Example #25
    def __init__(self, data):

        print("build batched lstmcrf...")

        self.label_alphabet=data.label_alphabet
        self.word_alphabet=data.word_alphabet

        self.crf = CRF(
            algorithm='lbfgs',
            c1=0.1,
            c2=0.1,
            max_iterations=100,
            all_possible_states=False,
            all_possible_transitions=True
        )
        self.examiner = Examiner(data)
        self.useExaminer = False
        self.loss_function = nn.NLLLoss()
        self.topk=5
        self.X_train=[]
        self.Y_train=[]
        self.pos_mask_list=[]
        self.instances=[]
        self.scores_refs=[]
        self.pos_mask=None
        self.tag_size=data.label_alphabet_size
Example #26
    def train(self, model_name, tagged_sentences):
        # Split the dataset for training and testing
        cutoff = int(.75 * len(tagged_sentences))
        training_sentences = tagged_sentences[:cutoff]
        test_sentences = tagged_sentences[cutoff:]

        X_train, y_train = transform_to_dataset(training_sentences)
        X_test, y_test = transform_to_dataset(test_sentences)
        print(len(X_train))
        print(len(X_test))


        print("Training Started........")
        print("it will take time according to your dataset size..")
        model = CRF()
        model.fit(X_train, y_train)
        print("Training Finished!")
        
        print("Evaluating with Test Data...")
        y_pred = model.predict(X_test)
        print("Accuracy is: ")
        print(metrics.flat_accuracy_score(y_test, y_pred))
        
        pickle.dump(model, open(model_name, 'wb'))
        print("Model Saved!")
Example #27
    def train_pos_tagger(self, path):
        # Just to make sure
        nltk.download('treebank')

        tagged_sentences = treebank.tagged_sents()

        train_size = int(.80 * len(tagged_sentences))
        training_sentences = tagged_sentences[:train_size]

        X_train, y_train = self.transform_to_dataset(training_sentences)

        model = CRF()

        print('Training started...')
        model.fit(X_train, y_train)
        print('Training finished.')

        # Save classifier to file
        model_pkl = open(path, 'wb')
        pickle.dump(model, model_pkl)
        model_pkl.close()

        print("POSTagger saved.")

        self.classifier = model
Example #28
    def create_model(self):
        crf = CRF(algorithm="lbfgs",
                  c1=0.1,
                  c2=0.1,
                  all_possible_transitions=False)

        self.model = crf
Example #29
def train(train_file, test_file, min_freq, model_file):
    '''Train a CRF tagger on CoNLL-format training data'''
    # Read in initial training data
    conll_data_train = read_conll_data(train_file)
    train_sents = [[line[0] for line in doc] for doc in conll_data_train]
    train_labels = [[line[2] for line in doc] for doc in conll_data_train]

    # Featurize and create instance from list of sentences
    feat_sent_train = build_dataset(train_sents)
    print("Training on {0} inst".format(len(feat_sent_train)))

    # Train and test loop for parameter settings
    # Create and train CRF model
    # For different parameter options, see:
    # https://sklearn-crfsuite.readthedocs.io/en/latest/_modules/sklearn_crfsuite/estimator.html
    model = CRF(min_freq=min_freq)
    model.fit(feat_sent_train, train_labels)

    # Test the model on held out test set if wanted
    if args.test_file:
        conll_data_test = read_conll_data(test_file)
        test_sents = [[line[0] for line in doc] for doc in conll_data_test]
        test_labels = [[line[2] for line in doc] for doc in conll_data_test]
        feat_sent_test = build_dataset(test_sents)
        # Predicting and printing accuracy
        pred = model.predict(feat_sent_test)
        acc = metrics.flat_accuracy_score(test_labels, pred)
        print("Accuracy: {0}%".format(float(round(acc, 3)) * 100))
    # Save model to disk if wanted
    if args.model:
        print("Saving model to {0}".format(model_file))
        joblib.dump(model, model_file)
Example #30
def parameter_tuning(args, dataset):
    c1s = experiment_util.get_param_list(args.c1)
    c2s = experiment_util.get_param_list(args.c2)
    best_valid_f1_score = -np.inf
    best_c1 = -np.inf
    best_c2 = -np.inf
    best_model = None
    for c1 in c1s:
        for c2 in c2s:
            crf = CRF(algorithm='lbfgs',
                      c1=c1,
                      c2=c2,
                      max_iterations=500,
                      all_possible_transitions=True,
                      verbose=args.debug)
            crf.fit(dataset.training.list_of_feature_dicts,
                    dataset.training.list_of_labels)
            preds = crf.predict(dataset.validation.list_of_feature_dicts)
            valid_f1_score = metrics.flat_f1_score(
                dataset.validation.list_of_labels, preds, average='micro')
            if valid_f1_score > best_valid_f1_score:
                best_valid_f1_score = valid_f1_score
                best_c1 = c1
                best_c2 = c2
                best_model = crf
    print('Best validation F1 score:', best_valid_f1_score, 'Best c1:',
          best_c1, 'Best c2:', best_c2)
    return best_model
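As an alternative to the exhaustive grid above, the sklearn-crfsuite tutorial samples c1/c2 from exponential distributions with RandomizedSearchCV; a sketch under those assumptions, with illustrative settings:

import scipy.stats
from sklearn.metrics import make_scorer
from sklearn.model_selection import RandomizedSearchCV
from sklearn_crfsuite import CRF, metrics

crf = CRF(algorithm='lbfgs', max_iterations=500, all_possible_transitions=True)
params_space = {'c1': scipy.stats.expon(scale=0.5),
                'c2': scipy.stats.expon(scale=0.05)}
f1_scorer = make_scorer(metrics.flat_f1_score, average='micro')
rs = RandomizedSearchCV(crf, params_space, cv=3, n_iter=20, scoring=f1_scorer)
# rs.fit(X_train, y_train); rs.best_params_ then holds the selected c1/c2.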