Python load_dataset_from_bio 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: deep_ner.utils

메소드/함수: load_dataset_from_bio

hotexamples.com에서의 예제들: 2

Python load_dataset_from_bio - 예제 2개를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python deep_ner.utils.load_dataset_from_bio의 실제 사용 예제 중 가장 높은 평가를 받은 것들입니다. 예제를 평가해 주시면 예제 품질을 개선하는 데 도움이 됩니다.

예제 #1

파일 보기

파일: demo_bert_conll2003.py 프로젝트: kuilef/deep_ner

def recognize(test_file_name: str, split_by_paragraphs: bool,
              recognizer: BERT_NER, results_file_name: str):
    """Evaluate a recognizer on a BIO test file and write out its predictions.

    The test set is read with ``load_dataset_from_bio``, labelled with
    ``recognizer.predict``, scored with ``calculate_prediction_quality``
    (overall and per entity type, printed to stdout), and finally the
    predictions are passed to ``save_dataset_as_bio``.

    Args:
        test_file_name: Path of the BIO-formatted test file.
        split_by_paragraphs: When true, ``'-DOCSTART-'`` is passed as the
            paragraph separator to the loader; otherwise ``None`` is passed.
        recognizer: Recognizer used for prediction; its ``classes_list_``
            attribute is forwarded to the quality calculation.
        results_file_name: Destination path handed to ``save_dataset_as_bio``.
    """
    if split_by_paragraphs:
        separators = {'-DOCSTART-'}
    else:
        separators = None
    texts, true_labels = load_dataset_from_bio(
        test_file_name,
        paragraph_separators=separators,
        stopwords={'-DOCSTART-'})
    print('The CoNLL-2003 data for final testing have been loaded...')
    print('Number of samples is {0}.'.format(len(true_labels)))
    print('')

    predicted_labels = recognizer.predict(texts)
    quality = calculate_prediction_quality(
        true_labels, predicted_labels, classes_list=recognizer.classes_list_)
    f1, precision, recall, quality_by_entities = quality

    # Overall scores first, then one indented section per entity type.
    print('All entities:')
    print('    F1-score is {0:.2%}.'.format(f1))
    print('    Precision is {0:.2%}.'.format(precision))
    print('    Recall is {0:.2%}.'.format(recall))
    for ne_type in sorted(quality_by_entities):
        # Index positions 0/1/2 are reported as F1 / precision / recall.
        scores = quality_by_entities[ne_type]
        print('  {0}'.format(ne_type))
        print('    F1-score is {0:.2%}.'.format(scores[0]))
        print('    Precision is {0:.2%}.'.format(scores[1]))
        print('    Recall is {0:.2%}.'.format(scores[2]))
    print('')

    save_dataset_as_bio(test_file_name, texts, predicted_labels,
                        results_file_name, stopwords={'-DOCSTART-'})

예제 #2

파일 보기

파일: demo_bert_conll2003.py 프로젝트: kuilef/deep_ner

def train(train_file_name: str, valid_file_name: str,
          split_by_paragraphs: bool, bert_will_be_tuned: bool,
          lstm_layer_size: Union[int, None], l2: float, max_epochs: int,
          batch_size: int, gpu_memory_frac: float,
          model_name: str) -> BERT_NER:
    """Return a fitted recognizer, loading it from disk or training a new one.

    If ``model_name`` already exists as a file, the recognizer is unpickled
    from it and returned without any training. Otherwise the training and
    validation sets are loaded from BIO files, a ``BERT_NER`` is fitted,
    pickled to ``model_name``, and its quality on the validation set is
    printed (overall and per entity type).

    Args:
        train_file_name: Path of the BIO-formatted training file.
        valid_file_name: Path of the BIO-formatted validation file.
        split_by_paragraphs: When true, ``'-DOCSTART-'`` is passed as the
            paragraph separator to the loader; otherwise ``None`` is passed.
        bert_will_be_tuned: Forwarded as ``finetune_bert``; also selects the
            learning rate (``1e-6`` when true, ``1e-4`` otherwise).
        lstm_layer_size: Forwarded as ``lstm_units``.
        l2: Forwarded as ``l2_reg``.
        max_epochs: Forwarded as ``max_epochs``.
        batch_size: Forwarded as ``batch_size``.
        gpu_memory_frac: Forwarded as ``gpu_memory_frac``.
        model_name: Path of the pickle file used to load/save the recognizer.

    Returns:
        The loaded or newly fitted recognizer.

    Raises:
        AssertionError: If the object unpickled from ``model_name`` is not a
            ``BERT_NER`` instance.
    """
    if os.path.isfile(model_name):
        # NOTE(review): unpickling can execute arbitrary code, so
        # `model_name` must only ever point at a trusted file.
        with open(model_name, 'rb') as fp:
            recognizer = pickle.load(fp)
        assert isinstance(recognizer, BERT_NER)
        print('The NER has been successfully loaded from the file `{0}`...'.
              format(model_name))
        print('')
        return recognizer

    paragraph_separators = {'-DOCSTART-'} if split_by_paragraphs else None
    X_train, y_train = load_dataset_from_bio(
        train_file_name,
        paragraph_separators=paragraph_separators,
        stopwords={'-DOCSTART-'})
    X_val, y_val = load_dataset_from_bio(
        valid_file_name,
        paragraph_separators=paragraph_separators,
        stopwords={'-DOCSTART-'})
    print(
        'The CoNLL-2003 data for training and validation have been loaded...'
    )
    print('Number of samples for training is {0}.'.format(len(y_train)))
    print('Number of samples for validation is {0}.'.format(len(y_val)))
    print('')

    # The TF-Hub handle is only supplied when no BERT path is configured
    # on the class itself.
    if BERT_NER.PATH_TO_BERT is None:
        bert_hub_module_handle = 'https://tfhub.dev/google/bert_cased_L-12_H-768_A-12/1'
    else:
        bert_hub_module_handle = None
    recognizer = BERT_NER(finetune_bert=bert_will_be_tuned,
                          batch_size=batch_size,
                          l2_reg=l2,
                          bert_hub_module_handle=bert_hub_module_handle,
                          lstm_units=lstm_layer_size,
                          max_epochs=max_epochs,
                          patience=5,
                          gpu_memory_frac=gpu_memory_frac,
                          verbose=True,
                          random_seed=42,
                          lr=1e-6 if bert_will_be_tuned else 1e-4)
    recognizer.fit(X_train, y_train, validation_data=(X_val, y_val))

    # Persist the model right after fitting, before reporting it as saved:
    # the message below is then truthful, and a failure in the evaluation
    # step can no longer throw away the result of a training run.
    with open(model_name, 'wb') as fp:
        pickle.dump(recognizer, fp)
    print('')
    print(
        'The NER has been successfully fitted and saved into the file `{0}`...'
        .format(model_name))

    y_pred = recognizer.predict(X_val)
    f1, precision, recall, quality_by_entities = calculate_prediction_quality(
        y_val, y_pred, classes_list=recognizer.classes_list_)
    print('All entities:')
    print('    F1-score is {0:.2%}.'.format(f1))
    print('    Precision is {0:.2%}.'.format(precision))
    print('    Recall is {0:.2%}.'.format(recall))
    for ne_type in sorted(quality_by_entities):
        # Index positions 0/1/2 are reported as F1 / precision / recall.
        print('  {0}'.format(ne_type))
        print('    F1-score is {0:.2%}.'.format(
            quality_by_entities[ne_type][0]))
        print('    Precision is {0:.2%}.'.format(
            quality_by_entities[ne_type][1]))
        print('    Recall is {0:.2%}.'.format(
            quality_by_entities[ne_type][2]))
    print('')
    return recognizer