# Exemple #1  (scraped snippet marker; the following "0" was the example's vote count)
# 0
def evaluate(args):
    """Evaluate a saved ELMo-BiLSTM tagger on a CoNLL test file and log scores.

    Locates the most recent checkpoint for ``args.datasetName`` under
    ``args.model_save``, tags ``args.testFile`` and writes accuracy (task
    ``"pos"``) or precision/recall/F1 (task ``"chunking"``) to a result file
    under ``args.result_save``.

    Args:
        args: parsed CLI namespace with attributes ``model_save``,
            ``datasetName``, ``testFile``, ``result_save``, ``task`` and
            ``testSetting``.
    """
    fpath = args.model_save + '/' + args.datasetName + '_1.h5'
    save_dir, model_init = os.path.split(fpath)

    # Resolve the newest checkpoint matching the "<dataset>_1.h5" prefix.
    modelPath, _ = get_last_model_path(save_dir, model_init)
    print(modelPath)
    inputPath = args.testFile
    inputColumns = {0: "tokens", 1: 'POS', 2: 'chunk_BIO'}

    resfpath = args.result_save + '/' + args.task + '/' + args.testSetting

    # :: Load the model ::
    lstmModel = ELMoBiLSTM.loadModel(modelPath)

    # :: Prepare the input ::
    sentences = readCoNLL(inputPath, inputColumns)
    addCharInformation(sentences)
    addCasingInformation(sentences)

    # :: Map casing and character information to integer indices ::
    dataMatrix = createMatrices(sentences, lstmModel.mappings, True)

    # :: Perform the word embedding / ELMo embedding lookup ::
    embLookup = lstmModel.embeddingsLookup
    embLookup.elmo_cuda_device = 0  # Cuda device for pytorch - elmo embedding, -1 for CPU
    addEmbeddings(dataMatrix, embLookup.sentenceLookup)

    # Open the result file only once scoring can start, and use a context
    # manager so it is closed even if computeAcc/computeF1 raises (the
    # original bare open()/close() pair leaked the handle on error).
    with open(resfpath, 'w') as resfile:
        if args.task == "pos":
            # Evaluation of POS tagging
            test_acc = lstmModel.computeAcc(args.datasetName, dataMatrix)
            print("Test-Data: Accuracy: %.4f" % (test_acc))
            resfile.write("Test-Data: Accuracy: %.4f" % (test_acc))
        elif args.task == "chunking":
            # Evaluation of Chunking
            test_pre, test_rec, test_f1 = lstmModel.computeF1(
                args.datasetName, dataMatrix)
            print("Test-Data: Prec: %.3f, Rec: %.3f, F1: %.4f" %
                  (test_pre, test_rec, test_f1))
            resfile.write("Test-Data: Prec: %.3f, Rec: %.3f, F1: %.4f" %
                          (test_pre, test_rec, test_f1))
import sys
import logging

# Command-line entry point: requires a saved model path and a CoNLL input file.
if len(sys.argv) < 3:
    print(
        "Usage: python RunModel_CoNLL_Format.py modelPath inputPathToConllFile"
    )
    exit()

modelPath = sys.argv[1]  # path of the serialized BiLSTM model
inputPath = sys.argv[2]  # CoNLL-formatted file to tag
# CoNLL column layout: column 0 = tokens, column 1 = gold NER tags (BIO scheme).
inputColumns = {0: "tokens", 1: "NER_BIO"}
#inputColumns = {0: "tokens", 1: "is_name", 2: "NER_BIO"}

# :: Prepare the input ::
sentences = readCoNLL(inputPath, inputColumns)
addCharInformation(sentences)    # augment each sentence with character-level info
addCasingInformation(sentences)  # augment each sentence with casing features

# :: Load the model ::
lstmModel = BiLSTM.loadModel(modelPath)

# Map tokens/features to the integer indices the model was trained with.
dataMatrix = createMatrices(sentences, lstmModel.mappings, True)

# :: Tag the input ::
tags = lstmModel.tagSentences(dataMatrix)

# :: Output to stdout ::
all_sentences_preds = []
for sentenceIdx in range(len(sentences)):
    tokens = sentences[sentenceIdx]['tokens']
    # NOTE(review): snippet is truncated here — the rest of the loop body
    # (collecting per-token predictions into all_sentences_preds) is missing.
# Exemple #3  (scraped snippet marker; the following "0" was the example's vote count)
# 0
        # NOTE(review): fragment begins mid-function; `decision`, `subprocess`
        # and `lstmModel` are defined outside this excerpt.
        decision_txt_path = decision
        # 2 file to conll file
        # Run an external converter that turns the decision .txt into CoNLL format.
        subprocess.check_call([
            "python",
            "/home/pavel/code/pseudo_conseil_etat/src/data/normal_doc2conll.py",
            decision_txt_path
        ])

        # Derived output path: strip the last 4 chars (".txt") and append suffix.
        decision_conll_path = decision_txt_path[:-4] + "_TestCoNLL.txt"

        #3 predict conll file

        inputColumns = {0: "tokens"}  # token-only input, no gold labels

        # :: Prepare the input ::
        sentences = readCoNLL(decision_conll_path, inputColumns)
        addCharInformation(sentences)
        addCasingInformation(sentences)

        # Map tokens/features to the model's training-time integer indices.
        dataMatrix = createMatrices(sentences, lstmModel.mappings, True)

        # :: Tag the input ::
        tags = lstmModel.tagSentences(dataMatrix)

        # :: Output to stdout ::
        list_token_tags = []
        for sentenceIdx in range(len(sentences)):
            tokens = sentences[sentenceIdx]['tokens']
            sentence = []
            for tokenIdx in range(len(tokens)):
                # NOTE(review): truncated — the per-token body of this loop is missing.
                    required=True,
                    type=str)

args = parser.parse_args()  # NOTE(review): `parser` is built above this excerpt

#if len(sys.argv) < 4:
#    print("Usage: python RunModel.py modelPath inputPathToConllFile outputPathToConllFile")
#    exit()

#modelPath = sys.argv[1]
#inputPath = sys.argv[2]
#outputPath = sys.argv[3]
# CoNLL column layout: column 0 = tokens, column 1 = gold labels.
inputColumns = {0: "tokens", 1: "gold"}

# :: Prepare the input ::
sentences = readCoNLL(args.input_file, inputColumns)
addCharInformation(sentences)    # augment sentences with character-level info
addCasingInformation(sentences)  # augment sentences with casing features

# :: Load the model ::
lstmModel = BiLSTM.loadModel(args.model_path)
params = lstmModel.get_params()
#print("params : {}".format(params))

# Map tokens/features to the model's training-time integer indices.
dataMatrix = createMatrices(sentences, lstmModel.mappings, True)

# :: Tag the input ::
tags = lstmModel.tagSentences(dataMatrix)

# :: Output to stdout ::
f = None  # NOTE(review): presumably assigned/opened further down — not visible here
# :: Load the model ::
lstmModel = BiLSTM.loadModel(modelPath)

##########################
# Collect the basename of every *.conll file in the input directory.
file_list = set([
    os.path.basename(x) for x in glob.glob(os.path.join(inputPath, "*.conll"))
])
##for this_filename in tqdm( sorted( file_list ) ):
for this_filename in sorted(file_list):
    # Each <name>.conll input produces a matching <name>.ann output alongside it.
    output_filename = re.sub(".conll$", ".ann", this_filename)
    output_fullpath = os.path.join(inputPath, output_filename)
    ##print( '{}'.format( output_fullpath ) )
    # Create/truncate the output file up front (clears any stale content).
    with open(output_fullpath, 'w') as fp:
        pass
    # :: Prepare the input ::
    sentences = readCoNLL(os.path.join(inputPath, this_filename), inputColumns)
    addCharInformation(sentences)
    addCasingInformation(sentences)
    ##
    # Map tokens/features to the model's training-time integer indices.
    dataMatrix = createMatrices(sentences, lstmModel.mappings, True)
    # :: Tag the input ::
    tags = lstmModel.tagSentences(dataMatrix)
    # :: Output to stdout ::
    annot_count = 0   # running count of annotations emitted for this file
    coveredText = []  # NOTE(review): usage not visible in this excerpt
    firstBegin = 0
    lastEnd = 0
    for sentenceIdx in range(len(sentences)):
        tokens = sentences[sentenceIdx]['tokens']
        ##
        for tokenIdx in range(len(tokens)):
            # NOTE(review): truncated — the per-token annotation logic is missing.