def evaluate(args):
    """Evaluate the latest saved ELMo-BiLSTM checkpoint on a test set.

    Builds the checkpoint name template ``<datasetName>_1.h5`` under
    ``args.model_save``, resolves the most recent matching model via
    ``get_last_model_path``, tags ``args.testFile`` (CoNLL columns:
    tokens / POS / chunk_BIO) and writes the metric summary to
    ``<result_save>/<task>/<testSetting>``.

    Args:
        args: parsed CLI namespace; reads ``model_save``, ``datasetName``,
            ``testFile``, ``result_save``, ``task`` ("pos" or "chunking")
            and ``testSetting``.
    """
    # Template path of the first checkpoint; get_last_model_path() then
    # picks the newest matching model file in that directory.
    fpath = args.model_save + '/' + args.datasetName + '_1.h5'
    #fpath = 'models/'+args.datasetName+'_1.h5'
    save_dir, model_init = os.path.split(fpath)
    modelPath, _ = get_last_model_path(save_dir, model_init)
    print(modelPath)

    inputPath = args.testFile
    inputColumns = {0: "tokens", 1: 'POS', 2: 'chunk_BIO'}
    resfpath = args.result_save + '/' + args.task + '/' + args.testSetting

    # :: Load the model ::
    lstmModel = ELMoBiLSTM.loadModel(modelPath)

    # :: Prepare the input ::
    sentences = readCoNLL(inputPath, inputColumns)
    addCharInformation(sentences)
    addCasingInformation(sentences)

    # :: Map casing and character information to integer indices ::
    dataMatrix = createMatrices(sentences, lstmModel.mappings, True)

    # :: Perform the word embedding / ELMo embedding lookup ::
    embLookup = lstmModel.embeddingsLookup
    embLookup.elmo_cuda_device = 0  # Cuda device for pytorch - elmo embedding, -1 for CPU
    addEmbeddings(dataMatrix, embLookup.sentenceLookup)

    # Fix: the result file used to be opened before model loading and was
    # never closed on error, leaking the handle and leaving a truncated
    # file behind if any step above raised. Open it lazily and let the
    # context manager close it.
    with open(resfpath, 'w') as resfile:
        if args.task == "pos":
            # Evaluation of POS tagging
            test_acc = lstmModel.computeAcc(args.datasetName, dataMatrix)
            print("Test-Data: Accuracy: %.4f" % (test_acc))
            resfile.write("Test-Data: Accuracy: %.4f" % (test_acc))
        elif args.task == "chunking":
            # Evaluation of Chunking
            test_pre, test_rec, test_f1 = lstmModel.computeF1(
                args.datasetName, dataMatrix)
            print("Test-Data: Prec: %.3f, Rec: %.3f, F1: %.4f" %
                  (test_pre, test_rec, test_f1))
            resfile.write("Test-Data: Prec: %.3f, Rec: %.3f, F1: %.4f" %
                          (test_pre, test_rec, test_f1))
# Tag a CoNLL-formatted file with a saved BiLSTM model and collect the
# predictions. NOTE(review): this chunk is truncated — the per-sentence
# output loop at the bottom continues beyond the visible text.
import sys
import logging

# Require model path and input file on the command line.
if len(sys.argv) < 3:
    print(
        "Usage: python RunModel_CoNLL_Format.py modelPath inputPathToConllFile"
    )
    exit()

modelPath = sys.argv[1]
inputPath = sys.argv[2]

# Column layout of the input CoNLL file: token in column 0, gold NER BIO
# tag in column 1.
inputColumns = {0: "tokens", 1: "NER_BIO"}
#inputColumns = {0: "tokens", 1: "is_name", 2: "NER_BIO"}

# :: Prepare the input ::
sentences = readCoNLL(inputPath, inputColumns)
addCharInformation(sentences)
addCasingInformation(sentences)

# :: Load the model ::
lstmModel = BiLSTM.loadModel(modelPath)

# Map tokens/casing/characters to the integer indices the model expects.
dataMatrix = createMatrices(sentences, lstmModel.mappings, True)

# :: Tag the input ::
tags = lstmModel.tagSentences(dataMatrix)

# :: Output to stdout ::
all_sentences_preds = []
for sentenceIdx in range(len(sentences)):
    tokens = sentences[sentenceIdx]['tokens']
decision_txt_path = decision # 2 file to conll file subprocess.check_call([ "python", "/home/pavel/code/pseudo_conseil_etat/src/data/normal_doc2conll.py", decision_txt_path ]) decision_conll_path = decision_txt_path[:-4] + "_TestCoNLL.txt" #3 predict conll file inputColumns = {0: "tokens"} # :: Prepare the input :: sentences = readCoNLL(decision_conll_path, inputColumns) addCharInformation(sentences) addCasingInformation(sentences) dataMatrix = createMatrices(sentences, lstmModel.mappings, True) # :: Tag the input :: tags = lstmModel.tagSentences(dataMatrix) # :: Output to stdout :: list_token_tags = [] for sentenceIdx in range(len(sentences)): tokens = sentences[sentenceIdx]['tokens'] sentence = [] for tokenIdx in range(len(tokens)):
# NOTE(review): this chunk starts mid parser.add_argument(...) call — the
# argparse setup (and `parser` itself) is defined before the visible text.
                    required=True,
                    type=str)
args = parser.parse_args()

#if len(sys.argv) < 4:
#    print("Usage: python RunModel.py modelPath inputPathToConllFile outputPathToConllFile")
#    exit()
#modelPath = sys.argv[1]
#inputPath = sys.argv[2]
#outputPath = sys.argv[3]

# Column layout of the input CoNLL file: token in column 0, gold tag in
# column 1.
inputColumns = {0: "tokens", 1: "gold"}

# :: Prepare the input ::
sentences = readCoNLL(args.input_file, inputColumns)
addCharInformation(sentences)
addCasingInformation(sentences)

# :: Load the model ::
lstmModel = BiLSTM.loadModel(args.model_path)
params = lstmModel.get_params()
#print("params : {}".format(params))

# Map tokens/casing/characters to the integer indices the model expects.
dataMatrix = createMatrices(sentences, lstmModel.mappings, True)

# :: Tag the input ::
tags = lstmModel.tagSentences(dataMatrix)

# :: Output to stdout ::
# Output file handle; presumably opened later in the (not visible)
# remainder of this script.
f = None
# :: Load the model :: lstmModel = BiLSTM.loadModel(modelPath) ########################## file_list = set([ os.path.basename(x) for x in glob.glob(os.path.join(inputPath, "*.conll")) ]) ##for this_filename in tqdm( sorted( file_list ) ): for this_filename in sorted(file_list): output_filename = re.sub(".conll$", ".ann", this_filename) output_fullpath = os.path.join(inputPath, output_filename) ##print( '{}'.format( output_fullpath ) ) with open(output_fullpath, 'w') as fp: pass # :: Prepare the input :: sentences = readCoNLL(os.path.join(inputPath, this_filename), inputColumns) addCharInformation(sentences) addCasingInformation(sentences) ## dataMatrix = createMatrices(sentences, lstmModel.mappings, True) # :: Tag the input :: tags = lstmModel.tagSentences(dataMatrix) # :: Output to stdout :: annot_count = 0 coveredText = [] firstBegin = 0 lastEnd = 0 for sentenceIdx in range(len(sentences)): tokens = sentences[sentenceIdx]['tokens'] ## for tokenIdx in range(len(tokens)):