def main():
    """Produce out-of-fold predictions for the training set and save them.

    The training data is split into 5 folds with ``KFold``. For every fold a
    fresh ``NERModel`` is built and trained on the training part of the split,
    then used to predict tags for the held-out part. Each prediction is stored
    at the index of the sample it belongs to, so the final array is aligned
    with the original training data. The predictions are passed through
    ``format_predictions`` and written with ``np.save``.
    """
    output_dir = './results/train_folds/'
    train_predictions_file = './data/predictions/formatted_train_predictions.npy'
    n_splits = 5

    config = Config(output_dir)
    kf = KFold(n_splits=n_splits)

    train = CoNLLDataset(config.filename_train, config.processing_word,
                         config.processing_tag, config.max_iter)
    # Materialise the dataset so it can be indexed with the fold index arrays.
    train = np.array([el for el in train])

    # One slot per training sample; every slot is overwritten by exactly one
    # fold because each sample is held out once.
    predictions = [0] * len(train)

    for train_ids, evaluate_ids in kf.split(train):
        train_dataset = train[train_ids]
        evaluate_dataset = train[evaluate_ids]

        # Start every fold from an empty graph and a fresh config/model.
        # `config` is deliberately rebound: the instance of the last fold is
        # the one handed to format_predictions below, as before.
        tf.reset_default_graph()
        config = Config(output_dir)
        model = NERModel(config)
        model.build()
        try:
            model.train(train_dataset, evaluate_dataset)
            fold_tags = model.predict_test(evaluate_dataset)
            for sample_idx, tags in zip(evaluate_ids, fold_tags):
                predictions[sample_idx] = tags
        finally:
            # Release the session even when training or prediction fails.
            model.close_session()

    predictions = np.array(predictions)
    formatted_predictions = format_predictions(predictions, 'train', config)
    np.save(train_predictions_file, formatted_predictions)
def main():
    """Evaluate the two-stage pipeline on the test set.

    Stage one restores a ``NERModel`` from ``./results/train_first/``,
    evaluates it on the test data, and turns its predictions into the input
    of stage two via ``format_predictions``. Stage two restores a
    ``NERModel2`` from ``./results/train_second/`` and evaluates it on those
    formatted predictions.
    """
    #
    # Predictions by the first-level model
    #
    config_first = Config(dir_output='./results/train_first/')
    first_model = NERModel(config_first)
    first_model.build()
    first_model.restore_session(config_first.dir_model)
    try:
        test = CoNLLDataset(config_first.filename_test,
                            config_first.processing_word,
                            config_first.processing_tag,
                            config_first.max_iter)

        print()
        print('Predicting first stage!')
        first_model.evaluate(test)
        print()

        test_predictions = first_model.predict_test(test)
        formatted_predictions = format_predictions(test_predictions, 'test',
                                                   config_first)
    finally:
        # Release the first-stage session before the graph is reset and the
        # second model is built, also when evaluation fails.
        first_model.close_session()

    #
    # Predictions by the second-level model
    #
    tf.reset_default_graph()
    config_second = Config(dir_output='./results/train_second/')
    second_model = NERModel2(config_second)
    second_model.build()
    second_model.restore_session(config_second.dir_model)

    print()
    print('Predicting second stage!')
    second_model.evaluate(formatted_predictions)
    print()
    # NOTE(review): the second-stage session is left open until process exit,
    # as before; NERModel2 presumably also offers close_session() - confirm.
new_test_filenames.append(f) test_filenames=new_test_filenames # build model conll_test_files = [] for f in test_filenames: test = CoNLLDataset(f, config.processing_word,None, config.max_iter, stream=input_format,file_format=file_format) conll_test_files.append(test) #add OOV words to the model add_oov_words(conll_test_files,config) model = NERModel(config) model.build() model.restore_session(config.dir_model) # create dataset i = 0 for test in conll_test_files: output_target = sys.stdout if output_format==FileStream.FILE: if input_format== FileStream.SYSTEM: output_file = os.path.join(args.output_folder,"output.txt") output_target=open(output_file,"a") sys.stderr.write("output is appended to :"+output_file+"\n") else: output_target=open(os.path.join(args.output_folder,os.path.basename(test_filenames[i])),"w") model.predict_test(test,output=output_target) i+=1