Beispiel #1
0
def main():
    """Generate out-of-fold predictions for the training set and save them.

    The training data is split into 5 folds. For every fold a fresh
    ``NERModel`` is built on a reset TensorFlow graph, trained on the
    remaining folds (with the held-out fold passed as the evaluation set)
    and then used to predict the held-out samples. Each prediction is
    stored at the index of the sample it belongs to, so the final list is
    aligned with the training set. The result is passed through
    ``format_predictions`` and written to a ``.npy`` file.
    """
    folds_dir = './results/train_folds/'
    output_path = './data/predictions/formatted_train_predictions.npy'

    config = Config(folds_dir)
    splitter = KFold(n_splits=5)

    dataset = CoNLLDataset(config.filename_train, config.processing_word,
                           config.processing_tag, config.max_iter)

    # Materialise the dataset so folds can be selected with index arrays.
    samples = np.array([sample for sample in dataset])
    # One slot per training sample; filled in fold by fold below.
    predictions = [0] * len(samples)

    for fit_idx, held_out_idx in splitter.split(samples):
        fit_split = samples[fit_idx]
        held_out_split = samples[held_out_idx]

        # Every fold starts from a clean graph and a newly created config;
        # the config of the last fold is reused for formatting below.
        tf.reset_default_graph()
        config = Config(folds_dir)
        model = NERModel(config)
        model.build()
        model.train(fit_split, held_out_split)

        fold_tags = model.predict_test(held_out_split)
        for position, tags in zip(held_out_idx, fold_tags):
            predictions[position] = tags
        model.close_session()

    formatted = format_predictions(np.array(predictions), 'train', config)
    np.save(output_path, formatted)
Beispiel #2
0
def main():
    """Run the two-stage prediction pipeline on the test set.

    The first-level ``NERModel`` is restored, evaluated on the test data
    and used to predict it; those predictions are passed through
    ``format_predictions``. The TensorFlow graph is then reset, the
    second-level ``NERModel2`` is restored and evaluated on the formatted
    first-level predictions.
    """

    def report_stage(stage_model, data, banner):
        # Blank line, banner, evaluation, blank line.
        print()
        print(banner)
        stage_model.evaluate(data)
        print()

    # --- Predictions of the first-level model ---
    config_first = Config(dir_output='./results/train_first/')
    first_model = NERModel(config_first)
    first_model.build()
    first_model.restore_session(config_first.dir_model)

    test = CoNLLDataset(
        config_first.filename_test,
        config_first.processing_word,
        config_first.processing_tag,
        config_first.max_iter,
    )

    report_stage(first_model, test, 'Predicting first stage!')

    first_stage_output = first_model.predict_test(test)
    formatted_predictions = format_predictions(
        first_stage_output, 'test', config_first)

    # --- Predictions of the second-level model ---
    # The second model is built on a fresh default graph.
    tf.reset_default_graph()
    config_second = Config(dir_output='./results/train_second/')
    second_model = NERModel2(config_second)
    second_model.build()
    second_model.restore_session(config_second.dir_model)

    report_stage(second_model, formatted_predictions,
                 'Predicting second stage!')
Beispiel #3
0
        new_test_filenames.append(f)
test_filenames = new_test_filenames

# Wrap every input in a CoNLLDataset. None is passed in the tag-processing
# position; the stream kind and file format come from the script settings.
conll_test_files = []
for f in test_filenames:
    test = CoNLLDataset(
        f,
        config.processing_word,
        None,
        config.max_iter,
        stream=input_format,
        file_format=file_format,
    )
    conll_test_files.append(test)

# Register out-of-vocabulary words found in the test data. This runs before
# the model is constructed from the same config object.
add_oov_words(conll_test_files, config)

# Build the model and load the trained weights.
model = NERModel(config)
model.build()
model.restore_session(config.dir_model)


# Run prediction for every dataset. Results go to stdout unless file output
# was requested, in which case they are written under args.output_folder.
# conll_test_files is built one-to-one from test_filenames, so the two lists
# are walked in lockstep instead of tracking a manual index.
for test_filename, test in zip(test_filenames, conll_test_files):
    if output_format != FileStream.FILE:
        # sys.stdout is shared with the rest of the process; never close it.
        model.predict_test(test, output=sys.stdout)
        continue

    # With FileStream.SYSTEM input, results are appended to a fixed
    # "output.txt"; otherwise the output file reuses the input file's base
    # name and is overwritten.
    appending = input_format == FileStream.SYSTEM
    if appending:
        output_file = os.path.join(args.output_folder, "output.txt")
    else:
        output_file = os.path.join(args.output_folder,
                                   os.path.basename(test_filename))

    # The context manager closes (and therefore flushes) the file even if
    # prediction raises; the original left every opened handle dangling.
    with open(output_file, "a" if appending else "w") as output_target:
        if appending:
            sys.stderr.write("output is appended to :" + output_file + "\n")
        model.predict_test(test, output=output_target)