Example #1
def main():
    # create instance of config
    config = Config(parser)

    # build model
    model = HANNModel(config)
    model.build()
    if config.restore:
        model.restore_session(
            "results/test/model.weights/")  # optional, restore weights

    # create datasets
    dev = Dataset(config.filename_dev, config.processing_word,
                  config.processing_tag, config.max_iter)
    train = Dataset(config.filename_train, config.processing_word,
                    config.processing_tag, config.max_iter)
    test = Dataset(config.filename_test, config.processing_word,
                   config.processing_tag, config.max_iter)

    # train model
    model.train(train, dev)

    # evaluate model
    model.restore_session(config.dir_model)
    metrics = model.evaluate(test)

    with open(os.path.join(config.dir_output, 'test_results.txt'),
              'a') as file:
        file.write('{}\n'.format(metrics['classification-report']))
        file.write('{}\n'.format(metrics['confusion-matrix']))
        file.write('{}\n\n'.format(metrics['weighted-f1']))
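These examples assume a module-level argparse parser that Config reads its settings from, plus a standard entry-point guard. A minimal sketch of that surrounding boilerplate; the specific flag below is an assumption, not part of the original code:

import argparse

# Hypothetical argument setup; the real flags accepted by Config are not
# shown in the examples, so this one is illustrative only.
parser = argparse.ArgumentParser(description='Train and evaluate a HANN sentence classifier')
parser.add_argument('--restore', action='store_true',
                    help='restore saved weights before training')

if __name__ == '__main__':
    main()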
Example #2
def main():
    # create instance of config
    config = Config(parser)

    # build model
    model = HANNModel(config)
    model.build()
    # --- comment out this block if the model is trained from scratch ---
    config.restore = True
    if config.restore:
        model.restore_session(
            "/home/lena/Dokumente/Master/dissertation/Data/output/model.weights"
        )  # optional, restore weights
    model.reinitialize_weights("proj")  # reinitialise weights in this scope
    # --------------------------------------------------------------------

    # create datasets
    dev = Dataset(config.filename_dev, config.processing_word,
                  config.processing_tag, config.max_iter)
    train = Dataset(config.filename_train, config.processing_word,
                    config.processing_tag, config.max_iter)
    test = Dataset(config.filename_test, config.processing_word,
                   config.processing_tag, config.max_iter)

    # train model
    model.train(train, dev)

    # evaluate model
    model.evaluate(test)
Example #3
def predict(weights_path, dataset, abstract_sentences):
    parser = argparse.ArgumentParser()
    config = Config(parser, log_config=False, dataset=dataset)

    # restore model weights
    model = HANNModel(config)
    model.build()
    model.restore_session(weights_path)

    sentences_words = []
    # map each sentence to a list of word ids
    for line in abstract_sentences:
        # split the sentence into words and map each word to its id
        sentence = [config.processing_word(word) for word in line.split()]
        sentences_words.append(sentence)

    # run prediction
    labels_pred, _ = model.predict_batch([sentences_words])

    # map: label id to label string
    tag_id_to_label = dict((v, k) for k, v in config.vocab_tags.items())

    # convert predicted labels to string
    labels_pred_str = []
    for sublist in labels_pred:
        for item in sublist:
            labels_pred_str.append(tag_id_to_label[item])

    return labels_pred_str
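A hedged usage sketch for the predict helper above; the weights path, dataset name, and abstract sentences are placeholders, not values taken from the original example:

# Hypothetical call to predict(); the path and dataset name must match an
# actually trained model and its Config setup.
abstract = [
    'We conducted a randomised controlled trial in 120 adults .',
    'Participants received either the intervention or placebo .',
]
labels = predict('results/test/model.weights/', 'pubmed', abstract)
for label, sentence in zip(labels, abstract):
    print('{}\t{}'.format(label, sentence))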
Example #4
def main():
    # create instance of config
    print('x')
    
    config = Config(parser)
    # config.lossParam = 'weights'  # not using weights for now
    
    # build model
    model = HANNModel(config)
    model.build()
    print('x')
    # --- comment out this block if the model is trained from scratch ---
    config.restore = True
    if config.restore:
        model.restore_session(
            "\\\\smbhome.uscs.susx.ac.uk\\ls612\\Documents\\Dissertation\\Data\\newEmbedding\\model.weights"
        )  # optional, restore weights
    # model.reinitialize_weights("proj")  # reinitialise weights in this scope
    # --------------------------------------------------------------------
    print('x')
    # create datasets
    dev = Dataset(config.filename_dev, config.processing_word,
                  config.processing_tag, config.max_iter)
    train = Dataset(config.filename_train, config.processing_word,
                    config.processing_tag, config.max_iter)
    test = Dataset(config.filename_test, config.processing_word,
                   config.processing_tag, config.max_iter)

    
    # if config.lossParam == 'weights':
    #     weights = getWeights(train)
    #     wList = [[w] for w in weights]
    #     model.class_weight = wList
    #     print(model.class_weight)
    #     print('Using balanced class weights')
    #     # gives self.loss as Tensor("add_5:0", shape=(), dtype=float32)

    # train model
    # model.train(train, dev)

    # evaluate model
    # model.evaluate(test)
    testNewData(test, config, model)
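testNewData is not defined in this example. A minimal sketch of what such a helper could look like, assuming it only evaluates the restored model and appends the metrics to the configured output directory; this body is an assumption, not the author's implementation:

import os

def testNewData(test, config, model):
    # Hypothetical helper: evaluate the restored model on the new test set
    # and append whatever metrics model.evaluate() returns to a results file.
    metrics = model.evaluate(test)
    with open(os.path.join(config.dir_output, 'new_data_results.txt'), 'a') as ofile:
        ofile.write('{}\n'.format(metrics))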
Example #5
def main():
    # create instance of config
    config = Config(parser)
    config.num_augmentation = 20000
    config.batch_size = 20
    config.batch_size_aug = 20
    config.dir_output = 'test-num_augmentation-{}-2'.format(
        config.num_augmentation)
    config.dir_model = os.path.join(config.dir_output, "model.weights")

    # build model
    model = HANNModel(config)
    model.build()
    # if config.restore:
    # model.restore_session("results/test/model.weights/") # optional, restore weights
    # model.reinitialize_weights("proj")

    # create datasets
    dev = Dataset(config.filename_dev, config.processing_word,
                  config.processing_tag)
    train = Dataset(config.filename_train, config.processing_word,
                    config.processing_tag)
    test = Dataset(config.filename_test, config.processing_word,
                   config.processing_tag)
    if config.num_augmentation:
        data_aug = Dataset(config.filename_aug,
                           config.processing_word,
                           max_iter=config.num_augmentation)
    else:
        data_aug = None

    # train model
    model.train(train, dev, data_aug)

    # evaluate model
    model.restore_session(config.dir_model)
    model.evaluate(test)
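Because this example derives dir_output (and from it dir_model) from the augmentation size, each setting writes to its own directory. A small, purely illustrative sketch of how those paths compose, mirroring the format string above:

import os

num_augmentation = 20000
dir_output = 'test-num_augmentation-{}-2'.format(num_augmentation)
dir_model = os.path.join(dir_output, 'model.weights')
print(dir_model)  # test-num_augmentation-20000-2/model.weights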
Example #6
import argparse
import codecs, re, time
import os, sys, warnings
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
os.environ['CUDA_VISIBLE_DEVICES'] = '1,2,3'
warnings.filterwarnings("ignore")

import text_tokenize
tokenizer = text_tokenize.mytokenizer()

parser = argparse.ArgumentParser()
config = Config(parser)
# build model
model = HANNModel(config)
model.build()
model.restore_session("study_arms/model.weights")


def preprocess_ab(line):
    # split a "pmid | title | abstract" record into its fields
    info = re.split(r"\s\|\s", line)
    pmid = info[0]
    title = info[1]
    ab = info[2]
    #sents = sent_tokenize(line)
    sents = tokenizer.sent_tokenize(ab, mask=False)
    #sents.insert(0,title)
    #title = sents[0]

    return pmid, title, sents
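A hedged sketch of how preprocess_ab could feed the model restored above; the word-to-id mapping mirrors the earlier predict helper, and the driver function itself is an assumption rather than code from this example:

def label_abstract(line):
    # Hypothetical driver: split one "pmid | title | abstract" record,
    # sentence-tokenize the abstract, and label each sentence.
    pmid, title, sents = preprocess_ab(line)
    sentences_words = [[config.processing_word(w) for w in sent.split()]
                       for sent in sents]
    labels_pred, _ = model.predict_batch([sentences_words])
    tag_id_to_label = {v: k for k, v in config.vocab_tags.items()}
    labels = [tag_id_to_label[i] for i in labels_pred[0]]
    return pmid, list(zip(labels, sents))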
Example #7
def main():
    # create instance of config
    config = Config()
    config.num_augmentation = 20000
    config.batch_size = 20
    config.batch_size_aug = 20
    config.dir_output = 'test-num_augmentation-{}'.format(
        config.num_augmentation)
    config.dir_model = os.path.join(config.dir_output, "model.weights")

    result_file_path = os.path.join(config.dir_output,
                                    'cross_validate_results')

    precisions = {'P': [], 'I': [], 'O': []}
    recalls = {'P': [], 'I': [], 'O': []}
    f1s = {'P': [], 'I': [], 'O': []}

    for fold in range(2, 6):
        # tf.reset_default_graph()
        print('Fold {}'.format(fold))

        # build model
        model = HANNModel(config)
        model.build()
        # if config.restore:
        # model.restore_session("results/test/model.weights/") # optional, restore weights
        # model.reinitialize_weights("proj")

        # create datasets
        train = Dataset(os.path.join(config.data_root, str(fold), 'train.txt'),
                        config.processing_word, config.processing_tag)
        dev = Dataset(os.path.join(config.data_root, str(fold), 'dev.txt'),
                      config.processing_word, config.processing_tag)
        test = Dataset(os.path.join(config.data_root, str(fold), 'test.txt'),
                       config.processing_word, config.processing_tag)
        if config.num_augmentation:
            data_aug = Dataset(config.filename_aug,
                               config.processing_word,
                               max_iter=config.num_augmentation)
        else:
            data_aug = None

        # train model
        model.train(train, dev, data_aug)

        # evaluate model
        model.restore_session(config.dir_model)
        metrics = model.evaluate(test)

        for tag in ['P', 'I', 'O']:
            precisions[tag].append(metrics['precision'][tag])
            recalls[tag].append(metrics['recall'][tag])
            f1s[tag].append(metrics['f1'][tag])
        msg = 'fold: {}\tprecision: {}\trecall: {}\tf1: {}\n'.format(
            fold, metrics['precision'], metrics['recall'], metrics['f1'])
        print(msg)
        with open(result_file_path, 'a') as ofile:
            ofile.write(msg)

    # print('Precision: ', 'P: ', (precisions['P']), 'I: ', (precisions['I']), 'O: ', (precisions['O']))
    # print('Recall: ', 'P: ', (recalls['P']), 'I: ', (recalls['I']), 'O: ', (recalls['O']))
    # print('F1: ', 'P: ', (f1s['P']), 'I: ', (f1s['I']), 'O: ', (f1s['O']))
    # print('Precision: ', 'P: ', np.mean(precisions['P']), 'I: ', np.mean(precisions['I']), 'O: ', np.mean(precisions['O']))
    # print('Recall: ', 'P: ', np.mean(recalls['P']), 'I: ', np.mean(recalls['I']), 'O: ', np.mean(recalls['O']))
    # res = np.mean([np.mean(values) for values in f1s.values()])
    # print('F1: ', 'P: ', np.mean(f1s['P']), 'I: ', np.mean(f1s['I']), 'O: ', np.mean(f1s['O']), 'all avg: ', res)
    msg = 'Average Precision: P: {}\tI: {}\tO: {}\n'.format(
        np.mean(precisions['P']), np.mean(precisions['I']),
        np.mean(precisions['O']))
    print(msg)
    with open(result_file_path, 'a') as ofile:
        ofile.write(msg)
    msg = 'Average Recall: P: {}\tI: {}\tO: {}\n'.format(
        np.mean(recalls['P']), np.mean(recalls['I']), np.mean(recalls['O']))
    print(msg)
    with open(result_file_path, 'a') as ofile:
        ofile.write(msg)
    res = np.mean([np.mean(values) for values in f1s.values()])
    msg = 'Average F1: P: {}\tI: {}\tO: {}\tall: {}\n'.format(
        np.mean(f1s['P']), np.mean(f1s['I']), np.mean(f1s['O']), res)
    print(msg)
    with open(result_file_path, 'a') as ofile:
        ofile.write(msg)
        ofile.write('\n\n\n')
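The commented-out tf.reset_default_graph() in the fold loop hints at a real issue: building a fresh HANNModel per fold on the TF1-style graph API can accumulate variables across iterations. A minimal sketch of clearing the graph at the top of each fold, assuming the project uses that API (which the commented call implies):

import tensorflow as tf

for fold in range(2, 6):
    # Drop any graph left over from the previous fold before building a new
    # model, so variable names do not collide across folds.
    tf.reset_default_graph()
    model = HANNModel(config)
    model.build()
    # ... create datasets, model.train(train, dev, data_aug), and evaluate as above ...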