def main():
    """Train the HANN model, evaluate it on the test split, and append the
    resulting metrics to ``<dir_output>/test_results.txt``."""
    # create instance of config
    cfg = Config(parser)

    # build model
    net = HANNModel(cfg)
    net.build()
    if cfg.restore:
        # optional, restore weights
        net.restore_session("results/test/model.weights/")

    # create the three dataset splits with identical preprocessing
    def load(filename):
        return Dataset(filename, cfg.processing_word,
                       cfg.processing_tag, cfg.max_iter)

    dev = load(cfg.filename_dev)
    train = load(cfg.filename_train)
    test = load(cfg.filename_test)

    # train model
    net.train(train, dev)

    # evaluate model: reload the best checkpoint, then score the test split
    net.restore_session(cfg.dir_model)
    metrics = net.evaluate(test)

    results_path = os.path.join(cfg.dir_output, 'test_results.txt')
    with open(results_path, 'a') as file:
        for key in ('classification-report', 'confusion-matrix'):
            file.write('{}\n'.format(metrics[key]))
        file.write('{}\n\n'.format(metrics['weighted-f1']))
def main():
    """Build the HANN model, warm-start it from a saved checkpoint (with the
    'proj' scope reinitialised), then train and evaluate."""
    # create instance of config
    cfg = Config(parser)

    # build model
    net = HANNModel(cfg)
    net.build()

    # --- comment this if model is trained from scratch ---------------------
    cfg.restore = True
    if cfg.restore:
        # optional, restore weights
        net.restore_session(
            "/home/lena/Dokumente/Master/dissertation/Data/output/model.weights"
        )
        # reinitialise for this scope
        net.reinitialize_weights("proj")
    # -----------------------------------------------------------------------

    # create datasets (dev/train/test share the same preprocessing pipeline)
    dev, train, test = (
        Dataset(name, cfg.processing_word, cfg.processing_tag, cfg.max_iter)
        for name in (cfg.filename_dev, cfg.filename_train, cfg.filename_test)
    )

    # train model
    net.train(train, dev)

    # evaluate model
    net.evaluate(test)
def predict(weights_path, dataset, abstract_sentences):
    """Tag each sentence of an abstract with the restored HANN model.

    Args:
        weights_path: directory holding the trained model weights.
        dataset: dataset identifier forwarded to ``Config``.
        abstract_sentences: iterable of sentence strings (one per line).

    Returns:
        A flat list of predicted label strings, one per input sentence.
    """
    arg_parser = argparse.ArgumentParser()
    cfg = Config(arg_parser, log_config=False, dataset=dataset)

    # restore model weights
    net = HANNModel(cfg)
    net.build()
    net.restore_session(weights_path)

    # split each sentence into words and map the words to vocabulary ids
    encoded = [
        [cfg.processing_word(tok) for tok in sent.split()]
        for sent in abstract_sentences
    ]

    # run prediction
    predictions, _ = net.predict_batch([encoded])

    # invert the tag vocabulary: label id -> label string
    id2tag = {idx: tag for tag, idx in cfg.vocab_tags.items()}

    # flatten the nested prediction lists into one list of label strings
    return [id2tag[label_id] for batch in predictions for label_id in batch]
def main():
    """Restore a trained HANN model from disk and run it on new test data.

    Training is deliberately skipped in this variant: the graph is built,
    pretrained weights are restored, and ``testNewData`` evaluates the model
    on the test split.  Remove the ``config.restore`` override below to train
    from scratch instead.

    Fixes: removed leftover ``print('x')`` debug statements and large blocks
    of commented-out dead code (weighted-loss experiment, disabled train/
    evaluate calls).
    """
    # create instance of config
    config = Config(parser)

    # build model
    model = HANNModel(config)
    model.build()

    # Force-restore pretrained weights; comment this override out if the
    # model should be trained from scratch.
    config.restore = True
    if config.restore:
        # optional, restore weights (UNC path to the saved checkpoint)
        model.restore_session("\\\\smbhome.uscs.susx.ac.uk\\ls612\\Documents\\Dissertation\\Data\\newEmbedding\\model.weights")

    # create datasets; dev/train are loaded for parity with the training
    # script, but only `test` is consumed below
    dev = Dataset(config.filename_dev, config.processing_word,
                  config.processing_tag, config.max_iter)
    train = Dataset(config.filename_train, config.processing_word,
                    config.processing_tag, config.max_iter)
    test = Dataset(config.filename_test, config.processing_word,
                   config.processing_tag, config.max_iter)

    # evaluate the restored model on the new data
    testNewData(test, config, model)
def main():
    """Train the HANN model with data augmentation, then evaluate the best
    checkpoint on the test split."""
    # create instance of config
    cfg = Config(parser)

    # experiment hyper-parameters for this augmentation run
    cfg.num_augmentation = 20000
    cfg.batch_size = 20
    cfg.batch_size_aug = 20
    cfg.dir_output = 'test-num_augmentation-{}-2'.format(cfg.num_augmentation)
    cfg.dir_model = os.path.join(cfg.dir_output, "model.weights")

    # build model
    net = HANNModel(cfg)
    net.build()

    # create datasets
    def load(filename):
        return Dataset(filename, cfg.processing_word, cfg.processing_tag)

    dev = load(cfg.filename_dev)
    train = load(cfg.filename_train)
    test = load(cfg.filename_test)

    # augmentation data, capped at num_augmentation examples (or disabled)
    data_aug = None
    if cfg.num_augmentation:
        data_aug = Dataset(cfg.filename_aug, cfg.processing_word,
                           max_iter=cfg.num_augmentation)

    # train model
    net.train(train, dev, data_aug)

    # evaluate model with the best weights saved during training
    net.restore_session(cfg.dir_model)
    net.evaluate(test)
import argparse
import codecs
import os
import re
import sys
import time
import warnings

# Silence TensorFlow C++ logging and pin the visible GPUs; these must be set
# before TensorFlow is imported anywhere downstream.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
os.environ['CUDA_VISIBLE_DEVICES'] = '1,2,3'

warnings.filterwarnings("ignore")

import text_tokenize

tokenizer = text_tokenize.mytokenizer()

parser = argparse.ArgumentParser()
config = Config(parser)

# build model and restore the trained weights
model = HANNModel(config)
model.build()
model.restore_session("study_arms/model.weights")


def preprocess_ab(line):
    """Split one ``pmid | title | abstract`` record into its parts.

    Args:
        line: a single record with at least three ' | '-separated fields.

    Returns:
        (pmid, title, sents) where ``sents`` is the abstract split into
        sentences by the project tokenizer.
    """
    # raw string for the regex (was a plain string with an invalid \s escape)
    info = re.split(r"\s\|\s", line)
    pmid = info[0]
    title = info[1]
    ab = info[2]
    sents = tokenizer.sent_tokenize(ab, mask=False)
    return pmid, title, sents
def main():
    """Run cross-validation of the HANN model over folds 2-5 and append
    per-fold and averaged precision/recall/F1 to a results file.

    Fixes: replaced list comprehensions that were used purely for their side
    effects (appending metrics) with plain ``for`` loops, consolidated the
    three identical summary open/write blocks into one, and dropped large
    stretches of commented-out dead code.
    """
    # create instance of config
    config = Config()
    config.num_augmentation = 20000
    config.batch_size = 20
    config.batch_size_aug = 20
    config.dir_output = 'test-num_augmentation-{}'.format(
        config.num_augmentation)
    config.dir_model = os.path.join(config.dir_output, "model.weights")

    result_file_path = os.path.join(config.dir_output,
                                    'cross_validate_results')

    tags = ('P', 'I', 'O')
    precisions = {'P': [], 'I': [], 'O': []}
    recalls = {'P': [], 'I': [], 'O': []}
    f1s = {'P': [], 'I': [], 'O': []}

    # NOTE(review): range(2, 6) runs folds 2-5 only — confirm fold 1 is
    # intentionally excluded.
    for fold in range(2, 6):
        print('Fold {}'.format(fold))

        # build model (a fresh instance per fold)
        model = HANNModel(config)
        model.build()

        # create this fold's dataset splits
        train = Dataset(os.path.join(config.data_root, str(fold), 'train.txt'),
                        config.processing_word, config.processing_tag)
        dev = Dataset(os.path.join(config.data_root, str(fold), 'dev.txt'),
                      config.processing_word, config.processing_tag)
        test = Dataset(os.path.join(config.data_root, str(fold), 'test.txt'),
                       config.processing_word, config.processing_tag)

        # optional augmentation data, capped at num_augmentation examples
        if config.num_augmentation:
            data_aug = Dataset(config.filename_aug, config.processing_word,
                               max_iter=config.num_augmentation)
        else:
            data_aug = None

        # train model, then evaluate the best checkpoint on this fold
        model.train(train, dev, data_aug)
        model.restore_session(config.dir_model)
        metrics = model.evaluate(test)

        # accumulate per-tag scores (plain loop — the original used list
        # comprehensions for side effects only)
        for tag in tags:
            precisions[tag].append(metrics['precision'][tag])
            recalls[tag].append(metrics['recall'][tag])
            f1s[tag].append(metrics['f1'][tag])

        msg = 'fold: {}\tprecision: {}\trecall: {}\tf1: {}\n'.format(
            fold, metrics['precision'], metrics['recall'], metrics['f1'])
        print(msg)
        with open(result_file_path, 'a') as ofile:
            ofile.write(msg)

    # averaged metrics across all folds
    res = np.mean([np.mean(values) for values in f1s.values()])
    summary = [
        'Average Precision: P: {}\tI: {}\tO: {}\n'.format(
            np.mean(precisions['P']), np.mean(precisions['I']),
            np.mean(precisions['O'])),
        'Average Recall: P: {}\tI: {}\tO: {}\n'.format(
            np.mean(recalls['P']), np.mean(recalls['I']),
            np.mean(recalls['O'])),
        'Average F1: P: {}\tI: {}\tO: {}\tall: {}\n'.format(
            np.mean(f1s['P']), np.mean(f1s['I']), np.mean(f1s['O']), res),
    ]
    for msg in summary:
        print(msg)
    with open(result_file_path, 'a') as ofile:
        ofile.writelines(summary)
        ofile.write('\n\n\n')