def __init__(self):
    """Build the ELMo embedding graph.

    Creates input placeholders and `self.lm_emb_op`, which stacks the
    three ELMo layers into a single [B, slen, 1024, 3] tensor.
    """
    # Start from a clean graph so re-instantiating this object does not
    # accumulate duplicate ops in the default graph.
    tf.reset_default_graph()

    set_gpus(0)

    # Pretrained ELMo module from TF-Hub (its weights stay trainable).
    self.elmo = hub.Module("https://tfhub.dev/google/elmo/1", trainable=True)
    self.sentences = tf.placeholder('string', shape=(None, None))
    # NOTE(review): shape=(None) is just None (fully unconstrained);
    # [None] was probably intended — behavior kept as-is.
    self.text_len = tf.placeholder('int32', shape=(None))

    elmo_out = self.elmo(
        inputs={
            "tokens": self.sentences,
            "sequence_len": self.text_len
        },
        signature="tokens",
        as_dict=True)

    # The context-independent token layer is 512-d; duplicate it so all
    # three layers are 1024-d before stacking along a new last axis.
    tok_emb = tf.expand_dims(elmo_out["word_emb"], 3)       # [B, slen, 512, 1]
    layer0 = tf.concat([tok_emb, tok_emb], 2)               # [B, slen, 1024, 1]
    layer1 = tf.expand_dims(elmo_out["lstm_outputs1"], 3)   # [B, slen, 1024, 1]
    layer2 = tf.expand_dims(elmo_out["lstm_outputs2"], 3)   # [B, slen, 1024, 1]
    self.lm_emb_op = tf.concat([layer0, layer1, layer2], 3)  # [B, slen, 1024, 3]
def _load_model(self, experiment):
    """Restore a trained CorefModel and keep it, plus a *live* TF
    session, on `self` for later inference.

    Args:
        experiment: section name in experiments.conf; also the
            checkpoint directory name under log_root.
    """
    util.set_gpus()
    # print(...) is version-neutral for a single argument; keeps this
    # consistent with the print-function style used elsewhere in the file.
    print("Running experiment: {}.".format(experiment))

    config = util.get_config("experiments.conf")[experiment]
    config["log_dir"] = util.mkdirs(os.path.join(config["log_root"], experiment))
    util.print_config(config)

    model = cm.CorefModel(config)
    saver = tf.train.Saver()
    log_dir = config["log_dir"]

    # BUG FIX: the original restored the checkpoint inside
    # `with tf.Session() as session:` and then stored that session on
    # self -- the context manager closes the session on exit, so every
    # later self.session.run() would fail. Create the session without a
    # context manager and keep it open; the owner is responsible for
    # closing it when done.
    session = tf.Session()
    checkpoint_path = os.path.join(log_dir, "model.max.ckpt")
    saver.restore(session, checkpoint_path)

    self.model = model
    self.session = session
def __init__(self, bert_model="bert_large", num_gpus=0):
    """Load every pipeline component: BERT tokenizer, Flair NER tagger,
    spaCy dependency parser, and the coreference model with a live TF
    session.

    Args:
        bert_model: model directory name under models/ and section
            name in models/coref/experiments.conf.
        num_gpus: GPU id passed through to util.set_gpus().
    """
    indent = "========"
    # Project root = everything before the "src" directory.
    proj_path = os.path.abspath(os.path.dirname(__file__)).split("src")[0]

    # BERT tokenizer (vocab shipped with the chosen model).
    print(indent + " loading BERT Tokenizer " + indent)
    sys.path.insert(1, proj_path + 'models/coref/')
    from bert import tokenization
    self.tokenizer = tokenization.FullTokenizer(
        vocab_file=proj_path + 'models/' + bert_model + '/vocab.txt',
        do_lower_case=False)

    # load NER model
    print(indent + " loading Flair NER model " + indent)
    self.ner_tagger = SequenceTagger.load('ner')

    # load spacy dependency parser
    print(indent + " loading Spacy Dependency Parser ===" + indent)
    self.dep_parser = spacy.load("en_core_web_sm")

    # initialise coref environment
    print(indent + " Initialising coref environment " + indent)
    import util
    # Setting os.environ is sufficient: it is inherited by any child
    # process. The original also ran os.system("export data_dir"),
    # which was a no-op (the export dies with its sub-shell) -- removed.
    os.environ['data_dir'] = proj_path + "models/"
    util.set_gpus(num_gpus)

    print("Running experiment: {}".format(bert_model))
    config = pyhocon.ConfigFactory.parse_file(
        proj_path + "models/coref/experiments.conf")[bert_model]
    config["log_dir"] = util.mkdirs(
        os.path.join(config["log_root"], bert_model))
    print(pyhocon.HOCONConverter.convert(config, "hocon"))

    # Build the model and restore its weights into a session kept open
    # on self for later inference calls.
    self.model = util.get_model(config)
    self.session = tf.Session()
    self.model.restore(self.session)

    print("===========================")
    print("=== Initialisation Done ===")
    print("===========================")
import os
import sys
import time
import json
import numpy as np
import tensorflow as tf
import coref_model as cm
import inference_utils
import input_utils
import srl_model as srl
import util

if __name__ == "__main__":
    # Pin GPU visibility before any TF graph/session work.
    util.set_gpus()

    # Usage: <script> <experiment_name> <output_filename>
    name = sys.argv[1]
    # presumably consumed later in the script (past this chunk) — not
    # used in the visible portion.
    output_filename = sys.argv[2]
    print("Running experiment: {}.".format(name))

    config = util.get_config("experiments.conf")[name]
    config["log_dir"] = util.mkdirs(os.path.join(config["log_root"], name))
    util.print_config(config)

    #model = cm.CorefModel(config)
    model = srl.SRLModel(config)
    model.load_eval_data()

    saver = tf.train.Saver()
import coref_model as cm import util if __name__ == "__main__": if len(sys.argv) > 1: name = sys.argv[1] else: name = os.environ["EXP"] config = util.get_config("experiments.conf")[name] report_frequency = config["report_frequency"] config["log_dir"] = util.mkdirs(os.path.join(config["log_root"], name)) util.print_config(config) if "GPU" in os.environ: util.set_gpus(int(os.environ["GPU"])) else: util.set_gpus() model = cm.CorefModel(config) saver = tf.train.Saver() init_op = tf.global_variables_initializer() log_dir = config["log_dir"] writer = tf.summary.FileWriter(os.path.join(log_dir, "train"), flush_secs=20) # Create a "supervisor", which oversees the training process. sv = tf.train.Supervisor(logdir=log_dir, init_op=init_op, saver=saver,
import os import sys import time import tensorflow as tf import coref_model as cm import util if __name__ == "__main__": config = util.initialize_from_env() task_index = int(os.environ["TASK"]) report_frequency = config["report_frequency"] cluster_config = config["cluster"] util.set_gpus(cluster_config["gpus"][task_index]) cluster = tf.train.ClusterSpec(cluster_config["addresses"]) server = tf.train.Server(cluster, job_name="worker", task_index=task_index) # Assigns ops to the local worker by default. with tf.device( tf.train.replica_device_setter( worker_device="/job:worker/task:%d" % task_index, cluster=cluster)): model = cm.CorefModel(config) saver = tf.train.Saver() init_op = tf.global_variables_initializer() log_dir = config["log_dir"] writer = tf.summary.FileWriter(os.path.join(log_dir,
name) config = util.get_config("experiments.conf")[name] config["log_dir"] = util.mkdirs(os.path.join(config["log_root"], name)) config["batch_size"] = -1 config["max_tokens_per_batch"] = -1 # Use dev lm, if provided. if config["lm_path"] and "lm_path_dev" in config and config["lm_path_dev"]: config["lm_path"] = config["lm_path_dev"] util.print_config(config) if len(sys.argv) > 3 and sys.argv[2] == '-gpu': util.set_gpus(sys.argv[3]) data = LSGNData(config) model = SRLModel(data, config) evaluator = LSGNEvaluator(config) variables_to_restore = [] for var in tf.global_variables(): print var.name if "module/" not in var.name: variables_to_restore.append(var) saver = tf.train.Saver(variables_to_restore) log_dir = config["log_dir"] with tf.Session() as session:
if len(sys.argv) > 1: name = sys.argv[1] else: name = os.environ["EXP"] config = util.get_config("experiments.conf")[name] print('config') report_frequency = config["report_frequency"] config["log_dir"] = util.mkdirs(os.path.join(config["log_root"], name)) util.print_config(config) print((os.environ)) # if "GPU" in os.environ: # gpus = [int(g) for g in os.environ["GPU"].split(",")] # util.set_gpus(*gpus) # else: util.set_gpus(0) data = LSGNData(config) model = SRLModel(data, config) saver = tf.train.Saver() init_op = tf.global_variables_initializer() log_dir = config["log_dir"] assert not ("final" in name ) # Make sure we don't override a finalized checkpoint. writer = tf.summary.FileWriter(log_dir, flush_secs=20) # Create a "supervisor", which oversees the training process. sv = tf.train.Supervisor(logdir=log_dir, init_op=init_op, saver=saver,
sentences: sent, text_len: slen } ) sentence_id = docid + '_' + str(j) ds = fout.create_dataset( sentence_id, lm_emb.shape[1:], dtype='float32', data=lm_emb[0, :, :, :] # [slen, lm_size, lm_layers] ) fout.close #### Model ##### set_gpus(0) elmo = hub.Module("https://tfhub.dev/google/elmo/1", trainable=True) sentences = tf.placeholder('string', shape=(None, None)) text_len = tf.placeholder('int32', shape=(None)) lm_embeddings = elmo( inputs={ "tokens": sentences, "sequence_len": text_len }, signature="tokens", as_dict=True) word_emb = tf.expand_dims(lm_embeddings["word_emb"], 3) # [B, slen, 512] lm_emb_op = tf.concat([ tf.concat([word_emb, word_emb], 2), # [B, slen, 1024, 1] tf.expand_dims(lm_embeddings["lstm_outputs1"], 3),
import util if __name__ == "__main__": if len(sys.argv) > 1: name = sys.argv[1] else: name = os.environ["EXP"] config = util.get_config("experiments.conf")[name] report_frequency = config["report_frequency"] config["log_dir"] = util.mkdirs(os.path.join(config["log_root"], name)) util.print_config(config) if "GPU" in os.environ: gpus = [int(g) for g in os.environ["GPU"].split(",")] util.set_gpus(*gpus) else: util.set_gpus() data = LSGNData(config) model = SRLModel(data, config) saver = tf.train.Saver() init_op = tf.global_variables_initializer() log_dir = config["log_dir"] assert not ("final" in name ) # Make sure we don't override a finalized checkpoint. writer = tf.summary.FileWriter(log_dir, flush_secs=20) # Create a "supervisor", which oversees the training process. sv = tf.train.Supervisor(logdir=log_dir,
sentences: sent, text_len: slen } ) sentence_id = docid + '_' + str(j) print sentence_id ds = fout.create_dataset( sentence_id, lm_emb.shape[1:], dtype='float32', data=lm_emb[0, :, :, :] # [slen, lm_size, lm_layers] ) fout.close #### Model ##### set_gpus(sys.argv[1]) # set the gpu id elmo = hub.Module("https://tfhub.dev/google/elmo/1", trainable=True) sentences = tf.placeholder('string', shape=(None, None)) text_len = tf.placeholder('int32', shape=(None)) lm_embeddings = elmo( inputs={ "tokens": sentences, "sequence_len": text_len }, signature="tokens", as_dict=True) word_emb = tf.expand_dims(lm_embeddings["word_emb"], 3) # [B, slen, 512] lm_emb_op = tf.concat([ tf.concat([word_emb, word_emb], 2), # [B, slen, 1024, 1] tf.expand_dims(lm_embeddings["lstm_outputs1"], 3),
fl += 1 else: if antecedent == (-1, -1): fn += 1 elif span_cluster_id != gold_to_cluster_id[i][antecedent]: wl += 1 else: correct += 1 return num_clusters, num_singular_clusters, num_plural_clusters, num_mixed_clusters, num_mixed_ambiguous, fl, fn, wl, correct, \ num_non_gold, num_total_spans, s_to_p, p_to_s if __name__ == '__main__': gpu_id = 6 util.set_gpus(gpu_id) experiments = [('train_spanbert_large_ee', 'May14_06-02-15'), ('train_spanbert_large_ee', 'May14_06-05-42'), ('train_spanbert_large_lr2e-4_ee', 'May14_06-03-24'), ('train_spanbert_large_lr2e-4_ee', 'May14_06-10-51')] results_final = None for experiment in experiments: results = analyze(*experiment) if results_final is None: results_final = results else: results_final = [r + results[i] for i, r in enumerate(results_final)] # print('%s_%s: # clusters: %d; # singular clusters: %d; # plural clusters: %d; # mixed clusters: %d; '
import os import sys sys.path.append(os.getcwd()) import collections import operator import numpy as np import tensorflow as tf import coref_model as cm import util import conll import metrics if __name__ == "__main__": if "GPU" in os.environ: util.set_gpus(int(os.environ["GPU"])) else: util.set_gpus() names = sys.argv[1:] print("Ensembling models from {}.".format(names)) configs = util.get_config("experiments.conf") main_config = configs[names[0]] model = cm.CorefModel(main_config) model.load_eval_data() saver = tf.train.Saver() with tf.Session() as session:
if len(sys.argv) > 1: name = sys.argv[1] else: name = os.environ["EXP"] config = util.get_config("experiments.conf")[name] print 'config' report_frequency = config["report_frequency"] config["log_dir"] = util.mkdirs(os.path.join(config["log_root"], name)) util.print_config(config) print os.environ # if "GPU" in os.environ: # gpus = [int(g) for g in os.environ["GPU"].split(",")] # util.set_gpus(*gpus) # else: util.set_gpus(config['gpu']) data = LSGNData(config) coref_config = copy.deepcopy(config) coref_config['train_path'] = config['train_path_coref'] coref_config['lm_path'] = config['lm_path_coref'] coref_config['eval_path'] = config['eval_path_coref'] coref_config['lm_path_dev'] = config['lm_path_dev_coref'] coref_config['ner_weight'] = 0 coref_config['coref_weight'] = 1 coref_config['relation_weight'] = 0 # coref_config['batch_size'] = 30 coref_config['coref_depth'] = 0 model = SRLModel(data, config) if config['coref_freq']: coref_data = LSGNData(coref_config)
# NOTE(review): this chunk starts mid-file; `os` and `sys` are imported
# above the visible region.
sys.path.append(os.getcwd())

import time
import random
import shutil
import numpy as np
import tensorflow as tf
import coref_model as cm
import util


def copy_checkpoint(source, target):
    """Copy a TF checkpoint from prefix `source` to prefix `target`.

    `source`/`target` are checkpoint path prefixes; the two files TF
    writes per checkpoint (.index and the single data shard) are copied.
    """
    for ext in (".index", ".data-00000-of-00001"):
        shutil.copyfile(source + ext, target + ext)


if __name__ == "__main__":
    # Pin GPU visibility before building the model graph.
    util.set_gpus()

    # Experiment name: first CLI argument, else the EXP env variable.
    if len(sys.argv) > 1:
        name = sys.argv[1]
        print("Running experiment: {} (from command-line argument).".format(name))
    else:
        name = os.environ["EXP"]
        print("Running experiment: {} (from environment variable).".format(name))

    config = util.get_config("experiments.conf")[name]
    config["log_dir"] = util.mkdirs(os.path.join(config["log_root"], name))
    util.print_config(config)

    model = cm.CorefModel(config)
    saver = tf.train.Saver()
name = sys.argv[1] else: name = os.environ["EXP"] config = util.get_config("experiments.conf")[name] print('config') report_frequency = config["report_frequency"] config["log_dir"] = util.mkdirs(os.path.join(config["log_root"], name)) util.print_config(config) print((os.environ)) # if "GPU" in os.environ: # gpus = [int(g) for g in os.environ["GPU"].split(",")] # util.set_gpus(*gpus) # else: util.set_gpus(config['gpu_id']) data = LSGNData(config) model = SRLModel(data, config) saver = tf.train.Saver() init_op = tf.global_variables_initializer() log_dir = config["log_dir"] assert not ("final" in name ) # Make sure we don't override a finalized checkpoint. writer = tf.summary.FileWriter(log_dir, flush_secs=20) # Create a "supervisor", which oversees the training process. sv = tf.train.Supervisor(logdir=log_dir, init_op=init_op, saver=saver,