def do_shell(args):
    """Interactive REPL: restore a trained WindowModel and tag user sentences.

    Reads whitespace-tokenized sentences from stdin, feeds them through the
    restored model with dummy "O" gold labels, and prints the predicted tags.
    Exits on EOF (Ctrl-D).
    """
    config = Config(args.model_path)
    helper = ModelHelper.load(args.model_path)
    embeddings = load_embeddings(args, helper)
    config.embed_size = embeddings.shape[1]

    with tf.Graph().as_default():
        logger.info("Building model...",)
        t0 = time.time()
        model = WindowModel(helper, config, embeddings)
        logger.info("took %.2f seconds", time.time() - t0)

        init_op = tf.global_variables_initializer()
        saver = tf.train.Saver()

        with tf.Session() as sess:
            sess.run(init_op)
            # Restore the checkpoint produced by training.
            saver.restore(sess, model.config.model_output)

            print("""Welcome!
You can use this shell to explore the behavior of your model.
Please enter sentences with spaces between tokens, e.g.,
input> Germany 's representative to the European Union 's veterinary committee .
""")
            while True:  # simple read-eval-print loop
                try:
                    line = raw_input("input> ")
                    tokens = line.strip().split(" ")
                    # Gold labels are unknown here, so pad with "O".
                    batch = [(tokens, ["O"] * len(tokens))]
                    for words, _, preds in model.output(sess, batch):
                        tags = [LBLS[l] for l in preds]
                        print_sentence(sys.stdout, words, [""] * len(tokens), tags)
                except EOFError:
                    print("Closing session.")
                    break
def do_train(args):
    """Train a WindowModel and write dev-set predictions to disk.

    Loads and preprocesses the data, trains the model inside a fresh TF
    graph/session, then saves dev predictions both in CoNLL format
    (config.conll_output) and in human-readable form (config.eval_output).

    Args:
        args: parsed CLI namespace consumed by load_and_preprocess_data /
            load_embeddings.
    """
    # Set up some parameters.
    config = Config()
    helper, train, dev, train_raw, dev_raw = load_and_preprocess_data(args)
    embeddings = load_embeddings(args, helper)
    config.embed_size = embeddings.shape[1]
    helper.save(config.output_path)

    # Mirror log output into the run's log file.
    handler = logging.FileHandler(config.log_output)
    handler.setLevel(logging.DEBUG)
    handler.setFormatter(logging.Formatter('%(asctime)s:%(levelname)s: %(message)s'))
    logging.getLogger().addHandler(handler)

    report = None  # Report(Config.eval_output)

    with tf.Graph().as_default():
        logger.info("Building model...",)
        start = time.time()
        model = WindowModel(helper, config, embeddings)
        logger.info("took %.2f seconds", time.time() - start)

        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        with tf.Session() as session:
            session.run(init)
            model.fit(session, saver, train, dev)
            if report:
                report.log_output(model.output(session, dev_raw))
                report.save()
            else:
                # Save predictions in a text file.
                output = model.output(session, dev_raw)
                sentences, labels, predictions = zip(*output)
                predictions = [[LBLS[l] for l in preds] for preds in predictions]
                # BUGFIX: materialize with list(...) — under Python 3 a bare
                # zip() iterator would be exhausted by write_conll() below,
                # leaving the eval_output loop with nothing to print.
                output = list(zip(sentences, labels, predictions))
                with open(model.config.conll_output, 'w') as f:
                    write_conll(f, output)
                with open(model.config.eval_output, 'w') as f:
                    for sentence, labels, predictions in output:
                        print_sentence(f, sentence, labels, predictions)
def do_test2(args):
    """Smoke test: build a WindowModel and run a full fit() to check it trains.

    No Saver is used, so nothing is checkpointed; success simply means the
    training loop completed without raising.
    """
    logger.info("Testing implementation of WindowModel")
    config = Config()
    helper, train, dev, train_raw, dev_raw = load_and_preprocess_data(args)
    embeddings = load_embeddings(args, helper)
    config.embed_size = embeddings.shape[1]

    with tf.Graph().as_default():
        logger.info("Building model...",)
        t0 = time.time()
        model = WindowModel(helper, config, embeddings)
        logger.info("took %.2f seconds", time.time() - t0)

        init_op = tf.global_variables_initializer()
        saver = None  # intentionally skip checkpointing for the smoke test

        with tf.Session() as sess:
            sess.run(init_op)
            model.fit(sess, saver, train, dev)
        logger.info("Model did not crash!")
        logger.info("Passed!")
def do_evaluate(args):
    """Restore a trained WindowModel and print predictions for a CoNLL file.

    Reads sentences/labels from args.data, runs the restored model over them,
    and writes each sentence with gold labels and predicted tags to
    args.output.
    """
    config = Config(args.model_path)
    helper = ModelHelper.load(args.model_path)
    input_data = read_conll(args.data)
    embeddings = load_embeddings(args, helper)
    config.embed_size = embeddings.shape[1]

    with tf.Graph().as_default():
        logger.info("Building model...",)
        t0 = time.time()
        model = WindowModel(helper, config, embeddings)
        logger.info("took %.2f seconds", time.time() - t0)

        init_op = tf.global_variables_initializer()
        saver = tf.train.Saver()

        with tf.Session() as sess:
            sess.run(init_op)
            saver.restore(sess, model.config.model_output)
            for words, gold, preds in model.output(sess, input_data):
                tags = [LBLS[l] for l in preds]
                print_sentence(args.output, words, gold, tags)
def do_shell(args):
    """Interactive REPL: restore a trained RNNModel and tag user sentences.

    Reads whitespace-tokenized sentences from stdin, runs them through the
    restored model with placeholder "O" gold labels, and prints predicted
    tags. Exits on EOF (Ctrl-D).
    """
    config = Config(args)
    helper = ModelHelper.load(args.model_path)
    embeddings = load_embeddings(args, helper)
    config.embed_size = embeddings.shape[1]

    with tf.Graph().as_default():
        logger.info("Building model...",)
        t0 = time.time()
        model = RNNModel(helper, config, embeddings)
        logger.info("took %.2f seconds", time.time() - t0)

        init_op = tf.global_variables_initializer()
        saver = tf.train.Saver()

        with tf.Session() as sess:
            sess.run(init_op)
            # Restore the checkpoint produced by training.
            saver.restore(sess, model.config.model_output)

            print("""Welcome!
You can use this shell to explore the behavior of your model.
Please enter sentences with spaces between tokens, e.g.,
input> Germany 's representative to the European Union 's veterinary committee .
""")
            while True:  # simple read-eval-print loop
                try:
                    line = raw_input("input> ")
                    tokens = line.strip().split(" ")
                    # Gold labels are unknown here, so pad with "O".
                    batch = [(tokens, ["O"] * len(tokens))]
                    for words, _, preds in model.output(sess, batch):
                        tags = [LBLS[l] for l in preds]
                        print_sentence(sys.stdout, words, [""] * len(tokens), tags)
                except EOFError:
                    print("Closing session.")
                    break
def do_evaluate(args):
    """Restore a trained RNNModel and print predictions for a CoNLL file.

    Reads sentences/labels from args.data, runs the restored model over them,
    and writes each sentence with gold labels and predicted tags to
    args.output.
    """
    config = Config(args)
    helper = ModelHelper.load(args.model_path)
    input_data = read_conll(args.data)
    embeddings = load_embeddings(args, helper)
    config.embed_size = embeddings.shape[1]

    with tf.Graph().as_default():
        logger.info("Building model...",)
        t0 = time.time()
        model = RNNModel(helper, config, embeddings)
        logger.info("took %.2f seconds", time.time() - t0)

        init_op = tf.global_variables_initializer()
        saver = tf.train.Saver()

        with tf.Session() as sess:
            sess.run(init_op)
            saver.restore(sess, model.config.model_output)
            for words, gold, preds in model.output(sess, input_data):
                tags = [LBLS[l] for l in preds]
                print_sentence(args.output, words, gold, tags)
def do_train(args):
    """Train an RNNModel and write dev-set predictions to disk.

    Loads and preprocesses the data, trains the model, then saves dev
    predictions both in CoNLL format (config.conll_output) and in
    human-readable form (config.eval_output).

    NOTE(review): unlike the window-model trainer, fit()/output() here take no
    session argument — presumably the RNNModel manages its own TF session
    internally; confirm against the model class.

    Args:
        args: parsed CLI namespace consumed by Config /
            load_and_preprocess_data / load_embeddings.
    """
    # Set up some parameters.
    config = Config(args)
    helper, train, dev, train_raw, dev_raw = load_and_preprocess_data(args)
    embeddings = load_embeddings(args, helper)
    config.embed_size = embeddings.shape[1]
    helper.save(config.output_path)

    # Mirror log output into the run's log file.
    handler = logging.FileHandler(config.log_output)
    handler.setLevel(logging.DEBUG)
    handler.setFormatter(logging.Formatter('%(asctime)s:%(levelname)s: %(message)s'))
    logging.getLogger().addHandler(handler)

    report = None  # Report(Config.eval_output)

    logger.info("Building model...",)
    start = time.time()
    model = RNNModel(helper, config, embeddings)
    logger.info("took %.2f seconds", time.time() - start)

    model.fit(train, dev)
    if report:
        report.log_output(model.output(dev_raw))
        report.save()
    else:
        # Save predictions in a text file.
        output = model.output(dev_raw)
        sentences, labels, predictions = zip(*output)
        predictions = [[LBLS[l] for l in preds] for preds in predictions]
        # BUGFIX: materialize with list(...) — under Python 3 a bare zip()
        # iterator would be exhausted by write_conll() below, leaving the
        # eval_output loop with nothing to print.
        output = list(zip(sentences, labels, predictions))
        with open(model.config.conll_output, 'w') as f:
            write_conll(f, output)
        with open(model.config.eval_output, 'w') as f:
            for sentence, labels, predictions in output:
                print_sentence(f, sentence, labels, predictions)
# Python 2 script fragment: prompt for an output file name, load the
# preprocessed data and GloVe embeddings, then begin defining the TF graph.
# NOTE(review): this chunk is truncated mid-call — the tf.placeholder(...)
# at the end continues beyond the visible source.
print 'What should the output file name be?'
outputFileName = raw_input()

######################################
##            get the data          ##
######################################
# load in the data
# Enable debug mode (presumably a smaller data subset — confirm in
# load_and_preprocess_data) via a second CLI argument "debug".
debug = False
if len(sys.argv) > 2 and sys.argv[2] == "debug":
    debug = True
helper, train_final_data, dev_final_data, test_final_data, train, dev, test = load_and_preprocess_data(
    debug)
# 200-dimensional GloVe vectors; vocab and vector files live side by side.
pretrained_embeddings = load_embeddings(
    helper,
    vocabPath="../Vectors/gloveVocab.txt",
    vectorPath="../Vectors/glove.6B.200d.txt",
    wordFirst=True,
    embed_size=200)
# Record the actual embedding width on the (class-level) Config.
Config.embed_size = pretrained_embeddings.shape[1]

# for later: track whether the gold/test output files have been opened yet
neverOpened_gold = True
neverOpened_test = True

######################################
##           define graph           ##
######################################
# define placeholders (call truncated in this chunk)
description_input_placeholder = tf.placeholder(
import json
from data_util import load_embeddings

# Script fragment: build shuffled, numericalized batches from the BBC corpus.
# NOTE(review): `fdt`, `np`, and `rmb` are not imported in this chunk —
# presumably imported elsewhere in the file; verify before running standalone.
batch_size = 16
data_path = '/content/bbc'

# Load the token -> index vocabulary; fall back to an empty mapping when the
# file is empty.
with open('/content/auth_id/tokenToIndex', 'r') as f:
    try:
        wordToIndex = json.load(f)
    # if the file is empty the ValueError will be thrown
    except ValueError:
        wordToIndex = {}

# 50-dimensional GloVe vectors.
glove_path = "/content/glove.6B.50d.txt"
glove_vector = load_embeddings(glove_path, 50)  # load the glove vectors

#auth_sent_num = fdt.file2auth_sent_num(data_path)  # read in the training data
#auth_sentbundle_num = fdt.file2auth_sentbundle_num(data_path, 3)[1:1000]
auth_news_num = fdt.file2auth_news_num(data_path)

# Shuffle the example order so batches are drawn randomly.
ind = np.arange(len(auth_news_num))
np.random.shuffle(ind)
index = ind
raw_data = [auth_news_num[i] for i in index]

# Numericalize (max length 24 — presumably tokens per example; confirm in
# rmb.process_word2num) and group into fixed-size batches.
batch_list = rmb.process_word2num(raw_data, wordToIndex, glove_vector, 24)
batch_list_bundle = rmb.pack_batch_list(batch_list, batch_size)