def run(length=None, seed=None):
    """Sample text from the saved model, or score ``args.example_text``.

    A fresh ``Hyperparameters()`` object supplies every setting; ``length``
    and ``seed`` optionally override its ``length`` and ``seed`` attributes.
    ``result.json`` and ``vocab.json`` are read from ``args.init_dir``.

    Args:
        length: Replaces ``args.length`` when it is not None.
        seed: Replaces ``args.seed`` when it is not None. NumPy is only
            seeded when the resulting ``args.seed`` is >= 0.

    Returns:
        Whatever ``test_model.sample_seq`` returns when sampling, or None
        when ``args.evaluate`` is set (the perplexity is only printed).
    """
    args = Hyperparameters()
    # Compare against None rather than truthiness: 0 is a legitimate seed
    # (the ``args.seed >= 0`` check below accepts it) and must not be
    # silently discarded.
    if seed is not None:
        args.seed = seed
    if length is not None:
        args.length = length

    # Prepare parameters.
    with open(os.path.join(args.init_dir, 'result.json'), 'r') as f:
        result = json.load(f)
    params = result['params']
    # An explicitly configured checkpoint wins over the recorded best model.
    best_model = args.model_path or result['best_model']
    args.encoding = result.get('encoding', 'utf-8')
    args.vocab_file = os.path.join(args.init_dir, 'vocab.json')
    vocab_index_dict, index_vocab_dict, vocab_size = load_vocab(
        args.vocab_file, args.encoding)

    # Create graphs
    logging.info('Creating graph')
    graph = tf.Graph()
    with graph.as_default():
        with tf.name_scope('evaluation'):
            test_model = CharRNN(is_training=False, use_batch=False, **params)
            saver = tf.train.Saver(name='checkpoint_saver')

    if args.evaluate:
        example_batches = BatchGenerator(args.example_text, 1, 1, vocab_size,
                                         vocab_index_dict, index_vocab_dict)
        with tf.Session(graph=graph) as session:
            saver.restore(session, best_model)
            # Only the first element of run_epoch's result is reported.
            ppl = test_model.run_epoch(session, len(args.example_text),
                                       example_batches,
                                       is_training=False)[0]
            print('Example text is: %s' % args.example_text)
            print('Perplexity is: %s' % ppl)
        return None

    if args.seed >= 0:
        np.random.seed(args.seed)
    # Sampling a sequence
    with tf.Session(graph=graph) as session:
        saver.restore(session, best_model)
        sample = test_model.sample_seq(session, args.length, args.start_text,
                                       vocab_index_dict, index_vocab_dict,
                                       temperature=args.temperature,
                                       max_prob=args.max_prob)
    return sample
def eval():
    """Run an endless read-decode-print loop on text typed by the user.

    Each input line is mapped to vocabulary indices (with an 'E' appended),
    zero-padded up to ``hp.maxlen`` and decoded one position at a time by
    repeatedly running ``g.preds``. The decoded string is cut at the first
    'E' and printed next to the source text. The loop has no exit condition.
    """
    # Load graph
    g = Graph(is_training=False)
    print("Graph loaded")

    # Load data
    char2idx, idx2char = load_vocab()

    def encode(text):
        # Indices of the characters plus the 'E' marker, right-padded with
        # zeros and shaped as a single-row batch.
        ids = [char2idx[ch] for ch in text + 'E']
        ids += [0] * (hp.maxlen - len(ids))
        return np.array(ids).reshape(1, -1)

    # Fixed sample sentence; its encoding is replaced as soon as the first
    # line is read in the loop below.
    inp = "ذلك الكتاب لا ريب فيه هدى للمتقين"
    x = encode(inp)

    # Start session
    with g.graph.as_default():
        # NOTE(review): no explicit restore happens here; presumably the
        # Supervisor picks up the checkpoint in hp.logdir — confirm.
        sv = tf.train.Supervisor(logdir=hp.logdir)
        with sv.managed_session() as sess:
            # Inference
            while True:
                testVar = input("input:")
                x = encode(testVar)

                preds = np.zeros((1, hp.maxlen), np.int32)
                # Fill the output one column per step, feeding the partial
                # result back in as g.y each time.
                for j in range(hp.maxlen):
                    step_preds = sess.run(g.preds, {g.x: x, g.y: preds})
                    preds[:, j] = step_preds[:, j]

                decoded_chars = "".join(idx2char[idx] for idx in preds[0])
                got = decoded_chars.split("E")[0].strip()
                print("Source: ", testVar)
                print("got : ", got)
def main():
    """Parse CLI options, restore a CharRNN and sample from it in a loop.

    With ``--evaluate`` the perplexity of ``--example_text`` is printed and
    the function returns None. Otherwise text is sampled repeatedly: each
    round prints everything after the first 30 characters of the sample,
    reuses the first 30 characters as the next start text and sleeps for
    four seconds. That loop has no exit condition.
    """
    parser = argparse.ArgumentParser()

    # Parameters for using saved best models.
    parser.add_argument(
        '--init_dir', type=str, default='',
        help='continue from the outputs in the given directory')

    # Parameters for picking which model to use.
    parser.add_argument(
        '--model_path', type=str, default='',
        help='path to the model file like output/best_model/model-40.')

    # Parameters for sampling.
    parser.add_argument(
        '--temperature', type=float, default=1.0,
        help=('Temperature for sampling from softmax: '
              'higher temperature, more random; '
              'lower temperature, more greedy.'))
    parser.add_argument(
        '--max_prob', dest='max_prob', action='store_true',
        help='always pick the most probable next character in sampling')
    parser.add_argument(
        '--start_text', type=str, default='The meaning of life is ',
        help='the text to start with')
    parser.add_argument(
        '--length', type=int, default=100,
        help='length of sampled sequence')
    parser.add_argument(
        '--seed', type=int, default=-1,
        help=('seed for sampling to replicate results, '
              'an integer between 0 and 4294967295.'))

    # Parameters for evaluation (computing perplexity of given text).
    parser.add_argument(
        '--evaluate', dest='evaluate', action='store_true',
        help='compute the perplexity of given text')
    parser.add_argument(
        '--example_text', type=str, default='The meaning of life is 42.',
        help='compute the perplexity of given example text.')

    # Parameters for debugging.
    parser.add_argument(
        '--debug', dest='debug', action='store_true',
        help='show debug information')

    # All boolean switches start out disabled.
    parser.set_defaults(max_prob=False, evaluate=False, debug=False)
    args = parser.parse_args()

    # Prepare parameters.
    result_path = os.path.join(args.init_dir, 'result.json')
    with open(result_path, 'r') as f:
        result = json.load(f)
    params = result['params']
    # A checkpoint given on the command line overrides the recorded one.
    best_model = args.model_path or result['best_model']
    best_valid_ppl = result['best_valid_ppl']  # read but not used below
    args.encoding = result.get('encoding', 'utf-8')
    args.vocab_file = os.path.join(args.init_dir, 'vocab.json')
    vocab_index_dict, index_vocab_dict, vocab_size = load_vocab(
        args.vocab_file, args.encoding)

    # Create graphs
    logging.info('Creating graph')
    graph = tf.Graph()
    with graph.as_default():
        with tf.name_scope('evaluation'):
            test_model = CharRNN(is_training=False, use_batch=False, **params)
            saver = tf.train.Saver(name='checkpoint_saver')

    if args.evaluate:
        example_batches = BatchGenerator(args.example_text, 1, 1, vocab_size,
                                         vocab_index_dict, index_vocab_dict)
        with tf.Session(graph=graph) as session:
            saver.restore(session, best_model)
            epoch_result = test_model.run_epoch(
                session, len(args.example_text), example_batches,
                is_training=False)
            ppl = epoch_result[0]
            print('Example text is: %s' % args.example_text)
            print('Perplexity is: %s' % ppl)
        return

    if args.seed >= 0:
        np.random.seed(args.seed)

    # Sampling a sequence
    prompt = args.start_text
    with tf.Session(graph=graph) as session:
        saver.restore(session, best_model)
        while True:
            sample = test_model.sample_seq(session, args.length, prompt,
                                           vocab_index_dict, index_vocab_dict,
                                           temperature=args.temperature,
                                           max_prob=args.max_prob)
            # Show the tail and carry the head over as the next prompt.
            print(sample[30:])
            prompt = sample[:30]
            sleep(4.0)
    # Not reached: the loop above never terminates on its own.
    return sample
from flask import Flask, request from train import load_vocab from torch.autograd import Variable import torch import json import sys PORT = 5000 MODEL_PATH = './model/model3.pkl' ### USE_CUDA = torch.cuda.is_available() PADDING = '<pad>' UNK = '<unk>' SOS = '<s>' EOS = '</s>' char2idx = load_vocab() idx2char = list(char2idx.keys()) model = torch.load(MODEL_PATH) if USE_CUDA: model = model.cuda() ### app = Flask(__name__, static_url_path='') @app.route('/') def index(): return app.send_static_file('index.html') @app.route('/couplet')
from train import Graph, hp, load_vocab
import numpy as np
import codecs
import re
import os
import unicodedata
from expand import normalize_numbers
from builtins import str as unicode

# Directory that contains this module.
dirname = os.path.dirname(__file__)

# Pronunciation dictionary object returned by cmudict.dict().
cmu = cmudict.dict()

# Load vocab
g2idx, idx2g, p2idx, idx2p = load_vocab()

# Load Graph: build the model and its saver inside a dedicated graph,
# with ops placed on the CPU.
g = tf.Graph()
with g.as_default(), tf.device('/cpu:0'):
    graph = Graph()
    print("Graph loaded for g2p")
    saver = tf.train.Saver()

# Session configuration: zero GPU devices and a minimal GPU memory fraction.
config = tf.ConfigProto(
    device_count={'GPU': 0},
    gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.0001),
)

g_sess = None  # global session
def main():
    """Parse CLI options, restore the best CharRNN and sample or evaluate.

    With ``--evaluate`` the perplexity of ``--example_text`` is printed and
    None is returned. Otherwise one sequence is sampled starting from
    ``--start_text``, printed and returned.
    """
    parser = argparse.ArgumentParser()

    # Parameters for using saved best models.
    parser.add_argument(
        '--init_dir', type=str, default='',
        help='continue from the outputs in the given directory')

    # Parameters for sampling.
    parser.add_argument(
        '--temperature', type=float, default=1.0,
        help=('Temperature for sampling from softmax: '
              'higher temperature, more random; '
              'lower temperature, more greedy.'))
    parser.add_argument(
        '--max_prob', dest='max_prob', action='store_true',
        help='always pick the most probable next character in sampling')
    parser.add_argument(
        '--start_text', type=str, default='The meaning of life is ',
        help='the text to start with')
    parser.add_argument(
        '--length', type=int, default=100,
        help='length of sampled sequence')
    parser.add_argument(
        '--seed', type=int, default=-1,
        help=('seed for sampling to replicate results, '
              'an integer between 0 and 4294967295.'))

    # Parameters for evaluation (computing perplexity of given text).
    parser.add_argument(
        '--evaluate', dest='evaluate', action='store_true',
        help='compute the perplexity of given text')
    parser.add_argument(
        '--example_text', type=str, default='The meaning of life is 42.',
        help='compute the perplexity of given example text.')

    # Parameters for debugging.
    parser.add_argument(
        '--debug', dest='debug', action='store_true',
        help='show debug information')

    # All boolean switches start out disabled.
    parser.set_defaults(max_prob=False, evaluate=False, debug=False)
    args = parser.parse_args()

    # Prepare parameters.
    result_path = os.path.join(args.init_dir, 'result.json')
    with open(result_path, 'r') as f:
        result = json.load(f)
    params = result['params']
    best_model = result['best_model']
    best_valid_ppl = result['best_valid_ppl']  # read but not used below
    args.encoding = result.get('encoding', 'utf-8')
    args.vocab_file = os.path.join(args.init_dir, 'vocab.json')
    vocab_index_dict, index_vocab_dict, vocab_size = load_vocab(
        args.vocab_file, args.encoding)

    # Create graphs
    logging.info('Creating graph')
    graph = tf.Graph()
    with graph.as_default():
        with tf.name_scope('evaluation'):
            test_model = CharRNN(is_training=False, use_batch=False, **params)
            saver = tf.train.Saver(name='checkpoint_saver')

    if args.evaluate:
        example_batches = BatchGenerator(args.example_text, 1, 1, vocab_size,
                                         vocab_index_dict, index_vocab_dict)
        with tf.Session(graph=graph) as session:
            saver.restore(session, best_model)
            epoch_result = test_model.run_epoch(
                session, len(args.example_text), example_batches,
                is_training=False)
            ppl = epoch_result[0]
            print('Example text is: %s' % args.example_text)
            print('Perplexity is: %s' % ppl)
        return

    # A negative seed leaves NumPy's random state untouched.
    if args.seed >= 0:
        np.random.seed(args.seed)

    # Sampling a sequence
    with tf.Session(graph=graph) as session:
        saver.restore(session, best_model)
        sample = test_model.sample_seq(session, args.length, args.start_text,
                                       vocab_index_dict, index_vocab_dict,
                                       temperature=args.temperature,
                                       max_prob=args.max_prob)
        print('Sampled text is:\n%s' % sample)
    return sample
def main():
    """Score a CharRNN as a binary classifier on ``data/test_0.csv``.

    For every row, the ``processed_text`` column followed by two backticks
    is used as the start text, one more character is sampled, and the last
    character of the sample is read as the predicted label. Predictions are
    compared against the ``has_citation`` column; ROC AUC, macro F1 and
    accuracy are printed. Results are written to a CSV inside
    ``--init_dir`` and to ``y_hat.txt`` in the working directory.

    The loop waits for Enter on stdin after printing each sample.
    """
    parser = argparse.ArgumentParser()

    # Parameters for using saved best models.
    parser.add_argument(
        '--init_dir', type=str, default='',
        help='continue from the outputs in the given directory')

    # Parameters for picking which model to use.
    parser.add_argument(
        '--model_path', type=str, default='',
        help='path to the model file like output/best_model/model-40.')

    # Parameters for sampling.
    parser.add_argument(
        '--temperature', type=float, default=1.0,
        help=('Temperature for sampling from softmax: '
              'higher temperature, more random; '
              'lower temperature, more greedy.'))
    parser.add_argument(
        '--max_prob', dest='max_prob', action='store_true',
        help='always pick the most probable next character in sampling')
    parser.set_defaults(max_prob=False)
    parser.add_argument(
        '--seed', type=int, default=-1,
        help=('seed for sampling to replicate results, '
              'an integer between 0 and 4294967295.'))

    # Parameters for debugging.
    parser.add_argument(
        '--debug', dest='debug', action='store_true',
        help='show debug information')
    parser.set_defaults(debug=False)

    args = parser.parse_args()

    # Prepare parameters.
    with open(os.path.join(args.init_dir, 'result.json'), 'r') as f:
        result = json.load(f)
    params = result['params']
    # A checkpoint given on the command line overrides the recorded one.
    best_model = args.model_path or result['best_model']
    best_valid_ppl = result['best_valid_ppl']  # read but not used below
    args.encoding = result.get('encoding', 'utf-8')
    args.vocab_file = os.path.join(args.init_dir, 'vocab.json')
    vocab_index_dict, index_vocab_dict, vocab_size = load_vocab(
        args.vocab_file, args.encoding)

    # Create graphs
    graph = tf.Graph()
    with graph.as_default():
        with tf.name_scope('evaluation'):
            test_model = CharRNN(is_training=False, use_batch=False, **params)
            saver = tf.train.Saver(name='checkpoint_saver')

    if args.seed >= 0:
        np.random.seed(args.seed)

    data = pd.read_csv('data/test_0.csv', encoding='utf8')
    y = []
    y_hat = []
    with tf.Session(graph=graph) as session:
        saver.restore(session, best_model)
        for i, row in data.iterrows():
            text = row['processed_text'] + '``'
            has_citation = int(row['has_citation'])
            y.append(has_citation)

            # Sample exactly one character after the prompt.
            sample = test_model.sample_seq(session, 1, text,
                                           vocab_index_dict, index_vocab_dict,
                                           temperature=args.temperature,
                                           max_prob=args.max_prob)
            print(sample)
            # NOTE(review): blocks until Enter is pressed for every row;
            # looks like a debugging pause — confirm it is still wanted.
            input()

            # Only a literal '0' or '1' counts as a label. Other digits used
            # to slip through int() and end up as extra classes in the
            # metrics; they now take the same fallback as non-digits.
            last_char = sample[-1]
            if last_char in ('0', '1'):
                predicted_label = int(last_char)
            else:
                print('Did not predict 1 or 0.')
                predicted_label = 0
            y_hat.append(predicted_label)

            if predicted_label == 1:
                print('Actual label was: ', str(has_citation))
                print(text)

    try:
        roc_auc = metrics.roc_auc_score(y, y_hat)
    except ValueError:
        # Raised by roc_auc_score when the score cannot be computed.
        roc_auc = 'undefined'
    f1_macro = metrics.f1_score(y, y_hat, average='macro')
    acc = metrics.accuracy_score(y, y_hat)

    print('Predicted {} hits. There should be {} hits'.format(
        sum(y_hat), sum(y)))
    print('roc_auc: {}\nf1_macro:{}\nacc:{}'.format(roc_auc, f1_macro, acc))

    df = pd.DataFrame({'y': y, 'y_hat': y_hat})
    # Join with os.path so an empty --init_dir (the default) resolves to the
    # working directory, matching how result.json is located, instead of
    # producing a path at the filesystem root.
    predictions_name = 'predictions_temper{}_maxprob{}.csv'.format(
        args.temperature, args.max_prob)
    df.to_csv(os.path.join(args.init_dir, predictions_name))

    with open('y_hat.txt', 'w') as f:
        f.write('\n'.join([str(x) for x in y_hat]))