def run(length=None, seed=None):
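    """Restore the best saved CharRNN model, then sample from it (or score
    example text when args.evaluate is set)."""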
    args = Hyperparameters()
    if seed is not None:  # `if seed:` would wrongly ignore a seed of 0
        args.seed = seed
    if length is not None:
        args.length = length
    # Prepare parameters.
    with open(os.path.join(args.init_dir, 'result.json'), 'r') as f:
        result = json.load(f)
    params = result['params']

    if args.model_path:
        best_model = args.model_path
    else:
        best_model = result['best_model']

    best_valid_ppl = result['best_valid_ppl']
    if 'encoding' in result:
        args.encoding = result['encoding']
    else:
        args.encoding = 'utf-8'
    args.vocab_file = os.path.join(args.init_dir, 'vocab.json')
    vocab_index_dict, index_vocab_dict, vocab_size = load_vocab(args.vocab_file, args.encoding)

    # Create graph
    logging.info('Creating graph')
    graph = tf.Graph()
    with graph.as_default():
        with tf.name_scope('evaluation'):
            test_model = CharRNN(is_training=False, use_batch=False, **params)
            saver = tf.train.Saver(name='checkpoint_saver')

    if args.evaluate:
        example_batches = BatchGenerator(args.example_text, 1, 1, vocab_size,
                                         vocab_index_dict, index_vocab_dict)
        with tf.Session(graph=graph) as session:
            saver.restore(session, best_model)
            ppl = test_model.run_epoch(session, len(args.example_text),
                                       example_batches,
                                       is_training=False)[0]
            print('Example text is: %s' % args.example_text)
            print('Perplexity is: %s' % ppl)
    else:
        if args.seed >= 0:
            np.random.seed(args.seed)
        # Sample a sequence.
        with tf.Session(graph=graph) as session:
            saver.restore(session, best_model)
            sample = test_model.sample_seq(session, args.length, args.start_text,
                                           vocab_index_dict, index_vocab_dict,
                                           temperature=args.temperature,
                                           max_prob=args.max_prob)
        return sample
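
# A minimal usage sketch for run() (the values here are only illustrative;
# init_dir and the evaluate flag still come from Hyperparameters):
#
#   text = run(length=200, seed=42)
#   print(text)
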
def eval():
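    """Interactively read text from stdin and print the model's greedy decoding of it."""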
    # Load graph
    g = Graph(is_training=False)
    print("Graph loaded")

    # Load data
    char2idx, idx2char = load_vocab()

    # Encode a string as a fixed-length index array: append the end marker
    # 'E', map each character to its index, and right-pad with zeros.
    # Example input: "ذلك الكتاب لا ريب فيه هدى للمتقين"
    def encode(text):
        x = [char2idx[c] for c in text + 'E']
        x += [0] * (hp.maxlen - len(x))
        return np.array(x).reshape(1, -1)

    # Start session
    # Start the session. The Supervisor restores the latest checkpoint from
    # hp.logdir automatically, so no explicit saver.restore() call is needed.
    with g.graph.as_default():
        sv = tf.train.Supervisor(logdir=hp.logdir)
        with sv.managed_session() as sess:
            # Inference: greedy left-to-right decoding. Predict one position
            # at a time and feed the growing prediction back in as g.y.
            while True:
                test_var = input("input: ")
                x = encode(test_var)
                preds = np.zeros((1, hp.maxlen), np.int32)
                for j in range(hp.maxlen):
                    _preds = sess.run(g.preds, {g.x: x, g.y: preds})
                    preds[:, j] = _preds[:, j]
                got = "".join(idx2char[idx]
                              for idx in preds[0]).split("E")[0].strip()
                print("Source: ", testVar)
                print("got : ", got)
def main():
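    """Restore the best saved model, then either score the example text
    (--evaluate) or stream samples forever, re-seeding from each sample."""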
    parser = argparse.ArgumentParser()

    # Parameters for using saved best models.
    parser.add_argument(
        '--init_dir',
        type=str,
        default='',
        help='continue from the outputs in the given directory')

    # Parameters for picking which model to use.
    parser.add_argument(
        '--model_path',
        type=str,
        default='',
        help='path to the model file like output/best_model/model-40.')

    # Parameters for sampling.
    parser.add_argument('--temperature',
                        type=float,
                        default=1.0,
                        help=('Temperature for sampling from softmax: '
                              'higher temperature, more random; '
                              'lower temperature, more greedy.'))

    parser.add_argument(
        '--max_prob',
        dest='max_prob',
        action='store_true',
        help='always pick the most probable next character in sampling')

    parser.set_defaults(max_prob=False)

    parser.add_argument('--start_text',
                        type=str,
                        default='The meaning of life is ',
                        help='the text to start with')

    parser.add_argument('--length',
                        type=int,
                        default=100,
                        help='length of sampled sequence')

    parser.add_argument('--seed',
                        type=int,
                        default=-1,
                        help=('seed for sampling to replicate results, '
                              'an integer between 0 and 4294967295.'))

    # Parameters for evaluation (computing perplexity of given text).
    parser.add_argument('--evaluate',
                        dest='evaluate',
                        action='store_true',
                        help='compute the perplexity of given text')
    parser.set_defaults(evaluate=False)
    parser.add_argument('--example_text',
                        type=str,
                        default='The meaning of life is 42.',
                        help='compute the perplexity of given example text.')

    # Parameters for debugging.
    parser.add_argument('--debug',
                        dest='debug',
                        action='store_true',
                        help='show debug information')
    parser.set_defaults(debug=False)

    args = parser.parse_args()

    # Prepare parameters.
    with open(os.path.join(args.init_dir, 'result.json'), 'r') as f:
        result = json.load(f)
    params = result['params']

    if args.model_path:
        best_model = args.model_path
    else:
        best_model = result['best_model']

    best_valid_ppl = result['best_valid_ppl']
    if 'encoding' in result:
        args.encoding = result['encoding']
    else:
        args.encoding = 'utf-8'
    args.vocab_file = os.path.join(args.init_dir, 'vocab.json')
    vocab_index_dict, index_vocab_dict, vocab_size = load_vocab(
        args.vocab_file, args.encoding)

    # Create graph
    logging.info('Creating graph')
    graph = tf.Graph()
    with graph.as_default():
        with tf.name_scope('evaluation'):
            test_model = CharRNN(is_training=False, use_batch=False, **params)
            saver = tf.train.Saver(name='checkpoint_saver')

    if args.evaluate:
        example_batches = BatchGenerator(args.example_text, 1, 1, vocab_size,
                                         vocab_index_dict, index_vocab_dict)
        with tf.Session(graph=graph) as session:
            saver.restore(session, best_model)
            ppl = test_model.run_epoch(session,
                                       len(args.example_text),
                                       example_batches,
                                       is_training=False)[0]
            print('Example text is: %s' % args.example_text)
            print('Perplexity is: %s' % ppl)
    else:
        if args.seed >= 0:
            np.random.seed(args.seed)
        # Sample a sequence, then keep going: print everything past the first
        # 30 characters and reuse those 30 characters as the next seed.
        start_text = args.start_text
        with tf.Session(graph=graph) as session:
            saver.restore(session, best_model)
            while True:
                sample = test_model.sample_seq(session,
                                               args.length,
                                               start_text,
                                               vocab_index_dict,
                                               index_vocab_dict,
                                               temperature=args.temperature,
                                               max_prob=args.max_prob)
                print(sample[30:])
                start_text = sample[:30]
                sleep(4.0)  # assumes `from time import sleep` at module level

Example #4

from flask import Flask, request
from train import load_vocab
from torch.autograd import Variable
import torch
import json
import sys
PORT = 5000
MODEL_PATH = './model/model3.pkl'

###
USE_CUDA = torch.cuda.is_available()
PADDING = '<pad>'
UNK = '<unk>'
SOS = '<s>'
EOS = '</s>'
char2idx = load_vocab()
idx2char = list(char2idx.keys())
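# torch.load restores the full pickled model object, so the model's class
# definition must be importable when this script runs.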
model = torch.load(MODEL_PATH)
if USE_CUDA:
    model = model.cuda()
###

app = Flask(__name__, static_url_path='')


@app.route('/')
def index():
    return app.send_static_file('index.html')


@app.route('/couplet')
def couplet():
    # Placeholder: the handler body is truncated in the original listing.
    ...

Example #5

from train import Graph, hp, load_vocab
import numpy as np
import tensorflow as tf
import codecs
import re
import os
import unicodedata
from nltk.corpus import cmudict
from expand import normalize_numbers
from builtins import str as unicode

dirname = os.path.dirname(__file__)

cmu = cmudict.dict()

# Load vocab
g2idx, idx2g, p2idx, idx2p = load_vocab()

# Load Graph
g = tf.Graph()
with g.as_default():
    with tf.device('/cpu:0'):
        graph = Graph()
        print("Graph loaded for g2p")
        saver = tf.train.Saver()
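# Pin inference to the CPU: hide all GPU devices and, in case one is still
# visible, request only a negligible fraction of its memory.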
config = tf.ConfigProto(
    device_count={'GPU': 0},
    gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.0001))

g_sess = None  # global session
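
# A minimal sketch of how the global session might be initialized lazily
# (the restore-from-hp.logdir step is an assumption, not shown above):
#
#   def get_session():
#       global g_sess
#       if g_sess is None:
#           g_sess = tf.Session(graph=g, config=config)
#           with g.as_default():
#               saver.restore(g_sess, tf.train.latest_checkpoint(hp.logdir))
#       return g_sess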

def main():
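    """Restore the best saved model and either compute the perplexity of the
    example text or print a single sampled sequence."""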
    parser = argparse.ArgumentParser()
    
    # Parameters for using saved best models.
    parser.add_argument('--init_dir', type=str, default='',
                        help='continue from the outputs in the given directory')

    # Parameters for sampling.
    parser.add_argument('--temperature', type=float,
                        default=1.0,
                        help=('Temperature for sampling from softmax: '
                              'higher temperature, more random; '
                              'lower temperature, more greedy.'))
    
    parser.add_argument('--max_prob', dest='max_prob', action='store_true',
                        help='always pick the most probable next character in sampling')

    parser.set_defaults(max_prob=False)
    
    parser.add_argument('--start_text', type=str,
                        default='The meaning of life is ',
                        help='the text to start with')

    parser.add_argument('--length', type=int,
                        default=100,
                        help='length of sampled sequence')

    parser.add_argument('--seed', type=int,
                        default=-1,
                        help=('seed for sampling to replicate results, '
                              'an integer between 0 and 4294967295.'))

    # Parameters for evaluation (computing perplexity of given text).
    parser.add_argument('--evaluate', dest='evaluate', action='store_true',
                        help='compute the perplexity of given text')
    parser.set_defaults(evaluate=False)
    parser.add_argument('--example_text', type=str,
                        default='The meaning of life is 42.',
                        help='compute the perplexity of given example text.')

    # Parameters for debugging.
    parser.add_argument('--debug', dest='debug', action='store_true',
                        help='show debug information')
    parser.set_defaults(debug=False)
    
    args = parser.parse_args()

    # Prepare parameters.
    with open(os.path.join(args.init_dir, 'result.json'), 'r') as f:
        result = json.load(f)
    params = result['params']
    best_model = result['best_model']
    best_valid_ppl = result['best_valid_ppl']
    if 'encoding' in result:
        args.encoding = result['encoding']
    else:
        args.encoding = 'utf-8'
    args.vocab_file = os.path.join(args.init_dir, 'vocab.json')
    vocab_index_dict, index_vocab_dict, vocab_size = load_vocab(args.vocab_file, args.encoding)

    # Create graph
    logging.info('Creating graph')
    graph = tf.Graph()
    with graph.as_default():
        with tf.name_scope('evaluation'):
            test_model = CharRNN(is_training=False, use_batch=False, **params)
            saver = tf.train.Saver(name='checkpoint_saver')

    if args.evaluate:
        example_batches = BatchGenerator(args.example_text, 1, 1, vocab_size,
                                         vocab_index_dict, index_vocab_dict)
        with tf.Session(graph=graph) as session:
            saver.restore(session, best_model)
            ppl = test_model.run_epoch(session, len(args.example_text),
                                       example_batches,
                                       is_training=False)[0]
            print('Example text is: %s' % args.example_text)
            print('Perplexity is: %s' % ppl)
    else:
        if args.seed >= 0:
            np.random.seed(args.seed)
        # Sampling a sequence
        with tf.Session(graph=graph) as session:
            saver.restore(session, best_model)
            sample = test_model.sample_seq(session, args.length, args.start_text,
                                           vocab_index_dict, index_vocab_dict,
                                           temperature=args.temperature,
                                           max_prob=args.max_prob)
            print('Sampled text is:\n%s' % sample)
        return sample

Example #7

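# (This snippet additionally assumes `import pandas as pd` and
# `from sklearn import metrics` at module level.)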
def main():
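    """Use the character model as a binary classifier: sample one character
    after each prompt and read it back as the predicted 0/1 citation label."""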
    parser = argparse.ArgumentParser()

    # Parameters for using saved best models.
    parser.add_argument(
        '--init_dir',
        type=str,
        default='',
        help='continue from the outputs in the given directory')

    # Parameters for picking which model to use.
    parser.add_argument(
        '--model_path',
        type=str,
        default='',
        help='path to the model file like output/best_model/model-40.')

    # Parameters for sampling.
    parser.add_argument('--temperature',
                        type=float,
                        default=1.0,
                        help=('Temperature for sampling from softmax: '
                              'higher temperature, more random; '
                              'lower temperature, more greedy.'))

    parser.add_argument(
        '--max_prob',
        dest='max_prob',
        action='store_true',
        help='always pick the most probable next character in sampling')

    parser.set_defaults(max_prob=False)

    parser.add_argument('--seed',
                        type=int,
                        default=-1,
                        help=('seed for sampling to replicate results, '
                              'an integer between 0 and 4294967295.'))

    # Parameters for debugging.
    parser.add_argument('--debug',
                        dest='debug',
                        action='store_true',
                        help='show debug information')
    parser.set_defaults(debug=False)

    args = parser.parse_args()

    # Prepare parameters.
    with open(os.path.join(args.init_dir, 'result.json'), 'r') as f:
        result = json.load(f)
    params = result['params']

    if args.model_path:
        best_model = args.model_path
    else:
        best_model = result['best_model']

    best_valid_ppl = result['best_valid_ppl']
    if 'encoding' in result:
        args.encoding = result['encoding']
    else:
        args.encoding = 'utf-8'
    args.vocab_file = os.path.join(args.init_dir, 'vocab.json')
    vocab_index_dict, index_vocab_dict, vocab_size = load_vocab(
        args.vocab_file, args.encoding)

    # Create graph
    graph = tf.Graph()
    with graph.as_default():
        with tf.name_scope('evaluation'):
            test_model = CharRNN(is_training=False, use_batch=False, **params)
            saver = tf.train.Saver(name='checkpoint_saver')

    if args.seed >= 0:
        np.random.seed(args.seed)
    data = pd.read_csv('data/test_0.csv', encoding='utf8')
    y = []
    y_hat = []
    n = len(data.index)

    with tf.Session(graph=graph) as session:
        saver.restore(session, best_model)

        for i, row in data.iterrows():
            text = row['processed_text'] + '``'
            has_citation = int(row['has_citation'])
            y.append(has_citation)
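            # Ask the model for exactly one character after the prompt; that
            # character is interpreted as the predicted label.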
            sample = test_model.sample_seq(session,
                                           1,
                                           text,
                                           vocab_index_dict,
                                           index_vocab_dict,
                                           temperature=args.temperature,
                                           max_prob=args.max_prob)
            print(sample)
            input()  # pause for a keypress between examples (debugging aid)
            try:
                predicted_label = int(sample[-1])
            except ValueError:
                print('Did not predict 1 or 0.')
                predicted_label = 0
            y_hat.append(predicted_label)
            if predicted_label == 1:
                print('Actual label was: ', str(has_citation))
                print(text)

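    # roc_auc_score raises ValueError when y contains only a single class.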
    try:
        roc_auc = metrics.roc_auc_score(y, y_hat)
    except ValueError:
        roc_auc = 'undefined'
    f1_macro = metrics.f1_score(y, y_hat, average='macro')
    acc = metrics.accuracy_score(y, y_hat)
    print('Predicted {} hits. There should be {} hits'.format(
        sum(y_hat), sum(y)))
    print('roc_auc: {}\nf1_macro:{}\nacc:{}'.format(roc_auc, f1_macro, acc))
    df = pd.DataFrame()
    df['y'] = y
    df['y_hat'] = y_hat
    df.to_csv(args.init_dir + '/predictions_temper{}_maxprob{}.csv'.format(
        args.temperature, args.max_prob))
    with open('y_hat.txt', 'w') as f:
        f.write('\n'.join([str(x) for x in y_hat]))