Code Example #1
    # Optional embedding pre-training (disabled in this run):
    # preModel = preTrainModel()
    # saver = tf.train.Saver({"trained_embedding": embedding})
    # trainingSession(preModel)
    # preTraining = False

    model = model()  # build the graph (rebinds the name, so model() is callable only once)
    total_loss = loss(model)
    train_op = train(total_loss, lr)
    init = tf.global_variables_initializer()
    sess.run(init)

    print("Starting Training... Please Wait...")
    best_f1 = -np.inf
    current_lr = lr
    best_epoch = 0
    for fold in range(5):  # iterate over the 5 ATIS folds
        # load the dataset
        train_set, valid_set, test_set, dic = load.atisfold(fold)
        idx2label = dict((k, v) for v, k in dic['labels2idx'].items())
        idx2word = dict((k, v) for v, k in dic['words2idx'].items())

        train_lex, train_ne, train_y = train_set
        valid_lex, valid_ne, valid_y = valid_set
        test_lex, test_ne, test_y = test_set

        # maxSentenceLength = np.amax([len(i) for i in train_lex+train_ne+train_y])
        nsentences = len(train_lex)
        print("Fold ", fold + 1)
        best_f1 = trainingSession(model, best_f1)
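
A minimal sketch of the TF1-style scaffolding this excerpt assumes. `sess`, `lr`, and the `train` builder are defined outside the excerpt, so the names and values below are inferred, not taken from the source:

import numpy as np
import tensorflow as tf  # TF1 graph/session API

import data.load as load

lr = 0.0627            # illustrative initial learning rate
sess = tf.Session()    # the session the excerpt runs `init` and the ops in


def train(total_loss, learning_rate):
    # a plain gradient-descent step on the graph's total loss
    return tf.train.GradientDescentOptimizer(learning_rate).minimize(total_loss)
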
Code Example #2
File: jordan-forward.py Project: aby2s/is13
        'lr': 0.0627142536696559,
        'verbose': 1,
        'decay': False,  # decay on the learning rate if improvement stops
        'win': 7,  # number of words in the context window
        'bs': 9,  # number of backprop through time steps
        'nhidden': 100,  # number of hidden units
        'seed': 345,
        'emb_dimension': 100,  # dimension of word embedding
        'nepochs': 50
    }

    folder = os.path.basename(__file__).split('.')[0]
    if not os.path.exists(folder): os.mkdir(folder)

    # load the dataset
    train_set, valid_set, test_set, dic = load.atisfold(s['fold'])
    idx2label = dict((k, v) for v, k in dic['labels2idx'].items())
    idx2word = dict((k, v) for v, k in dic['words2idx'].items())

    train_lex, train_ne, train_y = train_set
    valid_lex, valid_ne, valid_y = valid_set
    test_lex, test_ne, test_y = test_set

    vocsize = len(set(reduce(lambda x, y: list(x) + list(y),
                             train_lex + valid_lex + test_lex)))

    nclasses = len(set(reduce(lambda x, y: list(x) + list(y),
                              train_y + test_y + valid_y)))
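
For reference, the `reduce` above just flattens the ragged array of index sequences into one long token list, so `set(...)` collects the distinct word (or label) ids. A tiny illustration, with `itertools.chain` as the more idiomatic equivalent:

from functools import reduce
from itertools import chain

sents = [[0, 1, 2], [1, 3]]
flat = reduce(lambda x, y: list(x) + list(y), sents)   # [0, 1, 2, 1, 3]
assert set(flat) == set(chain.from_iterable(sents))    # {0, 1, 2, 3}
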
Code Example #3
File: elman_forward.py Project: phdowling/is13
def main():
    settings = {
        'fold': 3,  # 5 folds 0,1,2,3,4
        'lr': 0.0627142536696559,
        'verbose': 1,
        'decay': False,  # decay on the learning rate if improvement stops
        'win': 7,  # number of words in the context window
        'bs': 9,  # number of backprop through time steps
        'nhidden': 100,  # number of hidden units
        'seed': 345,
        'emb_dimension': 100,  # dimension of word embedding
        'nepochs': 50
    }

    folder = os.path.basename(__file__).split('.')[0]

    if not os.path.exists(folder):
        os.mkdir(folder)

    # load the dataset
    train_set, valid_set, test_set, dic = load.atisfold(settings['fold'])
    idx2label = dict((k, v) for v, k in dic['labels2idx'].iteritems())
    idx2word = dict((k, v) for v, k in dic['words2idx'].iteritems())

    train_lex, train_ne, train_y = train_set
    valid_lex, valid_ne, valid_y = valid_set
    test_lex,  test_ne,  test_y = test_set

    vocsize = len(dic['words2idx'])
    nclasses = len(dic['labels2idx'])
    nsentences = len(train_lex)

    # instantiate the model
    numpy.random.seed(settings['seed'])
    random.seed(settings['seed'])

    if LOAD:
        print "Loading model from %s..." % folder

        rnn = ElmanRNNModel.load(folder)
    else:
        rnn = ElmanRNNModel(
            hidden_dims=settings['nhidden'],
            num_classes=nclasses,
            vocab_size=vocsize,
            embed_dims=settings['emb_dimension'],
            context_size=settings['win']
        )

    # train with early stopping on validation set
    best_f1 = -numpy.inf
    settings['current_lr'] = settings['lr']
    for e in xrange(settings['nepochs']):
        # shuffle
        shuffle([train_lex, train_ne, train_y], settings['seed'])
        settings['current_epoch'] = e
        tic = time.time()
        for i in xrange(nsentences):
            cwords = contextwin(train_lex[i], settings['win'])

            words = map(
                lambda x: numpy.asarray(x).astype('int32'),
                minibatch(cwords, settings['bs'])
            )

            labels = train_y[i]

            for word_batch, label_last_word in zip(words, labels):
                rnn.train(word_batch, label_last_word, settings['current_lr'])
                rnn.normalize()

            if settings['verbose']:
                print '[learning] epoch %i >> %2.2f%%' % (e, (i+1)*100./nsentences), \
                    'completed in %.2f (sec) <<\r' % (time.time()-tic),
                sys.stdout.flush()

        # evaluation // back into the real world : idx -> words
        predictions_test = [
            map(lambda x: idx2label[x],
                rnn.classify(numpy.asarray(contextwin(x, settings['win'])).astype('int32')))
            for x in test_lex
        ]

        groundtruth_test = [map(lambda x: idx2label[x], y) for y in test_y]

        words_test = [map(lambda x: idx2word[x], w) for w in test_lex]

        predictions_valid = [
            map(
                lambda idx: idx2label[idx],
                rnn.classify(
                    numpy.asarray(contextwin(x, settings['win'])).astype('int32'))
            )
            for x in valid_lex
        ]

        groundtruth_valid = [map(lambda x: idx2label[x], y) for y in valid_y]

        words_valid = [map(lambda x: idx2word[x], w) for w in valid_lex]

        # evaluation // compute the accuracy using conlleval.pl
        res_test = conlleval(predictions_test, groundtruth_test, words_test, folder + '/current.test.txt')
        res_valid = conlleval(predictions_valid, groundtruth_valid, words_valid, folder + '/current.valid.txt')

        if res_valid['f1'] > best_f1:
            rnn.save(folder)
            best_f1 = res_valid['f1']
            if settings['verbose']:
                print 'NEW BEST: epoch', e, 'valid F1', res_valid['f1'], 'best test F1', res_test['f1'], ' '*20
            settings['vf1'], settings['vp'], settings['vr'] = res_valid['f1'], res_valid['p'], res_valid['r']
            settings['tf1'], settings['tp'], settings['tr'] = res_test['f1'],  res_test['p'],  res_test['r']
            settings['be'] = e
            subprocess.call(['mv', folder + '/current.test.txt', folder + '/best.test.txt'])
            subprocess.call(['mv', folder + '/current.valid.txt', folder + '/best.valid.txt'])
        else:
            print ''

        # learning rate decay if no improvement in 10 epochs
        if settings['decay'] and abs(settings['be'] - settings['current_epoch']) >= 10:
            settings['current_lr'] *= 0.5

        if settings['current_lr'] < 1e-5:
            break

    print 'BEST RESULT: epoch', settings['be'], 'valid F1', settings['vf1'], 'best test F1', settings['tf1'], 'with the model', folder
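
The training loop above calls three small is13 helpers that the listing does not include: `contextwin`, `minibatch`, and `shuffle`. The sketch below reconstructs their behaviour from how they are used here (Python 3 syntax; treat the bodies as an approximation, not the project's exact source):

import random


def contextwin(sentence, win):
    # one `win`-sized window of word indices per word, borders padded with -1
    assert win % 2 == 1 and win >= 1
    sentence = list(sentence)
    padded = win // 2 * [-1] + sentence + win // 2 * [-1]
    return [padded[i:i + win] for i in range(len(sentence))]


def minibatch(windows, bs):
    # growing slices capped at `bs` windows: the backprop-through-time depth
    out = [windows[max(0, i - bs):i] for i in range(1, len(windows) + 1)]
    assert len(out) == len(windows)
    return out


def shuffle(lists, seed):
    # shuffle parallel lists in unison so sentences stay aligned with labels
    for l in lists:
        random.seed(seed)
        random.shuffle(l)
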
Code Example #4
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import data.load as load
import numpy as np
from functools import reduce


train_set, valid_set, test_set, dic = load.atisfold(3)
idx2label = dict((v,k) for k,v in dic['labels2idx'].items())
idx2word = dict((v,k) for k,v in dic['words2idx'].items())

train_lex, train_ne, train_y = train_set
valid_lex, valid_ne, valid_y = valid_set
test_lex,  test_ne,  test_y  = test_set

sent = train_lex[0]
print(list(map(lambda x: idx2word[x], sent)))

vocsize = len(set(reduce(lambda x, y: list(x)+list(y),
                         train_lex+valid_lex+test_lex)))

nclasses = len(set(reduce(lambda x, y: list(x)+list(y),
                          train_y+valid_y+test_y)))

nsentences = len(train_lex)

print("vocsize: %d, # of classes: %d, # of sentences: %d" % (vocsize, nclasses, nsentences))


def context_window(sentence, width=3):
    """Return one `width`-sized window of word indices per word,
    padding sentence borders with the index -1 (the behaviour of
    is13's `contextwin`; the original listing stops at the docstring,
    so the body here is a reconstruction)."""
    assert width % 2 == 1 and width >= 1
    sentence = list(sentence)
    padded = width // 2 * [-1] + sentence + width // 2 * [-1]
    return [padded[i:i + width] for i in range(len(sentence))]
Code Example #5
File: convert.py Project: zxch3n/is13
import random

import data.load as ml  # assumption: the same ATIS loader used in the other examples


def random_digit(n):
    num = list(range(10))
    ans = ''.join([str(random.choice(num)) for _ in range(n)])
    return ans


text = []
entities = []
is_train = []

for i in range(5):
    w2ne, w2la = {}, {}
    train, _, test, dic = ml.atisfold(i)

    w2idx, ne2idx, labels2idx = dic['words2idx'], dic['tables2idx'], dic[
        'labels2idx']

    idx2w = dict((v, k) for k, v in w2idx.iteritems())
    idx2ne = dict((v, k) for k, v in ne2idx.iteritems())
    idx2la = dict((v, k) for k, v in labels2idx.iteritems())

    test_x, test_ne, test_label = test
    train_x, train_ne, train_label = train
    wlength = 35

    for e in ['train', 'test']:  # pick each split's arrays by name via eval below
        for sw, se, sl in zip(eval(e + '_x'), eval(e + '_ne'),
                              eval(e + '_label')):
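
For reference, `random_digit` draws `n` digits independently and joins them into a string:

print(random_digit(4))  # e.g. '3817' -- a 4-character string of random digits
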
Code Example #6
File: elman-forward.py Project: MajorTal/is13
    s = {'fold':3, # 5 folds 0,1,2,3,4
         'lr':0.0627142536696559,
         'verbose':1,
         'decay':False, # decay on the learning rate if improvement stops
         'win':7, # number of words in the context window
         'bs':9, # number of backprop through time steps
         'nhidden':100, # number of hidden units
         'seed':345,
         'emb_dimension':100, # dimension of word embedding
         'nepochs':50}

    folder = os.path.basename(__file__).split('.')[0]
    if not os.path.exists(folder): os.mkdir(folder)

    # load the dataset
    train_set, valid_set, test_set, dic = load.atisfold(s['fold'])
    idx2label = dict((k,v) for v,k in dic['labels2idx'].iteritems())
    idx2word  = dict((k,v) for v,k in dic['words2idx'].iteritems())

    train_lex, train_ne, train_y = train_set
    valid_lex, valid_ne, valid_y = valid_set
    test_lex,  test_ne,  test_y  = test_set

    vocsize = len(set(reduce(lambda x, y: list(x) + list(y),
                             train_lex + valid_lex + test_lex)))

    nclasses = len(set(reduce(lambda x, y: list(x) + list(y),
                              train_y + test_y + valid_y)))