Example #1
0
                try:
                    x_fvec.append(model[term.split('-')[0]])
                except KeyError:
                    # same hack as in the training process
                    x_fvec.append(model['some'])
            # map word to label_index
            if term in word2labelindx:
                # is label
                labels.append(word2labelindx[term])
            else:
                # not a label
                labels.append(word2labelindx["XXXXX"])
            termss.append(term)

        # Shift the label sequence one step to the left: append the
        # end-of-sentence label, then drop the first (start-padding) entry.
        labels.append(word2labelindx["</s>"])
        del labels[0]

        # Build the context windows (padded with the <s> / </s> vectors) and
        # run the network on them as a float32 array.
        cwords = contextwin(x_fvec, s['win'], model["<s>"], model["</s>"])
        window_matrix = numpy.asarray(cwords).astype('float32')
        prediction_test = rnn.test(window_matrix)

        # For every position keep only the score assigned to its label index.
        prediction_test2 = []
        for term_indx, label_indx in enumerate(labels):
            prediction_test2.append(prediction_test[term_indx, label_indx])

        test_question_probs.append(prediction_test2)
        print(','.join(map(str, prediction_test2)))
Example #2
0
    word2idx  = dic['words2idx']
    idx2label = dict((k,v) for v,k in dic['labels2idx'].iteritems())
    idx2word  = dict((k,v) for v,k in dic['words2idx'].iteritems())
    
    vocsize = len(dic['words2idx'])
    nclasses = len(dic['labels2idx'])
    
    print 'load model'
    rnn = model(    nh = s['nhidden'],
                    nc = nclasses,
                    ne = vocsize,
                    de = s['emb_dimension'],
                    cs = s['win'] )
    rnn.load(folder)
    
    print 'test ...'
    test_lex = ['NOUN', 'X', 'NOUN', 'NOUN', 'NOUN']
    test_lex = [ word2idx[w] for w in test_lex]
    test_slex = ['NOUN', 'NOUN', 'CONJ', 'NOUN', 'NOUN', 'NOUN']
    test_slex = [ word2idx[w] for w in test_slex]
    test_sy = ['O', 'O', 'O', 'B-T', 'I-T', 'I-T']
    test_sy = [ label2idx[w] for w in test_sy]
    
    # evaluation
    predictions_test = [ map(lambda x: idx2label[x], \
                         rnn.classify(numpy.asarray(contextwin(test_lex, s['win'])).astype('int32'), test_slex, test_sy)) ]
    print predictions_test

                

Example #3
0
                    x_fvec.append(model[term.split('-')[0]])
                except KeyError:
                    # same hack as in the training process
                    x_fvec.append(model['some'])
            # map word to label_index
            if term in word2labelindx:
                # is label
                labels.append(word2labelindx[term])
            else:
                # not a label
                labels.append(word2labelindx["XXXXX"])
            termss.append(term)

        # Shift the label sequence one step to the left: append the
        # end-of-sentence label, then drop the first (start-padding) entry.
        labels.append(word2labelindx["</s>"])
        del labels[0]

        # Build the context windows (padded with the <s> / </s> vectors) and
        # run the network on them as a float32 array.
        cwords = contextwin(x_fvec, s['win'], model["<s>"], model["</s>"])
        window_matrix = numpy.asarray(cwords).astype('float32')
        prediction_test = rnn.test(window_matrix)

        # For every position keep only the score assigned to its label index.
        prediction_test2 = []
        for term_indx, label_indx in enumerate(labels):
            prediction_test2.append(prediction_test[term_indx, label_indx])

        test_question_probs.append(prediction_test2)
        print(','.join(map(str, prediction_test2)))
    rnn = model(    nh = s['nhidden'],
                    nc = nclasses,
                    ne = vocsize,
                    de = s['emb_dimension'],
                    cs = s['win'] )

    # train with early stopping on validation set
    best_f1 = -numpy.inf
    s['clr'] = s['lr']
    for e in xrange(s['nepochs']):
        # shuffle
        shuffle([train_lex, train_ne, train_y], s['seed'])
        s['ce'] = e
        tic = time.time()
        # One pass over the training sentences for the current epoch `e`.
        for i in xrange(nsentences):
            # Context windows for sentence i, window size s['win'].
            cwords = contextwin(train_lex[i], s['win'])
            # Group the windows with minibatch(..., s['bs']) and cast each
            # group to an int32 array.
            words  = map(lambda x: numpy.asarray(x).astype('int32'),\
                         minibatch(cwords, s['bs']))
            labels = train_y[i]

            # One training call per (window batch, label) pair, each followed
            # by rnn.normalize().
            for word_batch , label_last_word in zip(words, labels):
                rnn.train(word_batch, label_last_word, s['clr'])
                rnn.normalize()

            # Progress line: the trailing comma (Python 2 print statement)
            # suppresses the newline and '\r' returns the cursor, so the line
            # is overwritten in place; flush to make it visible immediately.
            if s['verbose']:
                print '[learning] epoch %i >> %2.2f%%'%(e,(i+1)*100./nsentences),'completed in %.2f (sec) <<\r'%(time.time()-tic),
                sys.stdout.flush()
            
        # evaluation // back into the real world : idx -> words
        predictions_test = [ map(lambda x: idx2label[x], \
                             rnn.classify(numpy.asarray(contextwin(x, s['win'])).astype('int32')))\
Example #5
0
'''

best_params = {}
# train with early stopping on validation set
best_f1 = -numpy.inf
s['clr'] = s['lr']
training_loss = []
for e in xrange(s['nepochs']):
    # shuffling of data per epoch
    shuffle([X_train_idxs, X_train_pos_idxs, X_train_chunk_idxs, Y_train_idxs], s['seed'])
    s['ce'] = e
    tic = time.time()
    loss = 0.0
    # One pass over the training sentences; `loss` accumulates whatever
    # birnn.sentence_train returns for each sentence.
    for i in xrange(num_train_sentences):
        #print X_train_idxs[i]
        # For each input stream (words, POS tags, chunk tags) build the
        # context windows, plus a reversed copy used as the backward input.
        sentence_forward = contextwin(X_train_idxs[i], s['win'])
        sentence_backward = list(reversed(sentence_forward))
        sentence_pos_forward = contextwin(X_train_pos_idxs[i], s['win'])
        sentence_pos_backward = list(reversed(sentence_pos_forward))
        sentence_chunk_forward = contextwin(X_train_chunk_idxs[i], s['win'])
        sentence_chunk_backward = list(reversed(sentence_chunk_forward))
        labels = Y_train_idxs[i]
        # Alternative (disabled): train the `rnn` model instead.
        #loss += rnn.sentence_train(sentence_forward, sentence_pos_forward, sentence_chunk_forward, labels, s['clr'])
        #rnn.normalize()
        # Train on the whole sentence with the current learning rate
        # s['clr'], then call birnn.normalize().
        loss += birnn.sentence_train(sentence_forward, sentence_backward, sentence_pos_forward, sentence_pos_backward,
                                     sentence_chunk_forward, sentence_chunk_backward, labels, s['clr'])
        birnn.normalize()
        # Alternative (disabled): train the `lstm` model instead.
        #loss += lstm.sentence_train(sentence_forward, sentence_pos_forward, sentence_chunk_forward, labels, s['clr'])
        #lstm.normalize()
        # Progress line; the trailing comma (Python 2 print statement)
        # suppresses the newline so '\r' lets the next update overwrite it.
        if s['verbose']:
            print '[learning] epoch %i >> %2.2f%%'%(e,(i+1)*100./num_train_sentences),'completed in %.2f (sec) <<\r'%(time.time()-tic),
Example #6
0
                fnum0=s['f0'],
                fnum1=s['f1'],
                fnum2=s['f2'],
                fnum3=s['f3'],
                fnum4=s['f4'],
                kalpha=s['kalpha'])
    rnn.emb = load_emb('data/embeddingsall')
    #print 'load parameter'

    s['cur_lr'] = s['lr']

    # Classify every dev sentence: the word sequence is windowed with
    # s['wsize'], each of the five feature sequences with s['fsize'].
    dev_pred = []
    dev_columns = zip(dev_word, dev_f0, dev_f1, dev_f2, dev_f3, dev_f4)
    for words, f0i, f1i, f2i, f3i, f4i in dev_columns:
        word_windows = contextwin(words, s['wsize'])
        feature_windows = [contextwin(feat, s['fsize'])
                           for feat in (f0i, f1i, f2i, f3i, f4i)]
        dev_pred.append(rnn.classify(word_windows, *feature_windows))
    res_dev = conlleval(dev_pred, dev_label, dev_word)
    print ""
    for (d, x) in res_dev.items():
        print d + ": " + str(x)
    sys.stdout.flush()
    best_dev = 0

    train_weight = list([1.0] for i in range(nsentences))
Example #7
0
File: run.py Project: iankuoli/RNN
                    x_fvec.append(model[term])
                else:
                    # for instance: 'good-humoured' ==> 'good'
                    x_fvec.append(model[term.split('-')[0]])

                # map word to label_index
                if term in word2labelindx:
                    # is label
                    labels.append(word2labelindx[term])
                else:
                    # not a label
                    labels.append(word2labelindx["XXXXX"])
            #  --- end modified ---

            # Previous index-based variant, kept for reference:
            #cwords = contextwin(train_lex[i], s['win'])
            # Context windows padded with the <s> / </s> vectors.
            # NOTE(review): the loop above appends one vector per term to
            # x_fvec, so x_fvec[i] selects a single entry by the sentence
            # counter i rather than the whole sentence -- confirm this is
            # intended.
            cwords = contextwin(x_fvec[i], s['win'], model["<s>"], model["</s>"])


            # NOTE(review): the windows are cast to int32; if model[...] yields
            # float embedding vectors this truncates them -- confirm the dtype.
            words  = map(lambda x: numpy.asarray(x).astype('int32'), minibatch(cwords, s['bs']))
            #labels = train_y[i]


            # One training call per (window batch, label) pair; `labels` is
            # the list filled in the loop above. Normalisation is disabled.
            for word_batch, label_last_word in zip(words, labels):
                rnn.train(word_batch, label_last_word, s['clr'])
                #rnn.normalize()
            # Progress line.
            # NOTE(review): the trailing comma sits inside the print(...) call,
            # so under Python 3 it does not suppress the newline (that would
            # need end=''); under Python 2 this prints a tuple -- confirm the
            # target interpreter.
            if s['verbose']:
                print('[learning] epoch %i >> %2.2f%%'%(e,(i+1)*100./nsentences),'completed in %.2f (sec) <<\r'%(time.time()-tic),)
                sys.stdout.flush()

        # evaluation // back into the real world : idx -> words
        """
Example #8
0
    # train with early stopping on validation set
    print 'train with set...'
    best_f1 = -numpy.inf
    s['clr'] = s['lr']

    print time.localtime(time.time())

    for e in xrange(s['nepochs']):
        # shuffle
        shuffle([train_lex, train_y], s['seed'])
        s['ce'] = e
        tic = time.time()
        for i in xrange(nsentences):
            #print 'i=', i
            cwords = contextwin(train_lex[i], s['win'])
            words = map(lambda x: numpy.asarray(x).astype('int32'),
                        minibatch(cwords, s['bs']))
            labels = train_y[i]
            #print 'label=', labels
            for word_batch, label_last_word in zip(words, labels):
                t = rnn.train(word_batch, label_last_word, s['clr'])
                rnn.normalize()
            if (i + 1) % 270 == 0 & s['verbose']:
                print '[learning] epoch %i >> %2.2f%%' % (
                    e, (i + 1) * 100. / nsentences
                ), 'completed in %.2f (sec) <<\r' % (time.time() - tic)
                # sys.stdout.flush()

        # evaluation // back into the real world : idx -> words
        print 'evaluation step1: back into the real world : idx -> words'
Example #9
0
                de=s['emb_dimension'],
                cs=s['win'])

    # train with early stopping on validation set
    best_f1 = -numpy.inf
    s['clr'] = s['lr']
    for e in range(s['nepochs']):

        # shuffle training data
        shuffle([train_lex, train_ne, train_y], s['seed'])
        s['ce'] = e
        tic = time.time()

        # for each sentence
        # for each sentence
        for i in range(nsentences):
            # Context windows for sentence i, grouped by minibatch(..., s['bs'])
            # and cast to int32 arrays.
            cwords = contextwin(train_lex[i], s['win'])
            words = map(lambda x: numpy.asarray(x).astype('int32'),
                        minibatch(cwords, s['bs']))
            labels = train_y[i]

            # perform one update (followed by a normalize() call) per
            # (window batch, label) pair of the sentence
            for word_batch, label_last_word in zip(words, labels):
                rnn.train(word_batch, label_last_word, s['clr'])
                rnn.normalize()

            # Progress is printed unconditionally after every sentence.
            print('training epoch %i > %2.2f%%' %
                  (e, (i + 1) * 100. / nsentences))
            sys.stdout.flush()

        # evaluation
        # test_y, valid_y, train_y contain correct labels
Example #10
0
                fsize=s['fsize'],
                L2=s['L2'],
                fnum0=s['f0'],
                fnum1=s['f1'],
                fnum2=s['f2'],
                fnum3=s['f3'],
                fnum4=s['f4'],
                kalpha=s['kalpha'])
    #rnn.emb = load_emb("data/embeddingsall")
    s['cur_lr'] = s['lr']

    # Classify every dev sentence: the word sequence is windowed with
    # s['wsize'], each of the five feature sequences with s['fsize'].
    dev_pred = []
    dev_columns = zip(dev_word, dev_f0, dev_f1, dev_f2, dev_f3, dev_f4)
    for words, f0i, f1i, f2i, f3i, f4i in dev_columns:
        word_windows = contextwin(words, s['wsize'])
        feature_windows = [contextwin(feat, s['fsize'])
                           for feat in (f0i, f1i, f2i, f3i, f4i)]
        dev_pred.append(rnn.classify(word_windows, *feature_windows))
    # Dump the predictions, one sentence per line, each prediction followed
    # by a single space. The context manager closes the file even if a write
    # raises, which the explicit open()/close() pair did not guarantee.
    with open("data/semi_test_pred.txt", 'w') as fp:
        for sentence_pred in dev_pred:
            fp.write("".join(str(p) + " " for p in sentence_pred) + "\n")

    res_dev = conlleval(train_label, train_word, dev_pred, dev_label, dev_word)
    print ""