# NOTE(review): this line sits at column 0 while the rest of the block is
# indented 4 spaces -- presumably a chunking/paste artifact; confirm the
# indentation against the full file.
vocsize = len(words2idx)
    # Number of distinct output label classes.
    nclasses = len(labels2idx)
    # Number of sentences in the test split.
    nsentences = len(test_lex)

    # Seed both numpy's and the stdlib's RNGs for reproducibility.
    numpy.random.seed(s['seed'])
    random.seed(s['seed'])
    # Build the RNN from hyper-parameters in the settings dict `s`
    # (hidden size, class count, vocab size, embedding dim, context window)
    # and restore previously trained weights from `folder`.
    rnn = model(    nh = s['nhidden'],
                    nc = nclasses,
                    ne = vocsize,
                    de = s['emb_dimension'],
                    cs = s['win'] )
    rnn.load(folder)
    #select a few popular words show, me, movies, what, in
    #w_idx = word2idx['show'];
    #print w_idx
    #w_emb = rnn.emb[w_idx]
    #print w_emb
    #word->index->emb
    #find n nearest embeddings
    #emb->index->word

    # Decode model predictions and gold labels back to label strings, and
    # word indices back to words, for CoNLL-style evaluation.
    # (Python 2 `map` returns a list, so these are lists of lists of str.)
    predictions_test = [ map(lambda x: idx2label[x], \
                             rnn.classify(numpy.asarray(contextwin(x, s['win'])).astype('int32')))\
                             for x in test_lex ]
    groundtruth_test = [ map(lambda x: idx2label[x], y) for y in test_y ]
    words_test = [ map(lambda x: idx2word[x], w) for w in test_lex]
    # conlleval writes its working file to the path given in argv[6];
    # res_test presumably holds accuracy/precision/recall/F1 -- confirm
    # against the conlleval helper's return value.
    res_test = conlleval(predictions_test, groundtruth_test, words_test, sys.argv[6])

    #print 'Test set performance -- F1: ', res_test['f1'], ' '*20
    # Report accuracy, precision, recall and F1 on one line.
    print res_test['a'],' ',res_test['p'],' ',res_test['r'],' ',res_test['f1'],' '
                    # NOTE(review): removed a stray duplicate of the
                    # `model(...)` keyword arguments (ne=/de=/cs=) that was
                    # left behind by a bad merge/paste -- the dangling
                    # fragment was a syntax error at this position.

    # train with early stopping on validation set
    best_f1 = -numpy.inf
    s['clr'] = s['lr']
    for e in xrange(s['nepochs']):
        # shuffle
        shuffle([train_lex, train_y], s['seed'])
        s['ce'] = e
        tic = time.time()
	total_cost = 0
        count = 0
        for i in xrange(nsentences):
            cwords = contextwin(train_lex[i], s['win'])
            words  = map(lambda x: numpy.asarray(x).astype('int32'),\
                         minibatch(cwords, s['bs']))
            labels = train_y[i]
            for word_batch , label_last_word in zip(words, labels):
		total_cost += rnn.train(word_batch, label_last_word, s['clr'])
                count +=1
                rnn.normalize()
            if s['verbose']:
                print '[learning] epoch %i >> %2.2f%%'%(e,(i+1)*100./nsentences),'completed in %.2f (sec) <<\r'%(time.time()-tic),
                sys.stdout.flush()
        print ''
        print 'Learning rate: %2.4f'%(s['clr'])
        print 'Average Training Cost: %2.4f'%(total_cost/count)

        predictions_valid = [ map(lambda x: idx2label[x], \