Example #1
0
File: cws.py Project: zbxzc35/cws
class CWS:
    # Chinese word segmentation model: a thin training/prediction wrapper
    # around an Elman-style RNN (Python 2 code: print statements, xrange).
    #
    # `s` is a settings dict; keys read in this class:
    #   'ne','de','win','nh','nc','seed'  - RNN construction parameters
    #   'valid_size','lr','n_epochs','bs','verbose','decay' - training knobs
    # 'clr' (current learning rate) is written into `s` by fit().
    def __init__(self, s):
	"""Build the underlying RNN from settings dict ``s`` and keep ``s``."""
	self.rnn = RNN(s['ne'], s['de'], s['win'], s['nh'], s['nc'], np.random.RandomState(s['seed']))
	self.s = s

    def fit(self, lex, label):
	"""Train on sentences ``lex`` with per-token ``label``.

	Splits off the last ``valid_size`` fraction as a validation set each
	epoch, saves parameters whenever validation F1 improves, optionally
	halves the learning rate after 5 epochs without improvement, and
	stops early once the learning rate drops below 1e-5.
	"""
	s = self.s
	n_sentences = len(lex)
	# Hold out the tail of the (shuffled) data for validation.
	n_train = int(n_sentences * (1. - s['valid_size']))
	s['clr'] = s['lr']  # current learning rate, decayed below
	best_f = 0
	be = 0  # epoch of the best validation F1 so far
	for e in xrange(s['n_epochs']):
	    # NOTE(review): presumably shuffles lex and label in lockstep,
	    # in place, seeded for reproducibility — confirm in shuffle().
	    shuffle([lex, label], s['seed'])
	    train_lex, valid_lex = lex[:n_train], lex[n_train:]
	    train_label, valid_label = label[:n_train], label[n_train:]
	    tic = time.time()
	    for i in xrange(n_train):
		# Expand sentence i into context windows, then into int32
		# minibatches of size s['bs'] for the RNN.
		cwords = contextwin(train_lex[i], s['win'])
		words = map(lambda x: np.asarray(x).astype('int32'), minibatch(cwords, s['bs']))
		labels = train_label[i]
		for word_batch, label_last_word in zip(words, labels):
		    self.rnn.fit(word_batch, label_last_word, s['clr'])
		    # Keep embeddings normalized after each update.
		    self.rnn.normalize()
		    if s['verbose']:
			print '[learning] epoch %i >> %2.2f%%' % (e+1, (i+1)*100./n_train), 'completed in %s << \r' % time_format(time.time() - tic),
			sys.stdout.flush()

	    # Evaluate precision/recall/F1 on the held-out sentences.
	    pred_y = self.predict(valid_lex)
	    p, r, f = evaluate(pred_y, valid_label)
	    print '[learning] epoch %i >> P: %2.2f%% R: %2.2f%% F: %2.2f%%' % (e+1, p*100., r*100., f*100.), '<< %s used' % time_format(time.time() - tic)
	    
	    if f > best_f:
		best_f = f
		be = e
		self.save()
    
	    # Halve the learning rate after 5 epochs with no improvement;
	    # stop entirely once it has decayed below 1e-5.
	    if s['decay'] and e - be >= 5: s['clr'] *= 0.5	    
	    if s['clr'] < 1e-5: break

    def predict(self, lex):
	"""Return predicted label sequences for the sentences in ``lex``.

	The first and last predictions of each sentence are dropped
	([1:-1]) — presumably padding introduced by contextwin; confirm.
	"""
	s = self.s
	y = [self.rnn.predict(np.asarray(contextwin(x, s['win'])).astype('int32'))[1:-1] for x in lex]
	return y

    def save(self):
	"""Save RNN parameters under ./params (created if missing)."""
	if not os.path.exists('params'): os.mkdir('params')
	self.rnn.save() 

    def load(self):
	"""Restore previously saved RNN parameters."""
	self.rnn.load()
Example #2
0
from rnn import RNN
from tokens import tokenize

# Corpus format: sentences separated by blank lines; drop empty entries.
with open('input.txt', 'r') as corpus:
    sentences = list(filter(None, corpus.read().split('\n\n')))

# Build next-token training pairs per sentence: X is every token but the
# last, Y is the same sequence shifted left by one.
tokens = []
X = []
Y = []
for sentence in sentences:
    seq = []
    for ch in sentence:
        # Skip characters the tokenizer cannot map (reported as "UNKNOWN").
        if tokenize(ch) != "UNKNOWN":
            seq.append(tokenize(ch))
    tokens.append(seq)
    X.append(seq[:-1])
    Y.append(seq[1:])

# Restore pre-trained weights, then continue training on the new pairs.
model = RNN(61, 200, 10)
model.load('uwv.pkl')
model.train(X, Y, 0.1, 10, 1)
                           labels=None,
                           output_shape=2)
#generator = BatchGenerator(sonar_text, batch_size, num_unrollings, labels=sonar_labels, output_shape=2)

##########################################
# Create RNN
##########################################

# Network/reporting configuration.
summary_frequency = 10
num_nodes = 128
num_layers = 1
model_path = './model/checkpoint.ckpt'

# NOTE(review): this rebinds the class name RNN to the instance; all calls
# below go through the instance, which matches the original script.
RNN = RNN(summary_frequency, num_nodes, num_layers, generator,
          labeled=False, output_shape=2)

# Train, plot progress, then checkpoint and reload with full_model=False.
RNN.train(100)
RNN.plot()

RNN.save(model_path, full_model=False)
RNN.load(model_path, full_model=False)

#RNN.sample_sentence('japan is al geruime tijd een natie van de erkende naties inderdaad d', 100)
#RNN.sample_sentence(' ' * (num_unrollings+1), 1000)

#x,y=generator._next()
#pred = RNN.predict(x)