# Disabled legacy "batch train" loop, kept for reference only.
# NOTE(review): the nesting below was reconstructed from a collapsed line;
# confirm it against history before re-enabling.
#
# start = time.time()
# for i in xrange(100):
#     acc = 0.0
#     in_start = time.time()
#     # for s in xrange(len(seqs)):
#     #     seq = seqs[s]
#     #     X = seq[0 : len(seq) - 1, ]
#     #     Y = seq[1 : len(seq), ]
#     #     model.batch_train(X, Y, lr)
#     # in_time = time.time() - in_start
#     for batch_id, xy in data_xy.items():
#         # print xy[0]
#         X, Y, zero_m, seqs_len = data.index2seqs(seqs, xy[0], w2i)
#         print len(X), len(Y)
#         concat_X = np.zeros((seqs_len, len(X) * len(w2i)), dtype=theano.config.floatX)
#         concat_Y = concat_X.copy()
#         for b_i in xrange(len(X)):
#             iX = X[b_i]
#             iY = Y[b_i]
#             for r in xrange(seqs_len - iX.shape[0]):
#                 iX = np.concatenate((iX, zero_m), axis=0)
#                 iY = np.concatenate((iY, zero_m), axis=0)
#             model.batch_train(iX, iY, lr)
#         # print X, Y
#         # model.batch_train(X, Y, lr)
# Training driver: load the data set, build the RNN, then run up to 100
# passes over the batches, saving the model each time the pass error improves
# and stopping early once the error is at or below the threshold `e`.
# Relies on names bound elsewhere in this file: data, RNN, save_model, time,
# batch_size, hidden_size, cell, optimizer, drop_rate, lr and e.
seqs, i2w, w2i, data_xy = data.load_hlm("/data/hlm/hlm.txt", batch_size)

# Input and output widths are both the size of the w2i mapping.
dim_x = dim_y = len(w2i)
print(" ".join(map(str, ("#features = ", dim_x, "#labels = ", dim_y))))

print("compiling...")
model = RNN(dim_x, dim_y, hidden_size, cell, optimizer, drop_rate)
#model = load_model("./model/rnn_hlm.model", model)

print("training...")
start = time.time()
g_error = 9999.9999  # lowest pass error seen so far; starts above any expected value
for i in xrange(100):
    epoch_start = time.time()
    error = 0.0
    for batch_id, batch in data_xy.items():
        X, Y, mask, local_batch_size = data.index2seqs(seqs, batch[0], w2i)
        # Only the first value returned by model.train is used as the cost.
        batch_cost = model.train(X, mask, Y, lr, local_batch_size)[0]
        error += batch_cost
        # Progress line: pass index, best error, batch id / batch count, cost.
        progress = (i, g_error, batch_id, "/", len(data_xy), batch_cost)
        print(" ".join(map(str, progress)))
    epoch_time = time.time() - epoch_start

    # NOTE(review): one cost per batch is summed, yet the total is divided by
    # len(seqs) rather than len(data_xy) -- confirm this denominator is intended.
    error /= len(seqs)
    if error < g_error:
        # New best: remember it and write a checkpoint tagged with the pass index.
        g_error = error
        save_model("./model/rnn_hlm.model_" + str(i), model)

    print("Iter = " + str(i) + ", Error = " + str(error) + ", Time = " + str(epoch_time))
    if error <= e:
        break

print("Finished. Time = " + str(time.time() - start))