use_gpu(1) e = 0.01 lr = 0.8 drop_rate = 0. batch_size = 20 hidden_size = [100, 100] input_size = 9235 output_size = 104 # try: gru, lstm cell = "gru" # try: sgd, momentum, rmsprop, adagrad, adadelta, adam optimizer = "adadelta" seqs, i2w, w2i, data_xy, existing_annos = data.char_sequence("/gds/zhwang/zhwang/data/cuhk/11", batch_size, input_size, output_size) #dim_x = len(w2i) dim_x = input_size dim_y = output_size #dim_y = len(existing_annos) print "#features = ", dim_x, "#labels = ", dim_y print "compiling..." model = RNN(dim_x, dim_y, hidden_size, cell, optimizer, drop_rate) print "training..." start = time.time() g_error = 9999.9999 for i in xrange(100): error = 0.0
#pylint: skip-file import time import sys import numpy as np import theano import theano.tensor as T from utils_pg import * from rnn import * use_gpu(0) import data drop_rate = 0. batch_size = 20 seqs, i2w, w2i, data_xy = data.char_sequence("/data/toy.txt", batch_size) hidden_size = [100, 100] dim_x = len(w2i) dim_y = len(w2i) print dim_x, dim_y cell = "gru" # cell = "gru" or "lstm" optimizer = "adadelta" print "building..." model = RNN(dim_x, dim_y, hidden_size, cell, optimizer, drop_rate) model = load_model("./model/char_rnn.model", model) num_x = 0.0 acc = 0.0 for s in xrange(len(seqs)): seq = seqs[s]
use_gpu(1) e = 0.01 lr = 0.8 drop_rate = 0. batch_size = 20 hidden_size = [100, 100] input_size = 9235 output_size = 104 # try: gru, lstm cell = "gru" # try: sgd, momentum, rmsprop, adagrad, adadelta, adam optimizer = "adadelta" seqs, i2w, w2i, data_xy, existing_annos = data.char_sequence( "/gds/zhwang/zhwang/data/cuhk/11", batch_size, input_size, output_size) #dim_x = len(w2i) dim_x = input_size dim_y = output_size #dim_y = len(existing_annos) print "#features = ", dim_x, "#labels = ", dim_y print "compiling..." model = RNN(dim_x, dim_y, hidden_size, cell, optimizer, drop_rate) print "training..." start = time.time() g_error = 9999.9999 for i in xrange(100): error = 0.0
from rnn import * import data #use_gpu(0) e = 0.01 lr = 4.0 drop_rate = 0. batch_size = 128 hidden_size = [256] # try: gru, lstm cell = "lstm" # try: sgd, momentum, rmsprop, adagrad, adadelta, adam, nesterov_momentum optimizer = "sgd" seqs, i2w, w2i, data_xy = data.char_sequence("../../data/19.txt", batch_size) dim_x = len(w2i) dim_y = len(w2i) print "#features = ", dim_x, "#labels = ", dim_y XX = theano.shared(np.asarray(data_xy[0])) YY = theano.shared(np.asarray(data_xy[1])) batch_l = data_xy[2] print "compiling..." model = RNN(dim_x, dim_y, hidden_size, XX, YY, cell, optimizer) print "training..." start = time.time() g_error = 9999.9999 count = 0 for i in xrange(1):
from rnn import * import data use_gpu(0) e = 0.01 lr = 0.2 drop_rate = 0. batch_size = 20 hidden_size = [100, 100] # try: gru, lstm cell = "gru" # try: sgd, momentum, rmsprop, adagrad, adadelta, adam, nesterov_momentum optimizer = "nesterov_momentum" seqs, i2w, w2i, data_xy = data.char_sequence("/data/toy.txt", batch_size) dim_x = len(w2i) dim_y = len(w2i) print "#features = ", dim_x, "#labels = ", dim_y print "compiling..." model = RNN(dim_x, dim_y, hidden_size, cell, optimizer, drop_rate) print "training..." start = time.time() g_error = 9999.9999 for i in xrange(200): error = 0.0 in_start = time.time() for batch_id, xy in data_xy.items(): X = xy[0]
local_rnn = False # true: lstm.py; false: nn.LSTM lr = 1 drop_rate = 0. batch_size = 128 hidden_size = 500 emb_size = 300 cell = "lstm" use_cuda = use_gpu and torch.cuda.is_available() device = torch.device("cuda" if use_cuda else "cpu") dtype = torch.FloatTensor xy_list, i2w, w2i, batch_list = data.char_sequence("/data/shakespeare.txt", batch_size) dict_size = len(w2i) num_batches = len(batch_list) print "dict_size=", dict_size, "#batchs=", num_batches, "#batch_size", batch_size print "compiling..." model = RNN(dict_size, hidden_size, batch_size, emb_size, dict_size, local_rnn, cell) optimizer = torch.optim.Adadelta(model.parameters(), lr=lr) weight = torch.ones(dict_size) weight[0] = 0.0 if use_cuda: model.cuda() weight = weight.cuda() criterion = nn.CrossEntropyLoss(weight)
import time import sys import numpy as np import theano import theano.tensor as T from utils import * from rnn import * import data import jieba reload(sys) sys.setdefaultencoding('utf8') theano.config.optimizer = "fast_compile" theano.config.exception_verbosity = "high" seqs, i2w, w2i, data_xy = data.char_sequence("./data/computer.txt") e = 0.01 lr = 0.1 drop_rate = 0.3 batch_size = 1 hidden_size = [400, 400] dim_x = len(w2i) dim_y = len(w2i) print dim_x, dim_y cell = "gru" print "building..." model = RNN(dim_x, dim_y, hidden_size, cell, drop_rate) # cell = "gru" or "lstm"
use_gpu = True lr = 1 drop_rate = 0. batch_size = 3 hidden_size = 100 emb_size = 200 cell = "lstm" use_cuda = use_gpu and torch.cuda.is_available() device = torch.device("cuda" if use_cuda else "cpu") dtype = torch.FloatTensor xy_list, i2w, w2i, batch_list = data.char_sequence("/data/toy.txt", batch_size) dict_size = len(w2i) num_batches = len(batch_list) print "dict_size=", dict_size, "#batchs=", num_batches, "#batch_size", batch_size print "compiling..." model = RNN(dict_size, hidden_size, batch_size, emb_size, dict_size, cell) optimizer = torch.optim.Adadelta(model.parameters(), lr=lr) criterion = nn.CrossEntropyLoss() if use_cuda: model.cuda() #def train(x, y): print "training..." start = time.time()