import numpy as np
from chainer import Variable, cuda

# CharLSTM, CharIRNN and ChainerClassifier are defined elsewhere in this repo.


class RNNCharEstimator(ChainerClassifier):
    def __init__(self, net_type='lstm', net_hidden=100, vocab_size=1000,
                 dropout_ratio=0.0, seq_size=70, grad_clip=100.0, **params):
        ChainerClassifier.__init__(self, **params)
        self.net_hidden = net_hidden
        self.net_type = net_type
        self.vocab_size = vocab_size
        self.dropout_ratio = dropout_ratio
        self.seq_size = seq_size
        self.grad_clip = grad_clip
        self.param_names.append('vocab_size')
        self.param_names.append('net_type')
        self.param_names.append('net_hidden')
        self.param_names.append('dropout_ratio')

    def setup_network(self, n_features):
        if self.net_type == 'lstm':
            self.network = CharLSTM(self.vocab_size, self.net_hidden, self.batch_size)
        elif self.net_type == 'irnn':
            self.network = CharIRNN(self.vocab_size, self.net_hidden, self.batch_size)
        else:
            raise ValueError('unknown net_type: %s' % self.net_type)
        self.reset_accum_loss()

    def reset_accum_loss(self):
        # Zero-dimensional loss accumulator, allocated on the active device.
        if self.gpu >= 0:
            self.accum_loss = Variable(cuda.zeros(()))
        else:
            self.accum_loss = Variable(np.zeros(()))

    def forward_train(self, x, t):
        return self.network.train(x, t, dropout_ratio=self.dropout_ratio)

    def predict(self, x_data):
        self.network.reset_state(1)
        if self.gpu >= 0:
            self.network.to_gpu()
            x_data = cuda.to_gpu(x_data)
        results = None
        for i in xrange(x_data.shape[0]):
            x = Variable(x_data[i, :])
            y = self.network.predict(x)
            if results is None:
                results = cuda.to_cpu(y.data)
            else:
                results = np.concatenate([results, cuda.to_cpu(y.data)])
        results = results.argmax(1)
        return results

    def fit_update(self, loss, batch_id):
        # Accumulate the loss and run a truncated-BPTT update every seq_size batches.
        self.accum_loss += loss
        if ((batch_id + 1) % self.seq_size) == 0:
            self.optimizer.zero_grads()
            self.accum_loss.backward()
            self.accum_loss.unchain_backward()  # truncate the computation history
            self.optimizer.clip_grads(self.grad_clip)
            self.optimizer.update()
            self.reset_accum_loss()

    def make_batch(self, x_data, y_data, batch_id):
        # Stride the corpus so each batch row continues its own subsequence.
        batch_num = self.n_samples // self.batch_size
        x_batch = np.array([
            x_data[(batch_id + batch_num * j) % self.n_samples]
            for j in xrange(self.batch_size)
        ]).reshape(self.batch_size)
        y_batch = np.array([
            y_data[(batch_id + batch_num * j) % self.n_samples]
            for j in xrange(self.batch_size)
        ])
        return x_batch, y_batch
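# Usage sketch for RNNCharEstimator. Assumptions (not shown in this file):
# ChainerClassifier provides a scikit-learn style fit() driver that calls
# setup_network/make_batch/fit_update, and the input is a 1-D array of integer
# character ids whose target is the next id. Illustrative only.
if __name__ == '__main__':
    ids = np.random.randint(0, 1000, size=7000).astype(np.int32)  # stand-in corpus
    x_data, y_data = ids[:-1], ids[1:]  # each character predicts its successor
    est = RNNCharEstimator(net_type='lstm', net_hidden=128, vocab_size=1000)
    est.fit(x_data, y_data)
    print est.predict(x_data[:100])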
import pickle

from chainer import cuda, optimizers

# Hyperparameters from the command line (args is an argparse.Namespace).
n_epochs = args.epochs
n_units = args.rnn_size
batchsize = args.batchsize
bprop_len = args.seq_length
grad_clip = args.grad_clip

# Load the corpus and persist the vocabulary for later sampling.
train_data, words, vocab = load_data(args)
pickle.dump(vocab, open('%s/vocab.bin' % args.data_dir, 'wb'))

# Resume from a snapshot if one was given, otherwise build a fresh network.
if len(args.init_from) > 0:
    model = pickle.load(open(args.init_from, 'rb'))
else:
    if args.net == 'lstm':
        model = CharLSTM(len(vocab), n_units)
    elif args.net == 'irnn':
        model = CharIRNN(len(vocab), n_units)
    else:
        raise ValueError('unknown net: %s' % args.net)

state = model.make_initial_state(n_units, batchsize=batchsize)

if args.gpu >= 0:
    cuda.init()
    model.to_gpu()

if args.optimizer == 'rmsprop':
    optimizer = optimizers.RMSprop(lr=args.learning_rate, alpha=args.decay_rate)
elif args.optimizer == 'adam':
    optimizer = optimizers.Adam()
elif args.optimizer == 'adagrad':
    optimizer = optimizers.AdaGrad(lr=args.learning_rate)
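# A minimal sketch of the truncated-BPTT loop that would typically follow this
# setup. Assumptions (not shown in this excerpt): model.train(x, t) returns a
# loss Variable, mirroring RNNCharEstimator.forward_train above, and the update
# cadence mirrors RNNCharEstimator.fit_update. Illustrative only.
#
#   optimizer.setup(model)
#   accum_loss = Variable(np.zeros(()))
#   for i in xrange(len(train_data) - 1):
#       x = Variable(np.asarray([train_data[i]], dtype=np.int32))
#       t = Variable(np.asarray([train_data[i + 1]], dtype=np.int32))
#       accum_loss += model.train(x, t)
#       if (i + 1) % bprop_len == 0:
#           optimizer.zero_grads()
#           accum_loss.backward()
#           accum_loss.unchain_backward()   # truncate the history here
#           optimizer.clip_grads(grad_clip)
#           optimizer.update()
#           accum_loss = Variable(np.zeros(()))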