def dothot(a, indices, in_size, out=None, dont_add=False):
    """
    In:
        a: a numpy array of shape (N, H)
        indices: hot indices of a K-hot encoded matrix, shape (N, K)
    Out:
        out: a.T.dot(x), where x is the K-hot encoded matrix
    """
    N, H = a.shape
    _N, K = indices.shape
    if _N != N:
        raise ValueError('a.shape[0] != idx.shape[0]')
    bdim, gdim = Get_bdim_and_gdim1D(H)
    if dont_add:
        B = np.int32(1)
    else:
        B = np.int32(0)
    if out is None:
        out = cuda.zeros((H, in_size), dtype=np.float32)
    if K > 1:
        DotHot1_kernel.prepared_call(gdim, bdim,
                                     a.gpudata, out.gpudata, indices.gpudata,
                                     np.int32(K), np.int32(N), np.int32(H),
                                     np.int32(in_size), np.int32(B))
    else:
        DotHot2_kernel.prepared_call(gdim, bdim,
                                     a.gpudata, out.gpudata, indices.gpudata,
                                     np.int32(N), np.int32(H),
                                     np.int32(in_size), np.int32(B))
    return out
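For reference, a minimal NumPy sketch of the product the docstring describes (the helper name `dothot_cpu` is hypothetical, not part of the original kernels): `indices` of shape `(N, K)` is expanded into a K-hot matrix `x` of shape `(N, in_size)`, and the result is `a.T.dot(x)` of shape `(H, in_size)`.

import numpy as np

def dothot_cpu(a, indices, in_size):
    """CPU reference for a.T.dot(x), with x K-hot encoded from `indices`."""
    N, H = a.shape
    x = np.zeros((N, in_size), dtype=np.float32)
    for n in range(N):
        x[n, indices[n]] = 1.0   # set the K hot columns of row n
    return a.T.dot(x)            # shape (H, in_size)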
def trans(self, fromd, tod, train=True):
    self.bs = fromd.shape[1]
    self.enc.reset_state()
    self.mid.reset_state()
    self.dec.reset_state()
    self.zd = Variable(cuda.zeros((self.bs, self.nh), dtype=np.float32))
    loss = 0
    out = []
    # Encode the source sequence in reverse order.
    for wd in reversed(fromd):
        x = Variable(cuda.to_gpu(np.reshape(wd, (self.bs, 1))))
        y = self.enc(self.embed(x))
    self.mid.set_state(self.enc.c, self.enc.h)
    y = self.mid(self.zd)
    self.dec.set_state(self.mid.c, self.mid.h)
    if train:
        for wd in tod:
            t = Variable(cuda.to_gpu(np.reshape(wd, (self.bs))))
            x = self.out(self.dec(self.zd))
            loss += F.softmax_cross_entropy(x, t)
    else:
        for i in xrange(100):
            x = self.out(self.dec(self.zd))
            ch = np.argmax(x.data[0])  # bs should be 1
            out.append(ch)
            if ch == 0:
                break
    return loss, out
def forward_gpu(self, x):
    a, b = x
    shape = self._output_shape(a, b)
    ret = cuda.zeros(shape)
    _batch_matmul_gpu(a, b, transa=self.transa, transb=self.transb, out=ret)
    return ret,
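A rough CPU analogue, under the assumption that `_batch_matmul_gpu` multiplies each pair of matrices along the leading batch axis and that `transa`/`transb` transpose the last two axes (the helper below is only an illustrative sketch):

import numpy as np

def batch_matmul_cpu(a, b, transa=False, transb=False):
    # a: (batch, m, k), b: (batch, k, n) after optional transposes
    if transa:
        a = a.swapaxes(-1, -2)
    if transb:
        b = b.swapaxes(-1, -2)
    return np.einsum('bij,bjk->bik', a, b)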
if len(args.init_from) > 0:
    model = pickle.load(open(args.init_from, 'rb'))
else:
    model = CharRNN(len(vocab), n_units)

if args.gpu >= 0:
    cuda.init()
    model.to_gpu()

optimizer = optimizers.RMSprop(lr=args.learning_rate, alpha=args.decay_rate, eps=1e-8)
optimizer.setup(model.collect_parameters())

whole_len = train_data.shape[0]
jump = whole_len / batchsize
cur_log_perp = cuda.zeros(())
epoch = 0
start_at = time.time()
cur_at = start_at
state = make_initial_state(n_units, batchsize=batchsize)
if args.gpu >= 0:
    accum_loss = Variable(cuda.zeros(()))
    for key, value in state.items():
        value.data = cuda.to_gpu(value.data)
else:
    accum_loss = Variable(np.zeros(()))

print 'going to train {} iterations'.format(jump * n_epochs)
for i in xrange(jump * n_epochs):
    x_batch = np.array([train_data[(jump * j + i) % whole_len]
                        for j in xrange(batchsize)])
def zeros(shape, dtype=np.float32):
    # return Variable(np.zeros(shape, dtype=dtype))
    return Variable(cuda.zeros(shape, dtype=dtype))
if args.gpu >= 0:
    cuda.init()
    model.to_gpu()

optimizer = optimizers.RMSprop(lr=args.learning_rate, alpha=args.decay_rate, eps=1e-8)
optimizer.setup(model)

whole_len = train_data.shape[0]
jump = whole_len / batchsize
epoch = 0
start_at = time.time()
cur_at = start_at
state = make_initial_state(n_units, batchsize=batchsize)
if args.gpu >= 0:
    accum_loss = Variable(cuda.zeros(()))
    for key, value in state.items():
        value.data = cuda.to_gpu(value.data)
else:
    accum_loss = Variable(np.zeros(()).astype(np.float32))

print 'going to train {} iterations'.format(jump * n_epochs)
for i in xrange(jump * n_epochs):
    x_batch = np.array([train_data[(jump * j + i) % whole_len]
                        for j in xrange(batchsize)])
    y_batch = np.array([train_data[(jump * j + i + 1) % whole_len]
                        for j in xrange(batchsize)])
    if args.gpu >= 0:
        x_batch = cuda.to_gpu(x_batch)
        y_batch = cuda.to_gpu(y_batch)
def _train(self, **kwargs):
    gpu = -1 if "gpu" not in kwargs else kwargs["gpu"]
    lr = 2e-3 if "lr" not in kwargs else kwargs["lr"]
    lr_decay = 0.97 if "lr_decay" not in kwargs else kwargs["lr_decay"]
    lr_decay_after = 10 if "lr_decay_after" not in kwargs else kwargs["lr_decay_after"]
    decay_rate = 0.95 if "decay_rate" not in kwargs else kwargs["decay_rate"]
    dropout = 0.0 if "dropout" not in kwargs else kwargs["dropout"]
    bprop_len = 50 if "bprop_len" not in kwargs else kwargs["bprop_len"]
    batchsize = 50 if "batchsize" not in kwargs else kwargs["batchsize"]
    grad_clip = 5 if "grad_clip" not in kwargs else kwargs["grad_clip"]
    n_epochs = 5 if "epochs" not in kwargs else kwargs["epochs"]

    if gpu >= 0:
        cuda.get_device(gpu).use()
        self.model.to_gpu()

    optimizer = optimizers.RMSprop(lr=lr, alpha=decay_rate, eps=1e-8)
    optimizer.setup(self.model)

    train_data = self.dataset
    whole_len = train_data.shape[0]
    jump = whole_len // batchsize
    epoch = 0
    start_at = time.time()
    cur_at = start_at
    state = self.model.make_initial_state(batchsize=batchsize)
    if gpu >= 0:
        accum_loss = Variable(cuda.zeros(()))
        for key, value in state.items():
            value.data = cuda.to_gpu(value.data)
    else:
        accum_loss = Variable(np.zeros((), dtype=np.float32))

    print('going to train {} iterations'.format(jump * n_epochs))
    for i in range(jump * n_epochs):
        x_batch = np.array([train_data[(jump * j + i) % whole_len]
                            for j in range(batchsize)])
        y_batch = np.array([train_data[(jump * j + i + 1) % whole_len]
                            for j in range(batchsize)])
        if gpu >= 0:
            x_batch = cuda.to_gpu(x_batch)
            y_batch = cuda.to_gpu(y_batch)

        state, loss_i = self.model.forward_one_step(x_batch, y_batch, state,
                                                    dropout_ratio=dropout)
        accum_loss += loss_i

        if (i + 1) % bprop_len == 0:  # Run truncated BPTT
            now = time.time()
            sys.stderr.write('\r{}/{}, train_loss = {}, time = {:.2f}'.format(
                (i + 1) // bprop_len, (jump * n_epochs) // bprop_len,
                accum_loss.data / bprop_len, now - cur_at))
            sys.stderr.flush()
            cur_at = now
            optimizer.zero_grads()
            accum_loss.backward()
            accum_loss.unchain_backward()  # truncate
            if gpu >= 0:
                accum_loss = Variable(cuda.zeros(()))
            else:
                accum_loss = Variable(np.zeros((), dtype=np.float32))
            optimizer.clip_grads(grad_clip)
            optimizer.update()

        if (i + 1) % 10000 == 0:
            pickle.dump(copy.deepcopy(self.model).to_cpu(), open(self.model_path, 'wb'))

        if (i + 1) % jump == 0:
            epoch += 1
            if epoch >= lr_decay_after:
                optimizer.lr *= lr_decay
                print('decayed learning rate by a factor {} to {}'.format(lr_decay, optimizer.lr))
        sys.stdout.flush()

    pickle.dump(copy.deepcopy(self.model).to_cpu(), open(self.model_path, 'wb'))
if args.optimizer == 'rmsprop':
    optimizer = optimizers.RMSprop(lr=args.learning_rate, alpha=args.decay_rate)
elif args.optimizer == 'adam':
    optimizer = optimizers.Adam()
elif args.optimizer == 'adagrad':
    optimizer = optimizers.AdaGrad(lr=args.learning_rate)
else:
    error("unknown optimizer")

optimizer.setup(model.collect_parameters())

whole_len = train_data.shape[0]
jump = whole_len / batchsize
cur_log_perp = cuda.zeros(())
epoch = 0
start_at = time.time()
cur_at = start_at
if args.gpu >= 0:
    accum_loss = Variable(cuda.zeros(()))
    for key, value in state.items():
        value.data = cuda.to_gpu(value.data)
else:
    accum_loss = Variable(np.zeros(()))

print 'going to train {} iterations'.format(jump * n_epochs)
for i in xrange(jump * n_epochs):
    x_batch = np.array(
        [train_data[(jump * j + i) % whole_len] for j in xrange(batchsize)])
def reset_accum_loss(self):
    if self.gpu >= 0:
        self.accum_loss = Variable(cuda.zeros(()))
    else:
        self.accum_loss = Variable(np.zeros(()))
                               eps=1e-8)
optimizer.setup(model.collect_parameters())

whole_len = len(train_data)
whole_val_len = len(val_data)
epoch = 0
start_at = time.time()
cur_at = start_at
end_time = 0
state = make_initial_state(n_units)
train_loss_all = []
val_loss_all = []
iterations_count = 0

if args.gpu >= 0:
    loss = Variable(cuda.zeros(()))
    val_loss = Variable(cuda.zeros(()))
    for key, value in state.items():
        value.data = cuda.to_gpu(value.data)
else:
    loss = Variable(np.zeros((), dtype=np.float32))
    val_loss = Variable(np.zeros((), dtype=np.float32))

for i in xrange(whole_len * n_epochs):
    for j in xrange(0, len(train_data[i % whole_len]) - 1):
        x_t = np.array([train_data[i % whole_len][j]])
        y_t = np.array([train_data[i % whole_len][j + 1]])
        if args.gpu >= 0:
            x_t = cuda.to_gpu(x_t)
            y_t = cuda.to_gpu(y_t)
        state, loss_i = model.forward_one_step(x_t,
optimizer = optimizers.RMSprop(lr=args.learning_rate, alpha=args.decay_rate, eps=1e-8)
optimizer.setup(model.collect_parameters())

whole_len = len(train_data)
whole_val_len = len(val_data)
epoch = 0
start_at = time.time()
cur_at = start_at
end_time = 0
state = make_initial_state(n_units)
train_loss_all = []
val_loss_all = []
iterations_count = 0

if args.gpu >= 0:
    loss = Variable(cuda.zeros(()))
    val_loss = Variable(cuda.zeros(()))
    for key, value in state.items():
        value.data = cuda.to_gpu(value.data)
else:
    loss = Variable(np.zeros((), dtype=np.float32))
    val_loss = Variable(np.zeros((), dtype=np.float32))

for i in xrange(whole_len * n_epochs):
    for j in xrange(0, len(train_data[i % whole_len]) - 1):
        x_t = np.array([train_data[i % whole_len][j]])
        y_t = np.array([train_data[i % whole_len][j + 1]])
        if args.gpu >= 0:
            x_t = cuda.to_gpu(x_t)
            y_t = cuda.to_gpu(y_t)
        state, loss_i = model.forward_one_step(x_t, y_t, state,
                                               dropout_ratio=args.dropout)
def train(self, words, steps, batchsize=100, sequence_length=10):
    """
    Train the Predictor's model on words for steps number of steps.
    """
    whole_len = len(words)
    train_data = np.ndarray(whole_len, dtype=np.int32)
    jumps = steps * sequence_length

    # Initialize training data and maybe vocab.
    vocab_initializing = False  # guard: vocab may already exist
    if self.vocab is None:
        vocab_initializing = True
        self.vocab = {}
    for i, word in enumerate(words):
        if vocab_initializing:
            if word not in self.vocab:
                self.vocab[word] = len(self.vocab)
        train_data[i] = self.vocab[word]
    vocab_initializing = False

    print 'corpus length:', len(words)
    print 'self.vocab size:', len(self.vocab)

    # Initialize base model (if we need to)
    if self.model is None:
        self.model = BaseRNN(len(self.vocab), self.units)
    if self.gpu >= 0:
        cuda.get_device(self.gpu).use()
        self.model.to_gpu()

    optimizer = optimizers.RMSprop(lr=self.settings.learning_rate,
                                   alpha=self.settings.decay_rate, eps=1e-8)
    optimizer.setup(self.model)

    jumpsPerEpoch = whole_len / batchsize
    epoch = 0
    start_at = time.time()
    cur_at = start_at
    state = make_initial_state(self.units, batchsize=batchsize)
    if self.gpu >= 0:
        accum_loss = Variable(cuda.zeros(()))
        for _, value in state.items():
            value.data = cuda.to_gpu(value.data)
    else:
        accum_loss = Variable(np.zeros((), dtype=np.float32))

    print 'going to train {} iterations'.format(steps)
    for i in xrange(jumps):
        x_batch = np.array([train_data[(jumpsPerEpoch * j + i) % whole_len]
                            for j in xrange(batchsize)])
        y_batch = np.array([train_data[(jumpsPerEpoch * j + i + 1) % whole_len]
                            for j in xrange(batchsize)])
        if self.gpu >= 0:
            x_batch = cuda.to_gpu(x_batch)
            y_batch = cuda.to_gpu(y_batch)

        state, loss_i = self.model.forward_one_step(
            x_batch, y_batch, state, dropout_ratio=self.settings.dropout)
        accum_loss += loss_i

        if (i + 1) % sequence_length == 0:
            now = time.time()
            print '{}/{}, train_loss = {}, time = {:.2f}'.format(
                (i + 1) / sequence_length, steps,
                accum_loss.data / sequence_length, now - cur_at)
            cur_at = now
            optimizer.zero_grads()
            accum_loss.backward()
            accum_loss.unchain_backward()  # truncate
            if self.gpu >= 0:
                accum_loss = Variable(cuda.zeros(()))
            else:
                accum_loss = Variable(np.zeros((), dtype=np.float32))
            optimizer.clip_grads(self.settings.grad_clip)
            optimizer.update()

        if (i + 1) % jumpsPerEpoch == 0:
            epoch += 1
            if epoch >= self.settings.learning_rate_decay_after:
                optimizer.lr *= self.settings.learning_rate_decay
                print 'decayed learning rate by a factor {} to {}'.format(
                    self.settings.learning_rate_decay, optimizer.lr)
if args.gpu >= 0:
    cuda.get_device(args.gpu).use()
    model.to_gpu()

optimizer = optimizers.RMSprop(lr=args.learning_rate, alpha=args.decay_rate, eps=1e-8)
optimizer.setup(model)

whole_len = train_data.shape[0]
jump = int(whole_len / batchsize)
epoch = 0
start_at = time.time()
cur_at = start_at
state = make_initial_state(n_units, batchsize=batchsize)
if args.gpu >= 0:
    accum_loss = Variable(cuda.zeros(()))
    for key, value in list(state.items()):
        value.data = cuda.to_gpu(value.data)
else:
    accum_loss = Variable(np.zeros((), dtype=np.float32))

print('going to train {} iterations'.format(jump * n_epochs))
for i in range(jump * n_epochs):
    x_batch = np.array([train_data[(jump * j + i) % whole_len]
                        for j in range(batchsize)])
    y_batch = np.array([train_data[(jump * j + i + 1) % whole_len]
                        for j in range(batchsize)])
    if args.gpu >= 0:
        x_batch = cuda.to_gpu(x_batch)
        y_batch = cuda.to_gpu(y_batch)
def trainEvaluate(dataset, index):
    """Evaluate the current neural network."""
    # Copy the model so evaluation does not affect it
    evaluator = s.dnn.model.copy()  # to use different state
    evaluator.reset_state()  # initialize state
    evaluator.train = False  # dropout does nothing
    evdnn = Dnn(evaluator, None)

    # Allocate buffers and fetch the input and teacher data
    xa_cpu = evaluator.buildMiniBatchData(dataset, np.asarray([index]), rnnLen, rnnStep)
    ta_cpu = np.zeros(shape=(rnnLen, 1, s.dnnOut), dtype=np.float32)
    for i in range(rnnLen):
        ta_cpu[i, 0, :] = trainGetT(dataset, index + i * rnnStep)

    if s.xp == np:
        xa_gpu = xa_cpu
        ta_gpu = ta_cpu
        losses = np.zeros((rnnLen,), dtype=np.float32)
    else:
        xa_gpu = cuda.to_gpu(xa_cpu)
        ta_gpu = cuda.to_gpu(ta_cpu)
        losses = cuda.zeros((rnnLen,), dtype=np.float32)

    # Initialize the graph display if needed
    if s.grEnable:
        if s.xp == np:
            ya_gpu = np.zeros((rnnLen,), dtype=np.float32)
        else:
            ya_gpu = cuda.zeros((rnnLen,), dtype=np.float32)
        # Draw the data on the graph
        plt.title(s.trainDataFile + " : " + str(index))  # graph title
        xvals = dataset[:, index : index + s.minEvalLen]
        tvals = ta_cpu.reshape((rnnLen,))
        glIn1.set_ydata(xvals[0])
        glIn2.set_ydata(xvals[1])
        glIn3.set_ydata(xvals[2])
        glIn4.set_ydata(xvals[3])
        glTeach.set_ydata(tvals)
        subPlot1.set_ylim(f.npMaxMin(xvals))

    # Evaluate the RNN
    for i in range(rnnLen):
        y, loss = evdnn.evaluate(xa_gpu[i], ta_gpu[i])
        losses[i : i + 1] = loss.data
        if s.grEnable:
            ya_gpu[i : i + 1] = y.data[0, 0 : 1]

    # Update the graph display if needed
    if s.grEnable:
        if s.xp == np:
            yvals = ya_gpu
        else:
            yvals = cuda.to_cpu(ya_gpu)
        glOut.set_ydata(yvals)
        subPlot2.set_ylim(f.npMaxMin([tvals, yvals]))
        plt.draw()
        plt.pause(0.001)

    try:
        return math.exp(float(losses.sum()) / rnnLen)
    except Exception as e:
        print("evaluate overflow")
        return 0.0
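The return value is exp of the mean per-step loss; when the per-step losses are cross-entropy values in nats, this is the perplexity. A tiny standalone illustration with hypothetical numbers:

import math

losses_sum = 12.0  # hypothetical summed cross-entropy over rnnLen steps
rnnLen = 8
perplexity = math.exp(losses_sum / rnnLen)  # exp(1.5) ~= 4.48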