def update_upscaler(self, lam1, lam2):
    """One adversarial training step for the upscaler's generator and discriminator.

    lam1/lam2 are loss-weighting coefficients forwarded to self.loss_gen.
    Statement order matters: the generator is updated first while the graph
    is intact, then inputs are unchained before the discriminator update so
    discriminator gradients do not flow back into the generator.
    """
    opt_gen = self.get_optimizer('gen_up')
    opt_dis = self.get_optimizer('dis_up')
    xp = self.upscaler.xp
    # Each batch element is a (small, large) image pair.
    batch_a = self.get_iterator('main').next()
    x_s_nn = Variable(xp.asarray([b[0] for b in batch_a]).astype('f'))
    x_l = Variable(xp.asarray([b[1] for b in batch_a]).astype('f'))
    x_s_nn_l = self.upscaler.gen(x_s_nn)
    # Fake pair (input, generated) vs. real pair (input, ground truth).
    y_s_nn_l = self.upscaler.dis(x_s_nn, x_s_nn_l)
    y_l = self.upscaler.dis(x_s_nn, x_l)
    self.upscaler.gen.cleargrads()
    loss_gen = self.loss_gen(self.upscaler.gen, x_s_nn_l, x_l, y_s_nn_l, lam1, lam2)
    loss_gen.backward()
    opt_gen.update()
    # Cut the graph at the inputs so the discriminator loss below cannot
    # backpropagate through the generator.
    x_l.unchain_backward()
    x_s_nn.unchain_backward()
    self.upscaler.dis.cleargrads()
    loss_dis = self.loss_dis(self.upscaler.dis, y_l, y_s_nn_l)
    loss_dis.backward()
    opt_dis.update()
def update_downscaler(self, lam1, lam2):
    """One adversarial training step for the downscaler's generator and discriminator.

    A frozen forward pass of the (already trained) upscaler produces the
    input x_sl; the downscaler then learns to map it back to the small image.
    lam1/lam2 are loss-weighting coefficients forwarded to self.loss_gen.
    """
    opt_gen = self.get_optimizer('gen_down')
    opt_dis = self.get_optimizer('dis_down')
    xp = self.downscaler.xp
    batch_b = self.get_iterator('trainB').next()
    x_s_rand = Variable(xp.asarray([b[0] for b in batch_b]).astype('f'))
    x_s = Variable(xp.asarray([b[1] for b in batch_b]).astype('f'))
    self.upscaler.gen.fix_broken_batchnorm()
    # BUG FIX: Chainer's flag is spelled 'enable_backprop'. The previous
    # key 'enable_back_prop' just created an unused config entry, so the
    # upscaler forward pass still built a backprop graph.
    with chainer.using_config('train', False), chainer.using_config(
            'enable_backprop', False):
        x_sl = self.upscaler.gen(x_s_rand)
    x_sl.unchain_backward()
    x_sls = self.downscaler.gen(x_sl)
    # Fake pair (input, generated) vs. real pair (input, ground truth).
    y_sls = self.downscaler.dis(x_sl, x_sls)
    y_s = self.downscaler.dis(x_sl, x_s)
    self.downscaler.gen.cleargrads()
    loss_gen = self.loss_gen(self.downscaler.gen, x_sls, x_s, y_sls, lam1, lam2)
    loss_gen.backward()
    opt_gen.update()
    # Cut the graph so the discriminator update cannot reach the generator.
    x_s.unchain_backward()
    x_sls.unchain_backward()
    self.downscaler.dis.cleargrads()
    loss_dis = self.loss_dis(self.downscaler.dis, y_s, y_sls)
    loss_dis.backward()
    opt_dis.update()
def update_core(self):
    """One encoder/decoder training step (discriminator branch commented out).

    Builds a fixed 256x256 batch, runs enc -> dec, and updates both
    optimizers; the adversarial terms are disabled in this variant.
    """
    enc_optimizer = self.get_optimizer('enc')
    dec_optimizer = self.get_optimizer('dec')
    # dis_optimizer = self.get_optimizer('dis')
    enc, dec = self.enc, self.dec  #, self.dis
    xp = enc.xp
    batch = self.get_iterator('main').next()
    batchsize = len(batch)
    in_ch = batch[0][0].shape[0]
    out_ch = batch[0][1].shape[0]
    w_in = 256  # change
    w_out = 256  # change
    x_in = xp.zeros((batchsize, in_ch, w_in, w_in)).astype("f")
    t_out = xp.zeros((batchsize, out_ch, w_out, w_out)).astype("f")
    for i in range(batchsize):
        x_in[i, :] = xp.asarray(batch[i][0])
        t_out[i, :] = xp.asarray(batch[i][1])
    x_in = Variable(x_in)
    z = enc(x_in)
    x_out = dec(z)
    # y_fake = dis(x_in, x_out)  # input + generated output = fake pair
    # y_real = dis(x_in, t_out)  # input + ground truth = real pair
    enc_optimizer.update(self.loss_enc, enc, x_out, t_out)
    # Unchain the (multi-scale) latent so the decoder update does not
    # backpropagate into the encoder a second time.
    for z_ in z:
        z_.unchain_backward()
    dec_optimizer.update(self.loss_dec, dec, x_out, t_out)
    x_in.unchain_backward()
    x_out.unchain_backward()
def update_core(self):
    """Run one generator/discriminator training step.

    The generator is updated first, the graph is then cut at the inputs,
    and finally the discriminator is updated on the detached outputs.
    """
    optimizer_gen = self.get_optimizer('gen')
    optimizer_dis = self.get_optimizer('dis')
    gen, dis = self.model.gen, self.model.dis
    xp = gen.xp

    minibatch = self.get_iterator('main').next()
    n_samples = len(minibatch)

    # Pack the (input, target) pairs into float32 device arrays.
    x_in = Variable(xp.asarray([pair[0] for pair in minibatch]).astype('f'))
    t_out = Variable(xp.asarray([pair[1] for pair in minibatch]).astype('f'))

    x_out = gen(x_in)
    y_fake = dis(x_in, x_out)  # (input, generated) pair
    y_real = dis(x_in, t_out)  # (input, ground truth) pair

    # Generator step.
    gen.cleargrads()
    self.loss_gen(gen, x_out, t_out, y_fake).backward()
    optimizer_gen.update()

    # Detach so the discriminator step cannot reach the generator.
    x_in.unchain_backward()
    x_out.unchain_backward()

    # Discriminator step.
    dis.cleargrads()
    self.loss_dis(dis, y_real, y_fake).backward()
    optimizer_dis.update()
def optimizeCRNN(iterNum, maxIndex, indicies):
    """Train an EvalCRNN language-model-style network with truncated BPTT.

    iterNum: number of minibatches per epoch; maxIndex: vocabulary/index
    bound; indicies: the flat training index sequence.
    Returns the trained inner cRNN.
    """
    batchSize = 1000
    model = EvalCRNN(maxIndex, 500)
    print(len(indicies), computeEntropy(maxIndex, indicies))
    learningRate = 0.001
    epoch = 3
    for j in range(epoch):
        # NOTE(review): a fresh optimizer is created every epoch, which
        # resets RMSpropGraves' running statistics — presumably intentional
        # to apply the halved learning rate below; confirm.
        my_optimizer = optimizers.RMSpropGraves(lr = learningRate)
        my_optimizer.setup(model)
        my_optimizer.add_hook(optimizer.GradientClipping(1))
        model.cRNN.reset()
        loss = Variable(np.array([[0]]))
        for i in range(iterNum):
            # time.clock() is deprecated (removed in Python 3.8) — wall/CPU
            # semantics are platform dependent.
            t1 = time.clock()
            model.zerograds()
            # Truncate the graph of the previous step before reassigning.
            loss.unchain_backward()
            loss = model(indicies[batchSize*i:batchSize*(i+1)], iterNum*batchSize)
            loss.backward()
            t2 = time.clock()
            msg = "iter: " + str(i + iterNum * j + 1) + "/" + str(iterNum * epoch)
            msgLoss = "loss: " + str(loss.data/batchSize)
            msgNorm = "grad: " + str(my_optimizer.compute_grads_norm())
            msgTime = "time: " + str(t2 - t1) + " seconds"
            print(msgLoss, msgNorm, msg, msgTime)
            my_optimizer.update()
        # Halve the learning rate for the next epoch's optimizer.
        learningRate *= 0.50
    # Report held-out loss on the 10 batches following the training range.
    print(model(indicies[batchSize*(iterNum):batchSize*(iterNum+10)]).data/(batchSize*10))
    return model.cRNN
def update_core(self):
    """One pix2pix-style training step (enc/dec/dis) with experimental,
    commented-out "path-through" plumbing for steering the latent z."""
    enc_optimizer = self.get_optimizer('enc')
    dec_optimizer = self.get_optimizer('dec')
    dis_optimizer = self.get_optimizer('dis')
    enc, dec, dis = self.enc, self.dec, self.dis
    xp = enc.xp
    batch = self.get_iterator('main').next()
    batchsize = len(batch)
    in_ch = batch[0][0].shape[0]
    """ Edit g """
    #print("Batch size", len(batch))
    #print("Batch all", batch)
    #print("Batch -1[0]", batch[-1][0])
    #print("Batch -1[1]", batch[-1][1])
    #print("Batch -1[0][0]", batch[-1][0][0])
    # Translation: "access the last index and extract the information".
    """ 最後のインデックスにアクセスして、情報を取り出す """
    # Translation: "beware — this is only valid when the batch size is 1".
    """ これは、バッチサイズが1のときのみ有効であるからして、気をつけること """
    #path_through1 = []
    #for in_contain in batch[-1][0][-1]:
    #print("IN_CONTAIN", in_contain)
    #    for c in in_contain:
    #        path_through1.append(c)
    #print("path-through len", len(path_through1))
    # Translation: "end of this section".
    """ ここまで """
    out_ch = batch[0][1].shape[0]
    w_in = 256
    w_out = 256
    x_in = xp.zeros((batchsize, in_ch, w_in, w_in)).astype("f")
    t_out = xp.zeros((batchsize, out_ch, w_out, w_out)).astype("f")
    for i in range(batchsize):
        x_in[i,:] = xp.asarray(batch[i][0])
        t_out[i,:] = xp.asarray(batch[i][1])
    x_in = Variable(x_in)
    z = enc(x_in, test=False)
    # Translation: "if you alter this z vector, you can push the output in
    # any desired direction".
    """ このzベクトルを変化させれば、任意の方向性に持っていくことができる """
    #print("z", z)
    # Translation: "editing z directly is risky, so it seems better to add
    # an extra argument to dec instead".
    """ Zを直接編集するのは危険なので、decの引数を増やして対処したほうが良さそう """
    #x_out = dec(z, path_through1, test=False)
    x_out = dec(z, test=False)
    y_fake = dis(x_in, x_out, test=False)
    y_real = dis(x_in, t_out, test=False)
    enc_optimizer.update(self.loss_enc, enc, x_out, t_out, y_fake)
    # Unchain the latent so the decoder update does not reach the encoder.
    for z_ in z:
        z_.unchain_backward()
    dec_optimizer.update(self.loss_dec, dec, x_out, t_out, y_fake)
    x_in.unchain_backward()
    x_out.unchain_backward()
    dis_optimizer.update(self.loss_dis, dis, y_real, y_fake)
def update_core(self):
    """One pix2pix-style training step (enc/dec/dis) on 3x115x149 images.

    Image dimensions are hard-coded; the commented shape reads from the
    batch were replaced by constants.
    """
    # debug: print("in update_core")
    enc_optimizer = self.get_optimizer('enc')
    dec_optimizer = self.get_optimizer('dec')
    dis_optimizer = self.get_optimizer('dis')
    enc, dec, dis = self.enc, self.dec, self.dis
    xp = enc.xp
    batch = self.get_iterator('main').next()
    batchsize = len(batch)
    #in_ch = batch[0][0].shape[0]
    #out_ch = batch[0][1].shape[0]
    in_ch = 3
    out_ch = 3
    w_in = 115
    h_in = 149
    w_out = 115
    h_out = 149
    x_in = xp.zeros((batchsize, in_ch, w_in, h_in)).astype("f")
    t_out = xp.zeros((batchsize, out_ch, w_out, h_out)).astype("f")
    for i in range(batchsize):
        x_in[i, :] = xp.asarray(batch[i][0])
        t_out[i, :] = xp.asarray(batch[i][1])
    x_in = Variable(x_in)
    z = enc(x_in)
    x_out = dec(z)
    y_fake = dis(x_in, x_out)
    y_real = dis(x_in, t_out)
    enc_optimizer.update(self.loss_enc, enc, x_out, t_out, y_fake)
    # Unchain the latent so the decoder update does not reach the encoder.
    for z_ in z:
        z_.unchain_backward()
    dec_optimizer.update(self.loss_dec, dec, x_out, t_out, y_fake)
    x_in.unchain_backward()
    x_out.unchain_backward()
    dis_optimizer.update(self.loss_dis, dis, y_real, y_fake)
class Simple(LSTMBase): def __init__(self, vocab_size, dim_embed=33 * 3, dim1=400, dim2=400, dim3=200, class_size=None): if class_size is None: class_size = vocab_size super(Simple, self).__init__( embed2=L.EmbedID(vocab_size, dim_embed), lay2=L.LSTM(dim_embed, dim1, forget_bias_init=0), lay_int=L.LSTM(dim1, dim2, forget_bias_init=0), lin1=L.Linear(dim2, dim3), lin2=L.Linear(dim3, class_size), ) self.vocab_size = vocab_size try: cuda.check_cuda_available() self.to_gpu() print 'run on the GPU.' except: print 'run on the CPU.' self.dim_embed = dim_embed self.optimizer = optimizers.MomentumSGD() self.optimizer.setup(self) self.loss_var = Variable(xp.zeros((), dtype=np.float32)) self.reset_state() def __call__(self, xs, train): x_3gram = xs[0] sp2 = xs[1] x_uni = x_3gram[:, 0] y = Variable(x_uni, volatile=not train) y = self.embed2(y) y2 = self.lay2(y) y2 = self.lay_int(y2) y = y2 y = self.lin1(F.dropout(y, train=train)) y = F.relu(y) y = self.lin2(F.dropout(y, train=train)) return y def reset_state(self): if self.loss_var is not None: self.loss_var.unchain_backward() # for safty self.loss_var = Variable(xp.zeros((), dtype=xp.float32)) # reset loss_var self.lay2.reset_state() self.lay_int.reset_state() return
class StatefulAgent(Agent):
    """Agent that accumulates loss across steps for truncated BPTT."""

    def __init__(self, model, optimizer=None, gpu=-1, cutoff=None, last=False):
        super(StatefulAgent, self).__init__(model, optimizer=optimizer, gpu=gpu,
                                            last=last, cutoff=cutoff)
        # cutoff for BPTT (number of steps between backprop/truncation)
        self.cutoff = cutoff
        # whether to update from loss in last step only
        self.last = last
        # keep track of loss for truncated BPTT
        self.loss = Variable(self.xp.zeros((), 'float32'))

    def run(self, data, train=True, idx=None, final=False):
        """Process one step; backpropagate every `cutoff` steps (or at `final`).

        Returns the scalar loss of this step as a float.
        NOTE(review): `idx % self.cutoff` raises TypeError when cutoff is
        None (the constructor default) — confirm callers always set it.
        """
        if (idx) % self.cutoff == 0:
            self.reset()
        # NOTE(review): the model is always called with train=True even when
        # this method's `train` argument is False — confirm intended.
        loss = self.model(map(lambda x: Variable(self.xp.asarray(x)), data),
                          train=True)
        if self.last:
            # used in case we propagate back at end of trials only
            if ((idx + 1) % self.cutoff) == 0:
                self.loss = loss
            else:
                loss = Variable(self.xp.zeros((), 'float32'))
        else:
            self.loss += loss
        # normalize by number of datapoints in minibatch
        _loss = float(loss.data)
        # backpropagate if we reach the cutoff for truncated backprop or if we processed the last batch
        if train and ((self.cutoff and ((idx + 1) % self.cutoff) == 0) or final):
            self.optimizer.zero_grads()
            self.loss.backward()
            self.loss.unchain_backward()
            self.optimizer.update()
            self.loss = Variable(self.xp.zeros((), 'float32'))
        if not train:
            # Evaluation mode: just truncate, never update.
            self.loss.unchain_backward()
        return _loss
def train(self, x):
    """One training step: reconstruction, then discriminator, then generator.

    x: input minibatch (device array). Each phase draws a fresh random
    latent z and updates its own optimizer(s).
    """
    # Encoder/Decoder: reconstruction loss through encoder + generator0.
    h = self.encoder(x)
    xp = cuda.get_array_module(x)
    z = Variable(
        cuda.to_gpu(
            xp.random.rand(x.shape[0], self.dim).astype(xp.float32),
            self.device))
    hz = self.generator0(z)
    x_rec = self.decoder(h, hz)
    l_rec = self.recon_loss(x, x_rec)
    self.cleargrads()
    l_rec.backward()
    self.optimizer_enc.update()
    self.optimizer_dec.update()
    # Discriminator: h is detached so this phase cannot touch the encoder.
    h = Variable(h.data)  # disconnect
    # NOTE(review): unchain on a freshly wrapped Variable is redundant —
    # h.data already has no creator; confirm it can be dropped.
    h.unchain_backward()
    xp = cuda.get_array_module(x)
    z = Variable(
        cuda.to_gpu(
            xp.random.rand(x.shape[0], self.dim).astype(xp.float32),
            self.device))
    hz = self.generator0(z)
    x_gen = self.generator(h, hz)
    d_x_gen = self.discriminator(x_gen, h)
    d_x_real = self.discriminator(x, h)
    l_dis = self.lsgan_loss(d_x_gen, d_x_real)
    self.cleargrads()
    l_dis.backward()
    self.optimizer_dis.update()
    # Generator: fool the discriminator with a new sample.
    xp = cuda.get_array_module(x)
    z = Variable(
        cuda.to_gpu(
            xp.random.rand(x.shape[0], self.dim).astype(xp.float32),
            self.device))
    hz = self.generator0(z)
    x_gen = self.generator(h, hz)
    d_x_gen = self.discriminator(x_gen, h)
    # NOTE(review): h_gen is computed but unused — confirm whether a
    # feature-matching loss was intended here.
    h_gen = self.encoder(x_gen)
    l_gen = self.lsgan_loss(d_x_gen)
    self.cleargrads()
    l_gen.backward()
    self.optimizer_dec.update()
    self.optimizer_gen.update()
def update_core(self):
    """One pix2pix-style training step with loss weights self.lam1/self.lam2
    passed through to the encoder/decoder losses."""
    enc_optimizer = self.get_optimizer('enc')
    dec_optimizer = self.get_optimizer('dec')
    dis_optimizer = self.get_optimizer('dis')
    enc, dec, dis = self.enc, self.dec, self.dis
    xp = enc.xp
    batch = self.get_iterator('main').next()
    batchsize = len(batch)
    in_ch = batch[0][0].shape[0]
    out_ch = batch[0][1].shape[0]
    w_in = 256
    w_out = 256
    x_in = xp.zeros((batchsize, in_ch, w_in, w_in)).astype("f")
    t_out = xp.zeros((batchsize, out_ch, w_out, w_out)).astype("f")
    for i in range(batchsize):
        x_in[i, :] = xp.asarray(batch[i][0])
        t_out[i, :] = xp.asarray(batch[i][1])
    x_in = Variable(x_in)
    z = enc(x_in, test=False)
    x_out = dec(z, test=False)
    y_fake = dis(x_in, x_out, test=False)
    y_real = dis(x_in, t_out, test=False)
    enc_optimizer.update(self.loss_enc, enc, x_out, t_out, y_fake,
                         lam1=self.lam1, lam2=self.lam2)
    # Unchain the latent so the decoder update does not reach the encoder.
    for z_ in z:
        z_.unchain_backward()
    dec_optimizer.update(self.loss_dec, dec, x_out, t_out, y_fake,
                         lam1=self.lam1, lam2=self.lam2)
    x_in.unchain_backward()
    x_out.unchain_backward()
    dis_optimizer.update(self.loss_dis, dis, y_real, y_fake)
def update_core(self):
    """One pix2pix-style training step (enc/dec/dis) on spectrogram pairs.

    Spectrogram widths (time axis) can differ between samples in a batch,
    which makes them impossible to merge into one ndarray; such samples are
    reported and excluded from this update.
    """
    enc_optimizer = self.get_optimizer('enc')
    dec_optimizer = self.get_optimizer('dec')
    dis_optimizer = self.get_optimizer('dis')
    enc, dec, dis = self.enc, self.dec, self.dis
    xp = enc.xp
    batch = self.get_iterator('main').next()
    batchsize = len(batch)
    in_ch = batch[0][0].shape[0]
    out_ch = batch[0][1].shape[0]
    # Spatial size is taken from the first sample of the batch.
    w_in = batch[0][0].shape[1]
    h_in = batch[0][0].shape[2]
    w_out = w_in
    h_out = h_in
    x_in = xp.zeros((batchsize, in_ch, w_in, h_in)).astype("f")
    t_out = xp.zeros((batchsize, out_ch, w_out, h_out)).astype("f")
    valid = []
    for i in range(batchsize):
        if (x_in[i, :].shape != batch[i][0].shape):
            print(
                "skipped training_audio_sample because spectrogram shape does not match.(comes from program bug.)\r\n expected:{}, actual:{}"
                .format(x_in[i, :].shape, batch[i][0].shape))
            continue
        x_in[i, :] = xp.asarray(batch[i][0])
        t_out[i, :] = xp.asarray(batch[i][1])
        valid.append(i)
    # BUG FIX: skipped samples previously left zero-filled rows in the
    # batch and were silently trained on; drop those rows instead.
    if not valid:
        return  # nothing usable in this batch — skip the update entirely
    if len(valid) < batchsize:
        sel = xp.asarray(valid)
        x_in = x_in[sel]
        t_out = t_out[sel]
    x_in = Variable(x_in)
    z = enc(x_in)
    x_out = dec(z)
    y_fake = dis(x_in, x_out)
    y_real = dis(x_in, t_out)
    enc_optimizer.update(self.loss_enc, enc, x_out, t_out, y_fake)
    # Unchain the latent so the decoder update does not reach the encoder.
    for z_ in z:
        z_.unchain_backward()
    dec_optimizer.update(self.loss_dec, dec, x_out, t_out, y_fake)
    x_in.unchain_backward()
    x_out.unchain_backward()
    dis_optimizer.update(self.loss_dis, dis, y_real, y_fake)
def update_core(self):
    """One pix2pix-style training step on 512x128 inputs, where the losses
    are computed only on a bottom-right crop of input/output/target."""
    enc_optimizer = self.get_optimizer('enc')
    dec_optimizer = self.get_optimizer('dec')
    dis_optimizer = self.get_optimizer('dis')
    enc, dec, dis = self.enc, self.dec, self.dis
    xp = enc.xp
    batch = self.get_iterator('main').next()
    batchsize = len(batch)
    in_ch = batch[0][0].shape[0]
    out_ch = batch[0][1].shape[0]
    w_in_h = 512
    w_in_w = 128
    w_out_h = 512
    w_out_w = 128
    x_in = xp.zeros((batchsize, in_ch, w_in_h, w_in_w)).astype("f")
    t_out = xp.zeros((batchsize, out_ch, w_out_h, w_out_w)).astype("f")
    for i in range(batchsize):
        x_in[i,:] = xp.asarray(batch[i][0])
        t_out[i,:] = xp.asarray(batch[i][1])
    x_in = Variable(x_in)
    z = enc(x_in)
    x_out = dec(z)
    # Crop origin: last third of the height (times two) and last quarter of
    # the width — losses below only see this region.
    st_in_h = int(w_in_h / 3)
    st_in_w = int(w_in_w / 4 * 3)
    #x_in = Variable(x_in.data[:,:,:,w_in_st:w_in_w])
    x_in = x_in[:,:,st_in_h*2:w_in_h,st_in_w:w_in_w]
    x_out = x_out[:,:,st_in_h*2:w_in_h,st_in_w:w_in_w]
    # NOTE(review): t_out is a raw xp array here (never wrapped in
    # Variable), unlike x_in/x_out — relies on dis accepting ndarrays.
    t_out = t_out[:,:,st_in_h*2:w_in_h,st_in_w:w_in_w]
    y_fake = dis(x_in, x_out)
    y_real = dis(x_in, t_out)
    enc_optimizer.update(self.loss_enc, enc, x_out, t_out, y_fake)
    # Unchain the latent so the decoder update does not reach the encoder.
    for z_ in z:
        z_.unchain_backward()
    dec_optimizer.update(self.loss_dec, dec, x_out, t_out, y_fake)
    x_in.unchain_backward()
    x_out.unchain_backward()
    dis_optimizer.update(self.loss_dis, dis, y_real, y_fake)
def train(self, x):
    """One training step: reconstruction, then discriminator, then generator.

    x: input minibatch (device array). Functionally identical to the other
    `train` variant in this file, only formatted differently.
    """
    # Encoder/Decoder: reconstruction loss through encoder + generator0.
    h = self.encoder(x)
    xp = cuda.get_array_module(x)
    z = Variable(cuda.to_gpu(xp.random.rand(x.shape[0], self.dim).astype(xp.float32),
                             self.device))
    hz = self.generator0(z)
    x_rec = self.decoder(h, hz)
    l_rec = self.recon_loss(x, x_rec)
    self.cleargrads()
    l_rec.backward()
    self.optimizer_enc.update()
    self.optimizer_dec.update()
    # Discriminator: h is detached so this phase cannot touch the encoder.
    h = Variable(h.data)  # disconnect
    # NOTE(review): unchain on a freshly wrapped Variable is redundant —
    # h.data already has no creator; confirm it can be dropped.
    h.unchain_backward()
    xp = cuda.get_array_module(x)
    z = Variable(cuda.to_gpu(xp.random.rand(x.shape[0], self.dim).astype(xp.float32),
                             self.device))
    hz = self.generator0(z)
    x_gen = self.generator(h, hz)
    d_x_gen = self.discriminator(x_gen, h)
    d_x_real = self.discriminator(x, h)
    l_dis = self.lsgan_loss(d_x_gen, d_x_real)
    self.cleargrads()
    l_dis.backward()
    self.optimizer_dis.update()
    # Generator: fool the discriminator with a new sample.
    xp = cuda.get_array_module(x)
    z = Variable(cuda.to_gpu(xp.random.rand(x.shape[0], self.dim).astype(xp.float32),
                             self.device))
    hz = self.generator0(z)
    x_gen = self.generator(h, hz)
    d_x_gen = self.discriminator(x_gen, h)
    # NOTE(review): h_gen is computed but unused — confirm whether a
    # feature-matching loss was intended here.
    h_gen = self.encoder(x_gen)
    l_gen = self.lsgan_loss(d_x_gen)
    self.cleargrads()
    l_gen.backward()
    self.optimizer_dec.update()
    self.optimizer_gen.update()
def train(self, data):
    """Run one pass over `data` with truncated BPTT; returns mean loss.

    If self.cutoff is unset, the whole pass is one BPTT segment.
    """
    if not self.cutoff:
        cutoff = data.nbatches
    else:
        cutoff = self.cutoff
    self.model.predictor.reset_state()
    cumloss = self.xp.zeros((), 'float32')
    loss = Variable(self.xp.zeros((), 'float32'))
    # check if we are in train or test mode (used e.g. for dropout)
    self.model.predictor.test = False
    self.model.predictor.train = True
    for _x, _t in data:
        x = Variable(_x)
        t = Variable(_t)
        # Advance the recurrent state on every step (result discarded).
        self.model.predictor(x)
        # backpropagate if we reach the cutoff for truncated backprop or if we processed the last batch
        if data.step % cutoff == 0 or data.step == data.nbatches:
            # NOTE(review): loss is only accumulated on cutoff steps, not on
            # every step — confirm this is the intended training signal.
            loss += self.model(x, t)
            self.optimizer.zero_grads()
            loss.backward()
            loss.unchain_backward()
            self.optimizer.update()
            #self.model.predictor[0][0].U.W.data[10:,:]=0
            cumloss += loss.data
            loss = Variable(self.xp.zeros((), 'float32'))
            self.model.predictor.reset_state()
    return float(cumloss / (data.batch_ind.shape[1]))
def update_core(self):
    """One encoder/decoder/discriminator training step on 256x256 batches.

    The encoder is updated first, the latent is unchained before the
    decoder update, and inputs are unchained before the discriminator
    update so gradients never leak between the three updates.
    """
    opt_enc = self.get_optimizer('enc')
    opt_dec = self.get_optimizer('dec')
    opt_dis = self.get_optimizer('dis')
    enc, dec, dis = self.enc, self.dec, self.dis
    xp = enc.xp

    minibatch = self.get_iterator('main').next()
    n = len(minibatch)
    in_ch = minibatch[0][0].shape[0]
    out_ch = minibatch[0][1].shape[0]
    w_in = 256
    w_out = 256

    # Copy the (input, target) pairs into fixed-size float32 arrays.
    x_in = xp.zeros((n, in_ch, w_in, w_in)).astype("f")
    t_out = xp.zeros((n, out_ch, w_out, w_out)).astype("f")
    for idx, sample in enumerate(minibatch):
        x_in[idx, :] = xp.asarray(sample[0])
        t_out[idx, :] = xp.asarray(sample[1])

    x_in = Variable(x_in)
    z = enc(x_in)
    x_out = dec(z)
    y_fake = dis(x_in, x_out)  # (input, generated) pair
    y_real = dis(x_in, t_out)  # (input, ground truth) pair

    opt_enc.update(self.loss_enc, enc, x_out, t_out, y_fake)
    for latent in z:
        latent.unchain_backward()
    opt_dec.update(self.loss_dec, dec, x_out, t_out, y_fake)
    x_in.unchain_backward()
    x_out.unchain_backward()
    opt_dis.update(self.loss_dis, dis, y_real, y_fake)
def train(self, words, steps, batchsize=100, sequence_length=10): """ Train the Predictor's model on words for steps number of steps. """ whole_len = len(words) train_data = np.ndarray(whole_len, dtype=np.int32) jumps = steps * sequence_length # Initialize training data and maybe vocab. if self.vocab is None: vocab_initializing = True self.vocab = {} for i, word in enumerate(words): if vocab_initializing: if word not in self.vocab: self.vocab[word] = len(self.vocab) train_data[i] = self.vocab[word] vocab_initializing = False print 'corpus length:', len(words) print 'self.vocab size:', len(self.vocab) # Initialize base model (if we need to) if self.model is None: self.model = BaseRNN(len(self.vocab), self.units) if self.gpu >= 0: cuda.get_device(self.gpu).use() self.model.to_self.gpu() optimizer = optimizers.RMSprop(lr=self.settings.learning_rate, alpha=self.settings.decay_rate, eps=1e-8) optimizer.setup(self.model) jumpsPerEpoch = whole_len / batchsize epoch = 0 start_at = time.time() cur_at = start_at state = make_initial_state(self.units, batchsize=batchsize) if self.gpu >= 0: accum_loss = Variable(cuda.zeros(())) for _, value in state.items(): value.data = cuda.to_self.gpu(value.data) else: accum_loss = Variable(np.zeros((), dtype=np.float32)) print 'going to train {} iterations'.format(steps) for i in xrange(jumps): x_batch = np.array([ train_data[(jumpsPerEpoch * j + i) % whole_len] for j in xrange(batchsize) ]) y_batch = np.array([ train_data[(jumpsPerEpoch * j + i + 1) % whole_len] for j in xrange(batchsize) ]) if self.gpu >= 0: x_batch = cuda.to_self.gpu(x_batch) y_batch = cuda.to_self.gpu(y_batch) state, loss_i = self.model.forward_one_step( x_batch, y_batch, state, dropout_ratio=self.settings.dropout) accum_loss += loss_i if (i + 1) % sequence_length == 0: now = time.time() print '{}/{}, train_loss = {}, time = {:.2f}'.format( (i + 1) / sequence_length, steps, accum_loss.data / sequence_length, now - cur_at) cur_at = now optimizer.zero_grads() 
accum_loss.backward() accum_loss.unchain_backward() # truncate if self.gpu >= 0: accum_loss = Variable(cuda.zeros(())) else: accum_loss = Variable(np.zeros((), dtype=np.float32)) optimizer.clip_grads(self.settings.grad_clip) optimizer.update() if (i + 1) % jumpsPerEpoch == 0: epoch += 1 if epoch >= self.settings.learning_rate_decay_after: optimizer.lr *= self.settings.learning_rate_decay print 'decayed self.settings.learning rate by a factor {} to {}'.format( self.settings.learning_rate_decay, optimizer.lr)
def findNumEpoch(architecture, waves, trues, labels, infos, gpu_id, waveFs):
    """Train a Net on waveform data and find the epoch with the lowest
    dev-set error.

    Trains with class-balanced minibatches, evaluates on the dev split every
    `devEpoch` epochs, and stops once no improvement has been seen for
    `convergenceEpoch` epochs. Returns (bestEpoch, bestIncorrect, seed),
    where seed is the RNG seed used for the run so it can be reproduced.
    """
    if cupy is not None and gpu_id >= 0:
        xp = cupy
        cupy.cuda.Device(gpu_id).use()
    else:
        xp = np
    valIndex = coreTestIndex(infos)
    np.random.seed(0)
    # Split training indices into "ins" (instruction) and "dev" groups.
    insIndex, devIndex = traGroupIndex(infos, 2)
    insIndex = np.array(insIndex)
    insLabelIndexTime = makeLabelIndexTime(insIndex, labels, trues)
    # Hyperparameters (powers of two by convention in this file).
    insLabelSize = 2**2
    devEpoch = 2**5
    convergenceEpoch = 2**5 * devEpoch
    devBatchSizeUpper = 2**8
    devSegmentSecUpper = 0.1
    devSegmentLenUpper = int(devSegmentSecUpper * waveFs)
    # Sort dev waves by length so batches contain similar-length waves.
    devIndex = sorted(devIndex, key=lambda i: len(waves[i]))
    devIndex = np.array(devIndex)
    devBatchIndex = np.array_split(
        devIndex, int(np.ceil(len(devIndex) / devBatchSizeUpper)))
    # Per-label sample counts on the dev set, for normalized error rates.
    devLabelSize = np.zeros(len(labels), int32)
    for i in devIndex:
        for li, la in enumerate(labels):
            devLabelSize[li] += (trues[i] == li).sum()
    inputLength = totalInputLength(architecture)
    # Draw and record a fresh seed so the run is reproducible.
    np.random.seed()
    seed = np.random.randint(0, np.iinfo(int32).max)
    np.random.seed(seed)
    net = Net(len(labels), architecture, functions.elu)
    opt = optimizers.Adam(1e-4)
    # opt=Eve(1e-4)
    opt.setup(net)
    if gpu_id >= 0:
        net.to_gpu(gpu_id)
    remainingInsLabelIndexTime = [
        np.random.permutation(lt) for lt in insLabelIndexTime
    ]
    epoch = 0
    bestEpoch = 0
    epochIncorrect = {}
    while epoch < bestEpoch + convergenceEpoch:
        # Refill each label's index pool when it runs low, reshuffled.
        for li, lit in enumerate(remainingInsLabelIndexTime):
            if len(lit) < insLabelSize:
                remainingInsLabelIndexTime[li] = np.concatenate(
                    (lit, np.random.permutation(insLabelIndexTime[li])))
        x, tr = makeInpTru(labels, insLabelSize, inputLength,
                           remainingInsLabelIndexTime, waves, trues)
        x = x[:, newaxis, :, newaxis]
        x = xp.asarray(x)
        x = Variable(x)
        x = net.callSingle(x, True)
        tr = tr[..., newaxis, newaxis]
        tr = xp.asarray(tr)
        e = functions.softmax_cross_entropy(x, tr, normalize=True)
        net.cleargrads()
        e.backward()
        e.unchain_backward()
        opt.update()
        # opt.update(loss=e.data)
        # Only evaluate every devEpoch epochs.
        if epoch % devEpoch != devEpoch - 1:
            epoch += 1
            continue
        incorrect = xp.zeros(len(labels), int32)
        with chainer.using_config("enable_backprop", False):
            for index in devBatchIndex:
                # Longest wave in the batch (devIndex is length-sorted).
                waveLen = len(waves[index[-1]])
                segmentTimes = np.array_split(
                    np.arange(waveLen),
                    int(np.ceil(waveLen / devSegmentLenUpper)))
                net.reset()
                for si, segTime in enumerate(segmentTimes):
                    t0 = segTime[0]
                    t1 = segTime[-1] + 1
                    # Pad shorter waves with zeros; label -1 marks padding.
                    x = np.zeros((len(index), t1 - t0), float32)
                    tr = -np.ones((len(index), t1 - t0), int32)
                    for xi, wi in enumerate(index):
                        if len(waves[wi]) > t0:
                            w = waves[wi][t0:t1]
                            x[xi, :len(w)] = w
                        if len(waves[wi]) > t0:
                            tr[xi, :len(w)] = trues[wi][t0:t1]
                    x = x[:, newaxis, :, newaxis]
                    x = xp.asarray(x)
                    x = Variable(x)
                    x = net(x, False)
                    x.unchain_backward()
                    x = xp.argmax(x.data, axis=1)
                    tr = tr[..., newaxis]
                    tr = xp.asarray(tr)
                    for li, la in enumerate(labels):
                        incorrect[li] += (x[tr == li] != li).sum()
                net.reset()
        if cupy is not None:
            incorrect = cupy.asnumpy(incorrect)
        # Mean of per-label error rates (balanced error).
        incorrect = (incorrect / devLabelSize).mean()
        print("epoch", epoch, "incorrect", incorrect)
        if len(epochIncorrect) == 0 or incorrect < min(
                [epochIncorrect[ep] for ep in epochIncorrect]):
            bestEpoch = epoch
        epochIncorrect[epoch] = incorrect
        epoch += 1
    devEpochs = np.array(sorted(epochIncorrect), int32)
    epochIncorrect = np.array([epochIncorrect[ep] for ep in devEpochs])
    bestIncorrect = epochIncorrect.min()
    return bestEpoch, bestIncorrect, seed
class Bigram(LSTMBase): def __init__(self, vocab_size, dim_embed=33 * 3, dim1=400, dim2=400, dim3=200, class_size=None): if class_size is None: class_size = vocab_size super(Bigram, self).__init__( embed_uni=L.EmbedID(vocab_size, dim_embed), embed_bi=L.EmbedID(vocab_size * vocab_size, dim_embed), lay_uni=L.LSTM(dim_embed, dim1, forget_bias_init=0), lay_bi=L.StatelessLSTM(dim_embed, dim1, forget_bias_init=0), lay_int=L.LSTM(dim1 * 3, dim2, forget_bias_init=0), lin1=L.Linear(dim2, dim3), lin2=L.Linear(dim3, class_size), ) self.vocab_size = vocab_size try: cuda.check_cuda_available() self.to_gpu() print 'run on the GPU.' except: print 'run on the CPU.' self.dim_embed = dim_embed self.optimizer = optimizers.MomentumSGD() self.optimizer.setup(self) self.loss_var = Variable(xp.zeros((), dtype=np.float32)) self.reset_state() def __call__(self, xs, train): x_3gram = xs[0] sp2 = xs[1] x_uni = x_3gram[:, 0] y = Variable(x_uni, volatile=not train) y = self.embed_uni(y) y_uni = self.lay_uni(y) ## bigram です。 x_bi = x_3gram[:, 0] * self.vocab_size + x_3gram[:, 1] y = Variable(x_bi, volatile=not train) y = self.embed_bi(y) if self.is_odd: self.c_odd, self.h_odd = self.lay_bi(self.c_odd, self.h_odd, y) if self.h_evn is None: self.h_evn = Variable(xp.zeros_like(self.h_odd.data), volatile=not train) y = concat.concat((y_uni, self.h_odd, self.h_evn)) else: self.c_evn, self.h_evn = self.lay_bi(self.c_evn, self.h_evn, y) y = concat.concat((y_uni, self.h_evn, self.h_odd)) self.is_odd = not self.is_odd y = self.lay_int(y) y = self.lin1(F.dropout(y, train=train)) y = F.relu(y) y = self.lin2(F.dropout(y, train=train)) return y def reset_state(self): if self.loss_var is not None: self.loss_var.unchain_backward() # 念のため self.loss_var = Variable(xp.zeros((), dtype=xp.float32)) # reset loss_var self.lay_uni.reset_state() self.is_odd = True self.c_odd = None self.c_evn = None self.h_odd = None self.h_evn = None self.lay_int.reset_state() return
def train_dcgan_labeled(evol, dis, proj, epoch0=0):
    """Train the evolution (evol), discriminator (dis) and projection (proj)
    networks on movie sequences, optionally resuming from saved state.

    For each movie, frames beyond the first n_timeseries-1 are predicted
    autoregressively; training alternates a 'normal' (teacher-forced) and a
    'hard' (self-predicted input) difficulty, with periodic "shuzo" resets
    that re-seed the prediction from ground truth.
    """
    global epoch
    o_evol = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_evol.setup(evol)
    o_dis = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_dis.setup(dis)
    o_proj = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_proj.setup(proj)
    # Resume model and optimizer state unless a fresh start was requested.
    if not args.fresh_start:
        serializers.load_hdf5("%s/dcgan_model_evol.h5" % (out_model_dir), evol)
        serializers.load_hdf5("%s/dcgan_state_evol.h5" % (out_model_dir), o_evol)
        serializers.load_hdf5("%s/dcgan_model_dis.h5" % (out_model_dir), dis)
        serializers.load_hdf5("%s/dcgan_state_dis.h5" % (out_model_dir), o_dis)
        serializers.load_hdf5("%s/dcgan_model_proj.h5" % (out_model_dir), proj)
        serializers.load_hdf5("%s/dcgan_state_proj.h5" % (out_model_dir), o_proj)
    o_evol.add_hook(chainer.optimizer.WeightDecay(0.00001))
    o_dis.add_hook(chainer.optimizer.WeightDecay(0.00001))
    o_proj.add_hook(chainer.optimizer.WeightDecay(0.00001))
    vis_process = None
    for epoch in xrange(epoch0, n_epoch):
        for train_ctr in xrange(0, n_train, batchsize):
            print "epoch:", epoch, "train:", train_ctr,
            # discriminator
            # 0: from dataset
            # 1: from noise
            good_movie = True
            prediction_movie = n_movie * [None]
            # NOTE(review): bare except silently skips any load failure,
            # including programming errors — consider narrowing.
            try:
                current_movie = load_movie()
            except:
                continue
            # Seed the prediction with the first n_timeseries-1 real frames.
            for i in range(n_timeseries - 1):
                if current_movie[i] is None:
                    good_movie = False
                else:
                    prediction_movie[i] = current_movie[i]
            if not good_movie:
                continue
            # Predict the remaining frames autoregressively.
            for i in range(n_timeseries - 1, n_movie):
                prediction_movie[i] = evolve_image(
                    evol, proj, prediction_movie[i - n_timeseries + 1:i])
            # Periodically dump sample images and checkpoint everything.
            if train_ctr % save_interval == 0:
                for answer_mode in ['predict', 'observe']:
                    for offset in [n_timeseries, 16, 32, 64, 119]:
                        if offset >= n_movie:
                            continue
                        img_prediction = prediction_movie[offset]
                        if answer_mode == 'observe':
                            img_prediction = current_movie[offset]
                        if img_prediction is None:
                            continue
                        imgfn = '%s/futuresun_%d_%04d_%s+%03d.png' % (
                            out_image_dir, epoch, train_ctr, answer_mode, offset)
                        plt.rcParams['figure.figsize'] = (12.0, 12.0)
                        plt.close('all')
                        plt.imshow(img_prediction, vmin=0, vmax=1.4)
                        plt.suptitle(imgfn)
                        plt.savefig(imgfn)
                        subprocess.call("cp %s ~/public_html/futuresun/" % (imgfn),
                                        shell=True)
                # we don't have enough disk for history
                history_dir = 'history/'  #%d-%d'%(epoch, train_ctr)
                subprocess.call("mkdir -p %s " % (history_dir), shell=True)
                subprocess.call("cp %s/*.h5 %s " % (out_model_dir, history_dir),
                                shell=True)
                if epoch > 0 or train_ctr > 0:
                    print 'saving model...'
                    serializers.save_hdf5(
                        "%s/dcgan_model_evol.h5" % (out_model_dir), evol)
                    serializers.save_hdf5(
                        "%s/dcgan_state_evol.h5" % (out_model_dir), o_evol)
                    serializers.save_hdf5(
                        "%s/dcgan_model_dis.h5" % (out_model_dir), dis)
                    serializers.save_hdf5(
                        "%s/dcgan_state_dis.h5" % (out_model_dir), o_dis)
                    serializers.save_hdf5(
                        "%s/dcgan_model_proj.h5" % (out_model_dir), proj)
                    serializers.save_hdf5(
                        "%s/dcgan_state_proj.h5" % (out_model_dir), o_proj)
                    print '...saved.'
            # Per-movie bookkeeping for the two difficulty modes.
            movie_in = None
            movie_out = None
            movie_out_predict = None
            evol_scores = {}
            proj_scores = {}
            matsuoka_shuzo = {}
            shuzo_evoke_timestep = []
            difficulties = ['normal', 'hard']
            vis_kit = {}
            for difficulty in difficulties:
                evol_scores[difficulty] = [0.0]
                proj_scores[difficulty] = [0.0]
                matsuoka_shuzo[difficulty] = True
                vis_kit[difficulty] = None
            # 'normal' mode is disabled here.
            matsuoka_shuzo[
                'normal'] = False  # dameda, dameda.... Akirameyou....
            # Wait for the previous async visualization before reusing plt.
            if vis_process is not None:
                vis_process.join()
                vis_process = None
            # start main training routine.
            print
            next_shuzo_scale = 10.0 * (1 + epoch)
            next_shuzo_offset = 1 + abs(
                int(round(np.random.normal(scale=next_shuzo_scale))))
            for train_offset in range(0, n_movie - n_timeseries):
                for difficulty in difficulties:
                    movie_clip = current_movie
                    if not matsuoka_shuzo[difficulty]:
                        # Doushitesokode yamerunda...
                        continue
                    else:
                        # Akiramen'nayo!
                        pass
                    # 'normal' trains on real frames, 'hard' on the model's
                    # own predictions.
                    if difficulty == 'normal':
                        movie_clip_in = movie_clip
                    else:
                        movie_clip_in = prediction_movie
                    maybe_dat = create_batch(train_offset, movie_clip_in,
                                             movie_clip)
                    if not maybe_dat:
                        #print "Warning: skip offset", train_offset, "because of unavailable data."
                        continue
                    data_in, data_out, data_other = maybe_dat
                    movie_in = Variable(cuda.to_gpu(data_in))
                    movie_out = Variable(cuda.to_gpu(data_out))
                    movie_other = Variable(cuda.to_gpu(data_other))
                    movie_out_predict_before = evol(movie_in)
                    movie_out_predict = proj(
                        movie_out_predict_before)  # no proj
                    vis_kit[difficulty] = (movie_in.data.get(),
                                           movie_out.data.get(),
                                           movie_out_predict_before.data.get(),
                                           movie_out_predict.data.get())
                    # Three loss regimes selected by args.norm.
                    if args.norm == 'dcgan':
                        yl = dis(movie_in, movie_out_predict)
                        L_evol = F.softmax_cross_entropy(
                            yl, Variable(xp.zeros(batchsize, dtype=np.int32)))
                        L_dis = F.softmax_cross_entropy(
                            yl, Variable(xp.ones(batchsize, dtype=np.int32)))
                        # train discriminator
                        yl_train = dis(movie_in, movie_out)
                        L_dis += F.softmax_cross_entropy(
                            yl_train,
                            Variable(xp.zeros(batchsize, dtype=np.int32)))
                    elif args.norm == 'CA':
                        L_evol = d_norm(0, dis, movie_out,
                                        movie_out_predict_before)
                        L_proj = d_norm(0, dis, movie_out, movie_out_predict)
                        L_dis = d_norm(1, dis, movie_out,
                                       movie_out_predict_before)
                        # L_dis += d_norm(1, dis, movie_out, movie_out_predict)
                        L_dis += d_norm(0, dis, movie_out, movie_other)
                        # L_dis += d_norm(0, dis, movie_other, movie_out)
                    else:
                        L2norm = (movie_out - movie_out_predict)**2
                        yl = F.sum(L2norm) / L2norm.data.size
                        L_evol = yl
                    evol_scores[difficulty] += [
                        L_evol.data.get()
                    ]  # np.average(F.softmax(yl).data.get()[:,0])
                    # NOTE(review): L_proj is only assigned in the 'CA'
                    # branch — this line raises NameError for the 'dcgan'
                    # and L2 regimes; confirm only 'CA' is used in practice.
                    proj_scores[difficulty] += [
                        L_proj.data.get()
                    ]  # np.average(F.softmax(yl).data.get()[:,0])
                    # stop learning on normal mode.
                    if difficulty == 'hard':
                        o_evol.zero_grads()
                        L_evol.backward()
                        o_evol.update()
                        o_dis.zero_grads()
                        L_dis.backward()
                        o_dis.update()
                        o_proj.zero_grads()
                        L_proj.backward()
                        o_proj.update()
                    # Free the graph for this offset.
                    movie_in.unchain_backward()
                    movie_out_predict.unchain_backward()
                    movie_out_predict_before.unchain_backward()
                    movie_other.unchain_backward()
                    L_evol.unchain_backward()
                    if args.norm == 'dcgan' or args.norm == 'CA':
                        L_dis.unchain_backward()
                    sys.stdout.write(
                        '%d %6s %s: %f -> %f, %f -> %f shuzo:%s\r' %
                        (train_offset, difficulty, args.norm,
                         np.average(evol_scores['normal']),
                         np.average(proj_scores['normal']),
                         np.average(evol_scores['hard']),
                         np.average(proj_scores['hard']),
                         str(shuzo_evoke_timestep[-10:])))
                    sys.stdout.flush()
                    # update the prediction as results of learning.
                    prediction_movie[
                        train_offset + n_timeseries - 1] = evolve_image(
                            evol, proj,
                            prediction_movie[train_offset:train_offset +
                                             n_timeseries - 1])
                    # prevent too much learning from noisy prediction.
                    # if len(evol_scores['hard'])>=10 and np.average(evol_scores['hard'][-5:-1]) > 5 * np.average(evol_scores['normal']):
                    # "Shuzo" reset: at random offsets, clear the hard-mode
                    # scores and re-seed predictions from ground truth.
                    if train_offset == next_shuzo_offset:
                        next_shuzo_offset = train_offset + 1 + abs(
                            int(round(
                                np.random.normal(scale=next_shuzo_scale))))
                        # Zettaini, akiramennna yo!
                        # matsuoka_shuzo['hard'] = False
                        shuzo_evoke_timestep += [train_offset]
                        evol_scores['hard'] = [0.0]
                        proj_scores['hard'] = [0.0]
                        for t in range(train_offset,
                                       train_offset + n_timeseries):
                            if current_movie[t] is not None:
                                prediction_movie[t] = current_movie[t]
            print

            def visualize_vis_kit(vis_kit):
                # Render a grid of input frames, prediction, projection and
                # ground truth for each difficulty; runs in a subprocess.
                print "visualizing...",
                sys.stdout.flush()
                for difficulty in difficulties:
                    if vis_kit[difficulty] is None:
                        continue
                    movie_data, movie_out_data, movie_pred_data, movie_proj_data = vis_kit[
                        difficulty]
                    imgfn = '%s/batch-%s_%d_%04d.png' % (
                        out_image_dir, difficulty, epoch, train_ctr)
                    n_col = n_timeseries + 3
                    plt.rcParams['figure.figsize'] = (1.0 * n_col,
                                                      1.0 * batchsize)
                    plt.close('all')
                    for ib in range(batchsize):
                        for j in range(n_timeseries - 1):
                            plt.subplot(batchsize, n_col, 1 + ib * n_col + j)
                            # First two channels use a symmetric value range.
                            if j < 2:
                                vmin = -1
                                vmax = 1
                            else:
                                vmin = 0
                                vmax = 1.4
                            plt.imshow(movie_data[ib, j, :, :], vmin=vmin,
                                       vmax=vmax)
                            plt.axis('off')
                        plt.subplot(batchsize, n_col,
                                    1 + ib * n_col + n_timeseries - 1)
                        plt.imshow(movie_pred_data[ib, 0, :, :], vmin=0,
                                   vmax=1.4)
                        plt.axis('off')
                        plt.subplot(batchsize, n_col,
                                    1 + ib * n_col + n_timeseries)
                        plt.imshow(movie_proj_data[ib, 0, :, :], vmin=0,
                                   vmax=1.4)
                        plt.axis('off')
                        plt.subplot(batchsize, n_col,
                                    1 + ib * n_col + n_timeseries + 2)
                        plt.imshow(movie_out_data[ib, 0, :, :], vmin=0,
                                   vmax=1.4)
                        plt.axis('off')
                    plt.suptitle(imgfn)
                    plt.savefig(imgfn)
                    subprocess.call(
                        "cp %s ~/public_html/suntomorrow-batch-%s-%s.png" %
                        (imgfn, difficulty, args.gpu),
                        shell=True)
                print "visualized.",
                sys.stdout.flush()

            # Fire-and-forget visualization; joined at the top of the next
            # iteration before plt is reused.
            vis_process = Process(target=visualize_vis_kit, args=(vis_kit, ))
            vis_process.start()
class RNNCharEstimator(ChainerClassifier):
    """Character-level RNN classifier trained with truncated BPTT.

    Wraps a CharLSTM or CharIRNN network behind the ChainerClassifier
    interface; accumulated loss is flushed every `seq_size` batches.
    """

    def __init__(self, net_type='lstm', net_hidden=100, vocab_size=1000,
                 dropout_ratio=0.0, seq_size=70, grad_clip=100.0, **params):
        # net_type: 'lstm' or 'irnn'; seq_size: BPTT truncation window;
        # grad_clip: gradient-norm clipping threshold.
        ChainerClassifier.__init__(self, **params)
        self.net_hidden = net_hidden
        self.net_type = net_type
        self.vocab_size = vocab_size
        self.dropout_ratio = dropout_ratio
        self.seq_size = seq_size
        self.grad_clip = grad_clip
        self.param_names.append('vocab_size')
        self.param_names.append('net_type')
        self.param_names.append('net_hidden')
        self.param_names.append('dropout_ratio')

    def setup_network(self, n_features):
        # Build the recurrent net; batch_size is inherited state
        # (presumably set by ChainerClassifier — TODO confirm).
        if self.net_type == 'lstm':
            self.network = CharLSTM(self.vocab_size, self.net_hidden,
                                    self.batch_size)
        elif self.net_type == 'irnn':
            self.network = CharIRNN(self.vocab_size, self.net_hidden,
                                    self.batch_size)
        else:
            error("Unknown net_type")
        self.reset_accum_loss()

    def reset_accum_loss(self):
        # Fresh scalar loss accumulator on the active device.
        if self.gpu >= 0:
            self.accum_loss = Variable(cuda.zeros(()))
        else:
            self.accum_loss = Variable(np.zeros(()))

    def forward_train(self, x, t):
        return self.network.train(x, t, dropout_ratio=self.dropout_ratio)

    def predict(self, x_data):
        """Feed the rows of x_data through the net sequentially and
        return the argmax class index per row."""
        self.network.reset_state(1)
        if self.gpu >= 0:
            self.network.to_gpu()
            x_data = cuda.to_gpu(x_data)
        results = None
        for i in xrange(x_data.shape[0]):
            x = Variable(x_data[i, :])
            y = self.network.predict(x)
            # Fix: identity check instead of `== None`, which can
            # broadcast element-wise on array-likes.
            if results is None:
                results = cuda.to_cpu(y.data)
            else:
                results = np.concatenate([results, cuda.to_cpu(y.data)])
        results = results.argmax(1)
        return results

    def fit_update(self, loss, batch_id):
        # Accumulate; flush gradients every seq_size batches (BPTT).
        self.accum_loss += loss
        if ((batch_id + 1) % self.seq_size) == 0:
            # Run Truncated BPTT
            self.optimizer.zero_grads()
            self.accum_loss.backward()
            self.accum_loss.unchain_backward()  # truncate
            self.optimizer.clip_grads(self.grad_clip)
            self.optimizer.update()
            self.reset_accum_loss()

    def make_batch(self, x_data, y_data, batch_id):
        """Gather one interleaved minibatch: stream j reads sample
        (batch_id + batch_num * j) mod n_samples."""
        # Fix: explicit floor division — under Python 3 true division
        # would make batch_num a float and break the index arithmetic.
        batch_num = self.n_samples // self.batch_size
        x_batch = np.array([
            x_data[(batch_id + batch_num * j) % self.n_samples]
            for j in xrange(self.batch_size)
        ]).reshape(self.batch_size)
        y_batch = np.array([
            y_data[(batch_id + batch_num * j) % self.n_samples]
            for j in xrange(self.batch_size)
        ])
        return x_batch, y_batch
def train(self, words, steps, batchsize=100, sequence_length=10): """ Train the Predictor's model on words for steps number of steps. """ whole_len = len(words) train_data = np.ndarray(whole_len, dtype=np.int32) jumps = steps * sequence_length # Initialize training data and maybe vocab. if self.vocab is None: vocab_initializing = True self.vocab = {} for i, word in enumerate(words): if vocab_initializing: if word not in self.vocab: self.vocab[word] = len(self.vocab) train_data[i] = self.vocab[word] vocab_initializing = False print 'corpus length:', len(words) print 'self.vocab size:', len(self.vocab) # Initialize base model (if we need to) if self.model is None: self.model = BaseRNN(len(self.vocab), self.units) if self.gpu >= 0: cuda.get_device(self.gpu).use() self.model.to_self.gpu() optimizer = optimizers.RMSprop(lr=self.settings.learning_rate, alpha=self.settings.decay_rate, eps=1e-8) optimizer.setup(self.model) jumpsPerEpoch = whole_len / batchsize epoch = 0 start_at = time.time() cur_at = start_at state = make_initial_state(self.units, batchsize=batchsize) if self.gpu >= 0: accum_loss = Variable(cuda.zeros(())) for _, value in state.items(): value.data = cuda.to_self.gpu(value.data) else: accum_loss = Variable(np.zeros((), dtype=np.float32)) print 'going to train {} iterations'.format(steps) for i in xrange(jumps): x_batch = np.array([train_data[(jumpsPerEpoch * j + i) % whole_len] for j in xrange(batchsize)]) y_batch = np.array([train_data[(jumpsPerEpoch * j + i + 1) % whole_len] for j in xrange(batchsize)]) if self.gpu >= 0: x_batch = cuda.to_self.gpu(x_batch) y_batch = cuda.to_self.gpu(y_batch) state, loss_i = self.model.forward_one_step(x_batch, y_batch, state, dropout_ratio=self.settings.dropout) accum_loss += loss_i if (i + 1) % sequence_length == 0: now = time.time() print '{}/{}, train_loss = {}, time = {:.2f}'.format((i+1)/sequence_length, steps, accum_loss.data / sequence_length, now-cur_at) cur_at = now optimizer.zero_grads() accum_loss.backward() 
accum_loss.unchain_backward() # truncate if self.gpu >= 0: accum_loss = Variable(cuda.zeros(())) else: accum_loss = Variable(np.zeros((), dtype=np.float32)) optimizer.clip_grads(self.settings.grad_clip) optimizer.update() if (i + 1) % jumpsPerEpoch == 0: epoch += 1 if epoch >= self.settings.learning_rate_decay_after: optimizer.lr *= self.settings.learning_rate_decay print 'decayed self.settings.learning rate by a factor {} to {}'.format(self.settings.learning_rate_decay, optimizer.lr)
def main(): args = parse_args() init_program_state(args) vocab = make_vocab() data, batched_data = load_data(args.train, vocab, args.batch_size) dev , batched_dev = load_data(args.dev, vocab, 1) test, batched_test = load_data(args.test, vocab, 1) model = init_model(input_size = len(vocab), embed_size = args.embed_size, hidden_size = args.hidden_size, output_size = len(vocab)) optimizer = optimizers.SGD(lr=args.lr) # Begin Training UF.init_model_parameters(model) model = UF.convert_to_GPU(USE_GPU, model) optimizer.setup(model) batchsize = args.batch_size epoch = args.epoch accum_loss = Variable(xp.zeros((), dtype=np.float32)) counter = 0 # For each epoch.. for ep in range(epoch): UF.trace("Training Epoch %d" % ep) total_tokens = 0 log_ppl = 0.0 # For each batch, do forward & backward computations for i, batch in enumerate(batched_data): loss, nwords = forward(model, batch) accum_loss += loss log_ppl += loss.data.reshape(()) # Tracing... total_tokens += nwords # UF.trace(' %d/%d = %.5f' % (min(i*batchsize, len(data)), len(data), loss.data.reshape(())*batchsize)) # Counting if (counter+1) % bp_len == 0: optimizer.zero_grads() accum_loss.backward() accum_loss.unchain_backward() accum_loss = Variable(xp.zeros((), dtype=np.float32)) optimizer.clip_grads(grad_clip) optimizer.update() counter += 1 # Counting Perplexity log_ppl /= total_tokens UF.trace(" PPL (Train) = %.10f" % math.exp(UF.to_cpu(USE_GPU, log_ppl))) dev_ppl = evaluate(model, batched_dev) UF.trace(" PPL (Dev) = %.10f" % math.exp(UF.to_cpu(USE_GPU, dev_ppl))) # Reducing learning rate if ep > 6: optimizer.lr /= 1.2 UF.trace("Reducing LR:", optimizer.lr) # Begin Testing UF.trace("Begin Testing...") test_ppl = evaluate(model, batched_test) UF.trace(" log(PPL) = %.10f" % test_ppl) UF.trace(" PPL = %.10f" % math.exp(UF.to_cpu(USE_GPU, test_ppl)))
def train(self, train_data, train_input, test_data, test_input, n_epochs,
          filename=None, KL_loss=False, Add_training=False):
    """
    :param train_data: data in the form n_batches x batch_size x n_steps x n_outputs
    :param test_data: data in the form n_batches x batch_size x n_steps x n_outputs
    :param n_epochs: nr of training epochs
    :param dec_input: this is the input to the decoder, which can modulate
        input dynamics; size: step_size x n_inputs
    :return: (train_loss, test_loss, batches_loss) arrays
    """
    # keep track of loss
    train_loss = np.zeros(n_epochs)
    test_loss = np.zeros(n_epochs)
    batches_loss = np.zeros(train_data.shape[0] * n_epochs)
    # keep track of learned alphas (time constants).
    # Fix: `is not 'Static'` identity-compared a string literal, which is
    # implementation-dependent (and a SyntaxWarning on modern Pythons);
    # use value inequality.
    if self.model.mode != 'Static':
        learning_alphasS = np.empty(
            (n_epochs + 1, self.model.hidden.alphaS.alpha.size))
        learning_alphasR = np.empty(
            (n_epochs + 1, self.model.hidden.alphaR.alpha.size))
        learning_alphasS[0, :] = self.model.hidden.alphaS.alpha
        learning_alphasR[0, :] = self.model.hidden.alphaR.alpha
    index = 0  # for batch-wise loss
    best_loss = 4000  # large sentinel; any real validation loss should beat it
    if Add_training:
        # Only the slow layer's parameters are given to the optimizer.
        self.optimizer.setup(self.model.slow)
    for epoch in tqdm.tqdm(xrange(n_epochs)):
        with chainer.using_config('train', True):
            n_batches = train_data.shape[0]
            batch_size = train_data.shape[1]
            n_steps = train_data.shape[2]
            for i in range(n_batches):
                loss = Variable(self.xp.array(0, 'float32'))
                self.model.reset_state()
                # initialization for this batch
                data0 = Variable(train_data[i, :, 0, :])
                self.model.hidden.initialize_state(batch_size)
                for t in xrange(0, n_steps, 1):
                    x = Variable(train_input[i, :, t, :])
                    data = self.xp.array(train_data[i, :, t, :])
                    _loss = mean_squared_error(self.model(x), data)  # prediction mode
                    if KL_loss:
                        _loss = self.KL_divergence(self.model(), data)
                    train_loss[epoch] += cuda.to_cpu(_loss.data)
                    loss += _loss
                batches_loss[index] = loss.data
                index = index + 1
                self.model.cleargrads()  # clears grads of the whole link
                loss.backward()
                loss.unchain_backward()
                if Add_training:
                    # Freeze everything except the slow layer's trainable part.
                    self.model.fast.U1.disable_update()
                    self.model.fast.W.disable_update()
                    self.model.inout.disable_update()
                    self.model.slow.W.disable_update()
                self.optimizer.update()
            # save learning of time constants once per epoch
            if self.model.mode != 'Static':
                learning_alphasS[epoch + 1, :] = self.model.hidden.alphaS.alpha.data
                learning_alphasR[epoch + 1, :] = self.model.hidden.alphaR.alpha.data
            # compute loss per epoch
            train_loss[epoch] /= (n_batches * batch_size * self.model.n_out)
        # validation
        with chainer.using_config('train', False):
            n_batches = test_data.shape[0]
            batch_size = test_data.shape[1]
            n_steps = test_data.shape[2]
            for i in range(n_batches):
                self.model.reset_state()
                # Fix: indexed with stale `t` left over from the previous
                # loop; the training branch uses time step 0 here.
                data0 = Variable(test_data[i, :, 0, :])
                self.model.hidden.initialize_state(batch_size)
                for t in xrange(0, n_steps, 1):
                    x = Variable(test_input[i, :, t, :])
                    data = self.xp.array(test_data[i, :, t, :])
                    _loss = mean_squared_error(self.model(x), data)  # prediction mode
                    if KL_loss:
                        _loss = self.KL_divergence(self.model(), data)
                    test_loss[epoch] += cuda.to_cpu(_loss.data)
            # compute loss per epoch
            test_loss[epoch] /= (n_batches * batch_size * self.model.n_out)
            # early-stopping style checkpoint: keep the best validation model
            if test_loss[epoch] < best_loss:
                best_loss = test_loss[epoch]
                self.save('saved_models/' + filename + '/best')
                np.save('saved_models/' + filename + '/conv_epoch', epoch)
    # end of training cycle
    np.save('saved_models/' + filename + '/best_loss', best_loss)
    if self.model.mode != 'Static':
        np.save('saved_models/' + filename + '/learning_alphaS', learning_alphasS)
        np.save('saved_models/' + filename + '/learning_alphaR', learning_alphasR)
    return train_loss, test_loss, batches_loss
def main():
    """Train a character-level RNN language model with truncated BPTT,
    periodically checkpointing the model to disk."""
    # arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', type=str, default='data/dazai')
    parser.add_argument('--checkpoint_dir', type=str, default='model')
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--rnn_size', type=int, default=128)
    parser.add_argument('--learning_rate', type=float, default=2e-3)
    parser.add_argument('--learning_rate_decay', type=float, default=0.97)
    parser.add_argument('--learning_rate_decay_after', type=int, default=10)
    parser.add_argument('--decay_rate', type=float, default=0.95)
    parser.add_argument('--dropout', type=float, default=0.0)
    parser.add_argument('--seq_length', type=int, default=50)
    parser.add_argument('--batchsize', type=int, default=50)
    parser.add_argument('--epochs', type=int, default=50)
    parser.add_argument('--grad_clip', type=int, default=5)
    parser.add_argument('--init_from', type=str, default='')
    # Fix: argparse's `type=bool` treats ANY non-empty string — including
    # "False" — as True; parse the text explicitly instead. Default and
    # flag name are unchanged.
    parser.add_argument('--enable_checkpoint',
                        type=lambda s: str(s).lower() in ('true', '1', 'yes'),
                        default=True)
    parser.add_argument('--file_name', type=str, default='input.txt')
    args = parser.parse_args()

    if not os.path.exists(args.checkpoint_dir):
        os.mkdir(args.checkpoint_dir)

    n_epochs = args.epochs
    n_units = args.rnn_size
    batchsize = args.batchsize
    bprop_len = args.seq_length
    grad_clip = args.grad_clip

    xp = cuda.cupy if args.gpu >= 0 else np

    train_data, words, vocab = load_data(args.data_dir, args.file_name)
    pickle.dump(vocab, open('%s/vocab.bin' % args.data_dir, 'wb'))
    if len(args.init_from) > 0:
        model = pickle.load(open(args.init_from, 'rb'))
    else:
        model = CharRNN(len(vocab), n_units)
    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()

    optimizer = optimizers.RMSprop(lr=args.learning_rate,
                                   alpha=args.decay_rate, eps=1e-8)
    optimizer.setup(model)
    # cap the gradient norm
    optimizer.add_hook(chainer.optimizer.GradientClipping(grad_clip))

    whole_len = train_data.shape[0]
    jump = int(whole_len / batchsize)
    epoch = 0
    start_at = time.time()
    cur_at = start_at
    state = make_initial_state(n_units, batchsize=batchsize)
    if args.gpu >= 0:
        accum_loss = Variable(xp.zeros(()).astype(np.float32))
        for key, value in state.items():
            value.data = cuda.to_gpu(value.data)
    else:
        accum_loss = Variable(xp.zeros(()).astype(np.float32))

    print('going to train {} iterations'.format(jump * n_epochs / bprop_len))
    sum_perp = 0
    count = 0
    iteration = 0
    for i in range(jump * n_epochs):
        # Interleaved streams: stream j reads position jump*j + i.
        x_batch = xp.array([train_data[(jump * j + i) % whole_len]
                            for j in xrange(batchsize)])
        y_batch = xp.array([train_data[(jump * j + i + 1) % whole_len]
                            for j in xrange(batchsize)])
        if args.gpu >= 0:
            x_batch = cuda.to_gpu(x_batch)
            y_batch = cuda.to_gpu(y_batch)
        state, loss_i = model.forward_one_step(x_batch, y_batch, state,
                                               dropout_ratio=args.dropout)
        accum_loss += loss_i
        count += 1
        if (i + 1) % bprop_len == 0:  # Run truncated BPTT
            iteration += 1
            sum_perp += accum_loss.data
            now = time.time()
            print('{}/{}, train_loss = {}, time = {:.2f}'.format(
                (i + 1) / bprop_len, jump * n_epochs / bprop_len,
                accum_loss.data / bprop_len, now - cur_at))
            cur_at = now
            model.cleargrads()
            accum_loss.backward()
            accum_loss.unchain_backward()  # truncate
            if args.gpu >= 0:
                accum_loss = Variable(xp.zeros(()).astype(np.float32))
            else:
                accum_loss = Variable(np.zeros((), dtype=np.float32))
            optimizer.update()
        if (i + 1) % 1000 == 0:
            print('epoch: ', epoch)
            print('iteration: ', iteration)
            print('training perplexity: ', np.exp(float(sum_perp) / count))
            sum_perp = 0
            count = 0
        if args.enable_checkpoint:
            if (i + 1) % 10000 == 0:
                fn = ('%s/charrnn_epoch_%.2f.chainermodel' %
                      (args.checkpoint_dir, float(i) / jump))
                pickle.dump(copy.deepcopy(model).to_cpu(), open(fn, 'wb'))
                pickle.dump(
                    copy.deepcopy(model).to_cpu(),
                    open('%s/latest.chainermodel' % (args.checkpoint_dir), 'wb'))
        if (i + 1) % jump == 0:
            epoch += 1
            if epoch >= args.learning_rate_decay_after:
                optimizer.lr *= args.learning_rate_decay
                print('decayed learning rate by a factor {} to {}'.format(
                    args.learning_rate_decay, optimizer.lr))
        sys.stdout.flush()
def findNumEpoch(architecture, waves, infos, gpu_id, waveFs):
    # Trains `Net` on the training folds and evaluates on the dev fold every
    # `devEpoch` epochs; stops once no improvement has been seen for
    # `convergenceEpoch` epochs. Returns (bestEpoch, bestScore, seed) so the
    # run can be reproduced. NOTE(review): int32/float32/newaxis appear to be
    # numpy names imported at module level — confirm against the file header.
    if cupy is not None and gpu_id >= 0:
        xp = cupy
        cupy.cuda.Device(gpu_id).use()
    else:
        xp = np
    inputLength = totalInputLength(architecture)
    labels = getLabels()
    numLabel = len(labels)
    # Fold split: folds 0-2 train ("ins"), fold 3 dev, fold 4 held out.
    groupFold = ((0, 1, 2), (3, ), (4, ))
    # Draw a fresh random seed, then re-seed with it so the run is replayable.
    np.random.seed()
    seed = np.random.randint(0, np.iinfo(int32).max)
    np.random.seed(seed)
    net = Net(numLabel, architecture, functions.elu)
    # opt=Eve(1e-4)
    opt = optimizers.Adam(1e-4)
    opt.setup(net)
    if gpu_id >= 0:
        net.to_gpu(gpu_id)
    insLabelSize = 2**2  # training samples drawn per label per step
    devSize = 2**1  # dev minibatch size
    devSegmentSecUpper = 10  # max seconds per dev segment
    devEpoch = 2**5  # evaluate on dev every 32 epochs
    convergenceEpoch = 2**5 * devEpoch  # patience before stopping
    devSegmentLenUpper = int(devSegmentSecUpper * waveFs)
    devFold = sorted(set(groupFold[1]))
    devLabelWave = groupLabelWave((devFold, ), infos)[0]
    # Flatten to (label_index, wave_index) pairs, shortest waves first.
    devLabelWave = list(
        itertools.chain.from_iterable([[(li, i) for i in devLabelWave[la]]
                                       for li, la in enumerate(labels)]))
    devLabelWave = sorted(devLabelWave, key=lambda lw: len(waves[lw[1]]))
    devBatchIndex = np.array_split(np.arange(len(devLabelWave)),
                                   int(np.ceil(len(devLabelWave) / devSize)))
    # Total sample count per label, used to normalize the error rate.
    devLabelSize = np.zeros(numLabel, int32)
    for li, wi in devLabelWave:
        devLabelSize[li] += len(waves[wi])
    # Zero-pad each dev wave by half the receptive field at the tail.
    devWaves = {}
    for li, wi in devLabelWave:
        wave = waves[wi]
        wave = np.concatenate((wave, np.zeros((inputLength - 1) // 2,
                                              float32)))
        devWaves[wi] = wave
    insFold = sorted(set(groupFold[0]))
    insLabelWave = groupLabelWave((insFold, ), infos)[0]
    # Per label, stack (wave_index, time_index) pairs for every sample.
    insLabelWaveIndex = [[] for i in range(len(labels))]
    for li, la in enumerate(labels):
        for i in insLabelWave[la]:
            wave = waves[i]
            timeIndex = np.arange(len(wave))
            waveIndex = np.ones(len(wave), int32) * i
            index = np.stack((waveIndex, timeIndex), axis=1)
            insLabelWaveIndex[li].append(index)
        insLabelWaveIndex[li] = np.concatenate(insLabelWaveIndex[li], axis=0)
    # Shuffled per-label pools that makeInpTru presumably consumes — TODO confirm.
    insRemainingLabelWave = [
        np.random.permutation(insLabelWaveIndex[li])
        for li in range(len(labels))
    ]
    epoch = 0
    bestEpoch = 0
    epochIncorrect = {}  # epoch -> mean per-label error rate on the dev fold
    while epoch < bestEpoch + convergenceEpoch:
        # One training step on a balanced sample of the training folds.
        x, tr = makeInpTru(insLabelWaveIndex, waves, insRemainingLabelWave,
                           inputLength, insLabelSize, numLabel)
        x = x[:, newaxis, :, newaxis]
        x = xp.asarray(x)
        x = Variable(x)
        x = net.callSingle(x, True)
        tr = tr[..., newaxis, newaxis]
        tr = xp.asarray(tr)
        e = functions.softmax_cross_entropy(x, tr)
        net.cleargrads()
        e.backward()
        e.unchain_backward()
        # opt.update(loss=e.data)
        opt.update()
        if epoch % devEpoch != devEpoch - 1:
            epoch += 1
            continue
        # Dev evaluation: stream each wave through the net in segments.
        incorrect = xp.zeros(numLabel, int32)
        with chainer.using_config("enable_backprop", False):
            for bi, index in enumerate(devBatchIndex):
                waveIndex = np.array([devLabelWave[i][1] for i in index])
                tru = np.array([devLabelWave[i][0] for i in index])
                # Waves are sorted by length, so the last one is the longest.
                waveLen = len(devWaves[waveIndex[-1]])
                segmentTimes = np.array_split(
                    np.arange(waveLen),
                    int(np.ceil((waveLen) / devSegmentLenUpper)))
                net.reset()
                for si, segTime in enumerate(segmentTimes):
                    t0 = segTime[0]
                    t1 = segTime[-1] + 1
                    x = np.zeros((len(index), t1 - t0), float32)
                    # -1 marks positions to ignore when scoring.
                    tr = -np.ones((len(index), t1 - t0), int32)
                    for xi, wi in enumerate(waveIndex):
                        if len(devWaves[wi]) <= t0:
                            continue
                        w = devWaves[wi][t0:t1]
                        x[xi, :len(w)] = w
                        tr[xi, :len(w)] = tru[xi]
                    # Mask the warm-up region at the head of each wave.
                    if t0 < (inputLength - 1) // 2:
                        tr[:, :(inputLength - 1) // 2 - t0] = -1
                    x = x[:, newaxis, :, newaxis]
                    x = xp.asarray(x)
                    x = Variable(x)
                    x = net(x, False)
                    x.unchain_backward()
                    x = xp.argmax(x.data, axis=1)
                    tr = tr[..., newaxis]
                    tr = xp.asarray(tr)
                    for li, la in enumerate(labels):
                        incorrect[li] += (x[tr == li] != li).sum()
                net.reset()
        if gpu_id >= 0:
            incorrect = cupy.asnumpy(incorrect)
        # Mean of per-label error rates (balanced across labels).
        incorrect = (incorrect / devLabelSize).mean()
        print("epoch", epoch, "incorrect", incorrect)
        if len(epochIncorrect) == 0 or incorrect < epochIncorrect[bestEpoch]:
            bestEpoch = epoch
        epochIncorrect[epoch] = incorrect
        epoch += 1
    devEpochs = np.array(sorted(epochIncorrect), int32)
    bestScore = epochIncorrect[bestEpoch]
    epochIncorrect = np.array([epochIncorrect[ep] for ep in devEpochs])
    return bestEpoch, bestScore, seed
def update_core(self): enc_optimizer = self.get_optimizer('enc') dec_optimizer = self.get_optimizer('dec') dis_optimizer = self.get_optimizer('dis') enc, dec, dis = self.enc, self.dec, self.dis xp = enc.xp batch = self.get_iterator('main').next() batchsize = len(batch) in_ch = batch[0][0].shape[0] assert(batch[0][1].shape[0] % 2 == 0) out_ch = int(batch[0][1].shape[0] / 2) # Changing to voxel space w_in = 256 w_out = 64 x_in = xp.zeros((batchsize, in_ch, w_in, w_in)).astype(np.float32) t_out = xp.zeros((batchsize, out_ch, w_out, w_out, w_out)).astype(np.float32) d_in = xp.zeros((batchsize, out_ch, w_out, w_out, w_out)).astype(np.float32) for i in range(batchsize): x_in[i,:] = xp.asarray(batch[i][0]) t_out[i,:] = xp.asarray(batch[i][1][0]) d_in[i,:] = xp.asarray(batch[i][1][1]) x_in = Variable(x_in) t_out = Variable(t_out) d_in = Variable(d_in) with chainer.using_config('train', True): # This will no longer work for pix-pix z = enc(x_in) x_out = dec(z) y_fake = dis(x_out, d_in) y_real = dis(t_out, d_in) update_dis, update_gen = self.check_dis(y_real, y_fake) if update_gen: enc_optimizer.update(self.loss_enc, enc, x_out, t_out, y_fake) for z_ in z: z_.unchain_backward() dec_optimizer.update(self.loss_dec, dec, x_out, t_out, y_fake) else: print("Not updating gen") x_in.unchain_backward() x_out.unchain_backward() if update_dis : dis_optimizer.update( self.loss_dis, dis, y_real, y_fake) else: print("Not updating disc")
def _train(self, **kwargs):
    """Train self.model on self.dataset with truncated BPTT.

    Recognized keyword arguments (with defaults): gpu (-1), lr (2e-3),
    lr_decay (0.97), lr_decay_after (10), decay_rate (0.95), dropout (0.0),
    bprop_len (50), batchsize (50), grad_clip (5), epochs (5).
    The model is pickled to self.model_path every 10000 steps and once
    more at the end.
    """
    # Hyper-parameters, falling back to defaults when absent.
    gpu = kwargs.get("gpu", -1)
    lr = kwargs.get("lr", 2e-3)
    lr_decay = kwargs.get("lr_decay", 0.97)
    lr_decay_after = kwargs.get("lr_decay_after", 10)
    decay_rate = kwargs.get("decay_rate", 0.95)
    dropout = kwargs.get("dropout", 0.0)
    bprop_len = kwargs.get("bprop_len", 50)
    batchsize = kwargs.get("batchsize", 50)
    grad_clip = kwargs.get("grad_clip", 5)
    n_epochs = kwargs.get("epochs", 5)

    if gpu >= 0:
        cuda.get_device(gpu).use()
        self.model.to_gpu()

    optimizer = optimizers.RMSprop(lr=lr, alpha=decay_rate, eps=1e-8)
    optimizer.setup(self.model)

    train_data = self.dataset
    whole_len = train_data.shape[0]
    jump = whole_len // batchsize
    epoch = 0
    start_at = time.time()
    cur_at = start_at
    state = self.model.make_initial_state(batchsize=batchsize)

    def _zero_loss():
        # Fresh scalar accumulator for one truncated-BPTT window.
        if gpu >= 0:
            return Variable(cuda.zeros(()))
        return Variable(np.zeros((), dtype=np.float32))

    if gpu >= 0:
        for key, value in state.items():
            value.data = cuda.to_gpu(value.data)
    accum_loss = _zero_loss()

    total_steps = jump * n_epochs
    print ('going to train {} iterations'.format(total_steps))
    for step in range(total_steps):
        # Interleaved streams: stream j reads position jump*j + step.
        x_batch = np.array([train_data[(jump * j + step) % whole_len]
                            for j in range(batchsize)])
        y_batch = np.array([train_data[(jump * j + step + 1) % whole_len]
                            for j in range(batchsize)])
        if gpu >= 0:
            x_batch = cuda.to_gpu(x_batch)
            y_batch = cuda.to_gpu(y_batch)
        state, loss_i = self.model.forward_one_step(x_batch, y_batch, state,
                                                    dropout_ratio=dropout)
        accum_loss += loss_i
        if (step + 1) % bprop_len == 0:  # Run truncated BPTT
            now = time.time()
            sys.stderr.write('\r{}/{}, train_loss = {}, time = {:.2f}'.format(
                (step + 1) // bprop_len, (jump * n_epochs) // bprop_len,
                accum_loss.data / bprop_len, now - cur_at))
            sys.stderr.flush()
            cur_at = now
            optimizer.zero_grads()
            accum_loss.backward()
            accum_loss.unchain_backward()  # truncate
            accum_loss = _zero_loss()
            optimizer.clip_grads(grad_clip)
            optimizer.update()
        if (step + 1) % 10000 == 0:
            # Periodic checkpoint.
            pickle.dump(copy.deepcopy(self.model).to_cpu(),
                        open(self.model_path, 'wb'))
        if (step + 1) % jump == 0:
            epoch += 1
            if epoch >= lr_decay_after:
                optimizer.lr *= lr_decay
                print ('decayed learning rate by a factor {} to {}'.format(
                    lr_decay, optimizer.lr))
            sys.stdout.flush()
    pickle.dump(copy.deepcopy(self.model).to_cpu(), open(self.model_path, 'wb'))
class RNNCharEstimator(ChainerClassifier):
    """Character-level RNN classifier (truncated-BPTT training).

    Chooses between a CharLSTM and a CharIRNN backbone; `fit_update`
    flushes accumulated gradients every `seq_size` batches.
    """

    def __init__(self, net_type='lstm', net_hidden=100, vocab_size=1000,
                 dropout_ratio=0.0, seq_size=70, grad_clip=100.0, **params):
        # seq_size is the BPTT truncation window; grad_clip bounds the
        # gradient norm before each optimizer step.
        ChainerClassifier.__init__(self, **params)
        self.net_hidden = net_hidden
        self.net_type = net_type
        self.vocab_size = vocab_size
        self.dropout_ratio = dropout_ratio
        self.seq_size = seq_size
        self.grad_clip = grad_clip
        self.param_names.append('vocab_size')
        self.param_names.append('net_type')
        self.param_names.append('net_hidden')
        self.param_names.append('dropout_ratio')

    def setup_network(self, n_features):
        # batch_size is inherited state (presumably from ChainerClassifier
        # — TODO confirm).
        if self.net_type == 'lstm':
            self.network = CharLSTM(self.vocab_size, self.net_hidden,
                                    self.batch_size)
        elif self.net_type == 'irnn':
            self.network = CharIRNN(self.vocab_size, self.net_hidden,
                                    self.batch_size)
        else:
            error("Unknown net_type")
        self.reset_accum_loss()

    def reset_accum_loss(self):
        # New zero scalar accumulator on the active device.
        if self.gpu >= 0:
            self.accum_loss = Variable(cuda.zeros(()))
        else:
            self.accum_loss = Variable(np.zeros(()))

    def forward_train(self, x, t):
        return self.network.train(x, t, dropout_ratio=self.dropout_ratio)

    def predict(self, x_data):
        """Run x_data row by row through the net; return argmax labels."""
        self.network.reset_state(1)
        if self.gpu >= 0:
            self.network.to_gpu()
            x_data = cuda.to_gpu(x_data)
        results = None
        for i in xrange(x_data.shape[0]):
            x = Variable(x_data[i, :])
            y = self.network.predict(x)
            # Fix: `== None` can broadcast on array-likes; identity test
            # is the correct check here.
            if results is None:
                results = cuda.to_cpu(y.data)
            else:
                results = np.concatenate([results, cuda.to_cpu(y.data)])
        results = results.argmax(1)
        return results

    def fit_update(self, loss, batch_id):
        # Accumulate loss; every seq_size batches run one BPTT update.
        self.accum_loss += loss
        if ((batch_id + 1) % self.seq_size) == 0:
            # Run Truncated BPTT
            self.optimizer.zero_grads()
            self.accum_loss.backward()
            self.accum_loss.unchain_backward()  # truncate
            self.optimizer.clip_grads(self.grad_clip)
            self.optimizer.update()
            self.reset_accum_loss()

    def make_batch(self, x_data, y_data, batch_id):
        """Build one interleaved minibatch; stream j draws sample
        (batch_id + batch_num * j) mod n_samples."""
        # Fix: floor division keeps batch_num an int under Python 3
        # (true division would yield a float and break the indexing).
        batch_num = self.n_samples // self.batch_size
        x_batch = np.array([x_data[(batch_id + batch_num * j) % self.n_samples]
                            for j in xrange(self.batch_size)
                            ]).reshape(self.batch_size)
        y_batch = np.array([y_data[(batch_id + batch_num * j) % self.n_samples]
                            for j in xrange(self.batch_size)])
        return x_batch, y_batch
def train_dcgan_labeled(evol, dis, proj, epoch0=0):
    # Adversarial training loop for solar-image forecasting: `evol` predicts
    # the next frame from a time-series window, `proj` post-processes the
    # prediction, `dis` discriminates. Optimizer/model state is resumed from
    # HDF5 unless args.fresh_start. Relies on many module-level globals
    # (args, n_epoch, n_train, batchsize, n_movie, n_timeseries,
    # save_interval, out_model_dir, out_image_dir, xp, ...).
    global epoch
    o_evol = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_evol.setup(evol)
    o_dis = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_dis.setup(dis)
    o_proj = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_proj.setup(proj)
    if not args.fresh_start:
        # Resume models and optimizer state from the previous run.
        serializers.load_hdf5("%s/dcgan_model_evol.h5"%(out_model_dir),evol)
        serializers.load_hdf5("%s/dcgan_state_evol.h5"%(out_model_dir),o_evol)
        serializers.load_hdf5("%s/dcgan_model_dis.h5"%(out_model_dir),dis)
        serializers.load_hdf5("%s/dcgan_state_dis.h5"%(out_model_dir),o_dis)
        serializers.load_hdf5("%s/dcgan_model_proj.h5"%(out_model_dir),proj)
        serializers.load_hdf5("%s/dcgan_state_proj.h5"%(out_model_dir),o_proj)
    o_evol.add_hook(chainer.optimizer.WeightDecay(0.00001))
    o_dis.add_hook(chainer.optimizer.WeightDecay(0.00001))
    o_proj.add_hook(chainer.optimizer.WeightDecay(0.00001))

    vis_process = None  # background visualization worker (multiprocessing)
    for epoch in xrange(epoch0,n_epoch):
        for train_ctr in xrange(0, n_train, batchsize):
            print "epoch:", epoch,"train:",train_ctr,
            # discriminator
            # 0: from dataset
            # 1: from noise
            good_movie=True
            prediction_movie=n_movie*[None]
            # NOTE(review): bare except silently skips any load failure,
            # including KeyboardInterrupt — presumably deliberate best-effort.
            try:
                current_movie = load_movie()
            except:
                continue
            # Seed the prediction buffer with the first observed frames.
            for i in range(n_timeseries-1):
                if current_movie[i] is None:
                    good_movie=False
                else:
                    prediction_movie[i]=current_movie[i]
            if not good_movie: continue
            # Roll the model forward to fill the rest of the movie.
            for i in range(n_timeseries-1,n_movie):
                prediction_movie[i] = evolve_image(evol,proj,prediction_movie[i-n_timeseries+1 : i])
            if train_ctr%save_interval==0:
                # Dump sample predictions vs. observations as images.
                for answer_mode in ['predict','observe']:
                    for offset in [n_timeseries,16,32,64,119]:
                        if offset >= n_movie: continue
                        img_prediction = prediction_movie[offset]
                        if answer_mode == 'observe':
                            img_prediction = current_movie[offset]
                        if img_prediction is None: continue
                        imgfn = '%s/futuresun_%d_%04d_%s+%03d.png'%(out_image_dir, epoch,train_ctr,answer_mode,offset)
                        plt.rcParams['figure.figsize'] = (12.0, 12.0)
                        plt.close('all')
                        plt.imshow(img_prediction,vmin=0,vmax=1.4)
                        plt.suptitle(imgfn)
                        plt.savefig(imgfn)
                        subprocess.call("cp %s ~/public_html/futuresun/"%(imgfn),shell=True)
                # we don't have enough disk for history
                history_dir = 'history/' #%d-%d'%(epoch, train_ctr)
                subprocess.call("mkdir -p %s "%(history_dir),shell=True)
                subprocess.call("cp %s/*.h5 %s "%(out_model_dir,history_dir),shell=True)
                if epoch>0 or train_ctr>0:
                    print 'saving model...'
                    serializers.save_hdf5("%s/dcgan_model_evol.h5"%(out_model_dir),evol)
                    serializers.save_hdf5("%s/dcgan_state_evol.h5"%(out_model_dir),o_evol)
                    serializers.save_hdf5("%s/dcgan_model_dis.h5"%(out_model_dir),dis)
                    serializers.save_hdf5("%s/dcgan_state_dis.h5"%(out_model_dir),o_dis)
                    serializers.save_hdf5("%s/dcgan_model_proj.h5"%(out_model_dir),proj)
                    serializers.save_hdf5("%s/dcgan_state_proj.h5"%(out_model_dir),o_proj)
                    print '...saved.'
            movie_in = None
            movie_out = None
            movie_out_predict=None
            # Per-difficulty score history and "Matsuoka Shuzo" flags (named
            # after the motivational speaker — "never give up"): a difficulty
            # is trained only while its flag is True.
            evol_scores = {}
            proj_scores = {}
            matsuoka_shuzo = {}
            shuzo_evoke_timestep = []
            difficulties = ['normal','hard']
            vis_kit = {}
            for difficulty in difficulties:
                evol_scores[difficulty] = [0.0]
                proj_scores[difficulty] = [0.0]
                matsuoka_shuzo[difficulty] = True
                vis_kit[difficulty] = None
            # ("dameda, dameda.... Akirameyou....": "no good, give up" —
            # normal mode is observed only, not trained.)
            matsuoka_shuzo['normal'] = False # dameda, dameda.... Akirameyou....

            if vis_process is not None:
                # Wait for the previous batch's visualization to finish.
                vis_process.join()
                vis_process = None

            # start main training routine.
            print
            next_shuzo_scale=10.0 * (1+epoch)
            next_shuzo_offset = 1 + abs(int(round(np.random.normal(scale=next_shuzo_scale))))
            for train_offset in range(0,n_movie-n_timeseries):
                for difficulty in difficulties:
                    movie_clip = current_movie
                    if not matsuoka_shuzo[difficulty]:
                        # Doushitesokode yamerunda... ("why quit there...")
                        continue
                    else:
                        # Akiramen'nayo! ("don't give up!")
                        pass
                    # 'normal' trains on observed frames; 'hard' trains on
                    # the model's own (noisier) predictions.
                    if difficulty == 'normal':
                        movie_clip_in = movie_clip
                    else:
                        movie_clip_in = prediction_movie
                    maybe_dat = create_batch(train_offset,movie_clip_in, movie_clip)
                    if not maybe_dat :
                        #print "Warning: skip offset", train_offset, "because of unavailable data."
                        continue
                    data_in, data_out, data_other = maybe_dat
                    movie_in = Variable(cuda.to_gpu(data_in))
                    movie_out = Variable(cuda.to_gpu(data_out))
                    movie_other = Variable(cuda.to_gpu(data_other))
                    movie_out_predict_before = evol(movie_in)
                    movie_out_predict = proj(movie_out_predict_before)
                    # no proj
                    vis_kit[difficulty] = (movie_in.data.get(), movie_out.data.get(), movie_out_predict_before.data.get(), movie_out_predict.data.get())
                    # Loss depends on the chosen norm: DCGAN-style
                    # cross-entropy, discriminator-based 'CA' norm, or
                    # plain mean-squared error.
                    if args.norm == 'dcgan':
                        yl = dis(movie_in,movie_out_predict)
                        L_evol = F.softmax_cross_entropy(yl, Variable(xp.zeros(batchsize, dtype=np.int32)))
                        L_dis = F.softmax_cross_entropy(yl, Variable(xp.ones(batchsize, dtype=np.int32)))
                        # train discriminator
                        yl_train = dis(movie_in,movie_out)
                        L_dis += F.softmax_cross_entropy(yl_train, Variable(xp.zeros(batchsize, dtype=np.int32)))
                    elif args.norm == 'CA':
                        L_evol = d_norm(0, dis, movie_out, movie_out_predict_before)
                        L_proj = d_norm(0, dis, movie_out, movie_out_predict)
                        L_dis = d_norm(1, dis, movie_out, movie_out_predict_before)
                        # L_dis += d_norm(1, dis, movie_out, movie_out_predict)
                        L_dis += d_norm(0, dis, movie_out, movie_other)
                        # L_dis += d_norm(0, dis, movie_other, movie_out)
                    else:
                        L2norm = (movie_out - movie_out_predict)**2
                        yl = F.sum(L2norm) / L2norm.data.size
                        L_evol = yl
                    evol_scores[difficulty] += [L_evol.data.get()] # np.average(F.softmax(yl).data.get()[:,0])
                    proj_scores[difficulty] += [L_proj.data.get()] # np.average(F.softmax(yl).data.get()[:,0])

                    # stop learning on normal mode.
                    if difficulty == 'hard':
                        o_evol.zero_grads()
                        L_evol.backward()
                        o_evol.update()
                        o_dis.zero_grads()
                        L_dis.backward()
                        o_dis.update()
                        o_proj.zero_grads()
                        L_proj.backward()
                        o_proj.update()
                    # Release the computation graph to free GPU memory.
                    movie_in.unchain_backward()
                    movie_out_predict.unchain_backward()
                    movie_out_predict_before.unchain_backward()
                    movie_other.unchain_backward()
                    L_evol.unchain_backward()
                    if args.norm == 'dcgan' or args.norm == 'CA':
                        L_dis.unchain_backward()
                    sys.stdout.write('%d %6s %s: %f -> %f, %f -> %f shuzo:%s\r'%(train_offset,difficulty, args.norm, np.average(evol_scores['normal']), np.average(proj_scores['normal']), np.average(evol_scores['hard']), np.average(proj_scores['hard']), str(shuzo_evoke_timestep[-10:])))
                    sys.stdout.flush()

                    # update the prediction as results of learning.
                    prediction_movie[train_offset+n_timeseries-1] = evolve_image(evol,proj,prediction_movie[train_offset: train_offset+n_timeseries-1])

                    # prevent too much learning from noisy prediction.
                    # if len(evol_scores['hard'])>=10 and np.average(evol_scores['hard'][-5:-1]) > 5 * np.average(evol_scores['normal']):
                    if train_offset == next_shuzo_offset:
                        # At random intervals, reset the 'hard' score history
                        # and re-anchor predictions to observed frames.
                        next_shuzo_offset = train_offset + 1 + abs(int(round(np.random.normal(scale=next_shuzo_scale))))
                        # Zettaini, akiramennna yo! ("absolutely never give up!")
                        # matsuoka_shuzo['hard'] = False
                        shuzo_evoke_timestep += [train_offset]
                        evol_scores['hard']=[0.0]
                        proj_scores['hard']=[0.0]
                        for t in range(train_offset, train_offset+n_timeseries):
                            if current_movie[t] is not None:
                                prediction_movie[t]=current_movie[t]
            print

            def visualize_vis_kit(vis_kit):
                # Render the captured tensors for each difficulty into a grid
                # image and copy it to the public web directory. Runs in a
                # separate process so training is not blocked by matplotlib.
                print "visualizing...",
                sys.stdout.flush()
                for difficulty in difficulties:
                    if vis_kit[difficulty] is None:
                        continue
                    movie_data, movie_out_data, movie_pred_data, movie_proj_data = vis_kit[difficulty]
                    imgfn = '%s/batch-%s_%d_%04d.png'%(out_image_dir,difficulty, epoch,train_ctr)
                    n_col=n_timeseries+3
                    plt.rcParams['figure.figsize'] = (1.0*n_col,1.0*batchsize)
                    plt.close('all')
                    for ib in range(batchsize):
                        # Input window frames (first two use a wider range).
                        for j in range(n_timeseries-1):
                            plt.subplot(batchsize,n_col,1 + ib*n_col + j)
                            if j < 2:
                                vmin=-1; vmax=1
                            else:
                                vmin=0; vmax=1.4
                            plt.imshow(movie_data[ib,j,:,:],vmin=vmin,vmax=vmax)
                            plt.axis('off')
                        # Raw prediction, projected prediction, ground truth.
                        plt.subplot(batchsize,n_col,1 + ib*n_col + n_timeseries-1)
                        plt.imshow(movie_pred_data[ib,0,:,:],vmin=0,vmax=1.4)
                        plt.axis('off')
                        plt.subplot(batchsize,n_col,1 + ib*n_col + n_timeseries)
                        plt.imshow(movie_proj_data[ib,0,:,:],vmin=0,vmax=1.4)
                        plt.axis('off')
                        plt.subplot(batchsize,n_col,1 + ib*n_col + n_timeseries+2)
                        plt.imshow(movie_out_data[ib,0,:,:],vmin=0,vmax=1.4)
                        plt.axis('off')
                    plt.suptitle(imgfn)
                    plt.savefig(imgfn)
                    subprocess.call("cp %s ~/public_html/suntomorrow-batch-%s-%s.png"%(imgfn,difficulty,args.gpu),shell=True)
                print "visualized.",
                sys.stdout.flush()
            vis_process = Process(target=visualize_vis_kit, args=(vis_kit,))
            vis_process.start()
n_epochs = 50 batch_size = 100 bprop_len = 50 train_data = corpus.train_data whole_len = train_data.shape[0] jump = whole_len / batch_size accum_loss = Variable(np.zeros((), dtype=np.float32)) for i in xrange(jump * n_epochs): x_batch = np.array([train_data[(jump * j + i) % whole_len] for j in xrange(batch_size)]) y_batch = np.array([train_data[(jump * j + i + 1) % whole_len] for j in xrange(batch_size)]) print x_batch loss = net.trainOneStep(x_batch, y_batch) accum_loss += loss if (i + 1) % bprop_len == 0: print "i is %d / %d, loss is %f" % (i + 1, jump * n_epochs, accum_loss.data / bprop_len) optimizer.zero_grads() accum_loss.backward() accum_loss.unchain_backward() accum_loss = Variable(np.zeros((), dtype=np.float32)) optimizer.clip_grads(5.0) optimizer.update() print corpus.decode(net.predict([corpus.encode("Before we proceed")], num=100)[0]) print corpus.decode(net.predict([corpus.encode("My lord")], num=100)[0]) # serializers.save_npz("model/shakespeare.mod", net)
def evaluate(architecture, waves, infos, gpu_id, waveFs, fileParam):
    """Evaluate a trained Net on the dev fold and return its confusion matrix.

    Loads parameters from `fileParam`, runs per-sample classification over
    the dev waves in length-sorted mini-batches of `devSize`, split into
    segments of at most `devSegmentSecUpper` seconds, and accumulates a
    (numLabel x numLabel) confusion matrix indexed [true, predicted].

    Bare `int32`, `float32`, `newaxis` imply a `from numpy import *`
    somewhere above this chunk.  Relies on module-level helpers:
    totalInputLength, getLabels, groupLabelWave, Net, functions,
    serializers, chainer, Variable, cupy/np.
    """
    if cupy is not None and gpu_id >= 0:
        xp = cupy
        cupy.cuda.Device(gpu_id).use()
    else:
        xp = np
    inputLength = totalInputLength(architecture)
    labels = getLabels()
    numLabel = len(labels)
    # Folds 0-2: train, 3: validation, 4: dev/test -- presumably; confirm
    # against the training script that uses the same grouping.
    groupFold = ((0, 1, 2), (3, ), (4, ))
    devSize = 2**1          # mini-batch size for evaluation (= 2)
    devSegmentSecUpper = 10  # max segment length in seconds per forward pass
    net = Net(numLabel, architecture, functions.elu)
    serializers.load_hdf5(fileParam, net)
    if gpu_id >= 0:
        net.to_gpu(gpu_id)
    devSegmentLenUpper = int(devSegmentSecUpper * waveFs)
    devFold = sorted(set(groupFold[2]))
    devLabelWave = groupLabelWave((devFold, ), infos)[0]
    # Flatten {label: [wave ids]} into [(label index, wave id), ...].
    devLabelWave = list(
        itertools.chain.from_iterable([[(li, i) for i in devLabelWave[la]]
                                       for li, la in enumerate(labels)]))
    # Sort by wave length so each batch contains similarly-sized waves.
    devLabelWave = sorted(devLabelWave, key=lambda lw: len(waves[lw[1]]))
    devBatchIndex = np.array_split(np.arange(len(devLabelWave)),
                                   int(np.ceil(len(devLabelWave) / devSize)))
    # Total number of labelled samples per class, used for the final check.
    devLabelSize = np.zeros(numLabel, int32)
    for li, wi in devLabelWave:
        devLabelSize[li] += len(waves[wi])
    # Right-pad each wave by half the receptive field so every original
    # sample has a full input window.
    devWaves = {}
    for li, wi in devLabelWave:
        wave = waves[wi]
        wave = np.concatenate((wave, np.zeros((inputLength - 1) // 2, float32)))
        devWaves[wi] = wave
    with chainer.using_config("enable_backprop", False):
        confusion = np.zeros((numLabel, numLabel), int32)
        for bi, index in enumerate(devBatchIndex):
            waveIndex = np.array([devLabelWave[i][1] for i in index])
            tru = np.array([devLabelWave[i][0] for i in index])
            # Batches are length-sorted, so the last wave is the longest.
            waveLen = len(devWaves[waveIndex[-1]])
            segmentTimes = np.array_split(
                np.arange(waveLen),
                int(np.ceil((waveLen) / devSegmentLenUpper)))
            net.reset()
            for si, segTime in enumerate(segmentTimes):
                t0 = segTime[0]
                t1 = segTime[-1] + 1
                x = np.zeros((len(index), t1 - t0), float32)
                # tr holds per-sample target labels; -1 marks positions to
                # ignore (padding / receptive-field warm-up).
                tr = -np.ones((len(index), t1 - t0), int32)
                for xi, wi in enumerate(waveIndex):
                    if len(devWaves[wi]) <= t0:
                        continue  # this (shorter) wave ended before t0
                    w = devWaves[wi][t0:t1]
                    x[xi, :len(w)] = w
                    tr[xi, :len(w)] = tru[xi]
                # Mask the first half-receptive-field where the network has
                # not yet seen a full input window.
                if t0 < (inputLength - 1) // 2:
                    tr[:, :(inputLength - 1) // 2 - t0] = -1
                x = x[:, newaxis, :, newaxis]
                x = xp.asarray(x)
                x = Variable(x)
                x = net(x, False)
                x.unchain_backward()
                x = xp.argmax(x.data, axis=1)
                if gpu_id >= 0:
                    x = cupy.asnumpy(x)
                x = x.flatten()
                tr = tr.flatten()
                for xi, ti in zip(x[tr >= 0], tr[tr >= 0]):
                    confusion[ti, xi] += 1
            net.reset()
    # Every labelled sample must have been counted exactly once.
    assert (np.sum(confusion, axis=1) == devLabelSize).all()
    return confusion
def train_dcgan_labeled(gen, retou, dis, epoch0=0):
    """Train a three-player DCGAN: generator, retoucher and discriminator.

    The retoucher `retou` iteratively refines generator output; the
    discriminator is trained against real data, raw generator output and
    retouched output.  Resumes from saved models/optimizer state unless
    args.fresh_start.  Reads globals: args, out_model_dir, out_image_dir,
    xp, nz, n_epoch, n_train, batchsize, image_save_interval,
    load_dataset, clip_img.
    """
    o_gen = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_retou = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_dis = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_gen.setup(gen)
    o_retou.setup(retou)
    o_dis.setup(dis)
    if not args.fresh_start:
        # Resume models and optimizer state from the previous run.
        serializers.load_hdf5("%s/dcgan_model_dis.h5"%(out_model_dir),dis)
        serializers.load_hdf5("%s/dcgan_model_gen.h5"%(out_model_dir),gen)
        serializers.load_hdf5("%s/dcgan_model_retou.h5"%(out_model_dir),retou)
        serializers.load_hdf5("%s/dcgan_state_dis.h5"%(out_model_dir),o_dis)
        serializers.load_hdf5("%s/dcgan_state_gen.h5"%(out_model_dir),o_gen)
        serializers.load_hdf5("%s/dcgan_state_retou.h5"%(out_model_dir),o_retou)
    o_gen.add_hook(chainer.optimizer.WeightDecay(0.00001))
    o_retou.add_hook(chainer.optimizer.WeightDecay(0.00001))
    o_dis.add_hook(chainer.optimizer.WeightDecay(0.00001))

    # Fixed latent vectors for visualization; the lower 50 rows are
    # re-randomized at every snapshot.
    zvis = (xp.random.uniform(-1, 1, (100, nz), dtype=np.float32))

    x_retouch_motif = None     # current batch the retoucher keeps refining
    retouch_fail_count = 0     # consecutive steps without loss improvement
    last_retouch_loss = 1.2e99
    for epoch in xrange(epoch0,n_epoch):
        print "epoch:", epoch
        perm = np.random.permutation(n_train)
        sum_l_dis = np.float32(0)
        sum_l_gen = np.float32(0)
        for i in xrange(0, n_train, batchsize):
            print "train:",i
            # discriminator
            # 0: from dataset
            # 1: from noise
            #print "load image start ", i
            x_train = load_dataset()
            #print "load image done"
            # train generator
            z = Variable(xp.random.uniform(-1, 1, (batchsize, nz), dtype=np.float32))
            x = gen(z)
            yl = dis(x)
            # Generator wants its output classified as "from dataset" (0);
            # the same logits feed the discriminator loss with label 1.
            L_gen = F.softmax_cross_entropy(yl, Variable(xp.zeros(batchsize, dtype=np.int32)))
            L_dis = F.softmax_cross_entropy(yl, Variable(xp.ones(batchsize, dtype=np.int32)))
            # train discriminator
            x_train = Variable(cuda.to_gpu(x_train))
            yl_train = dis(x_train)
            # Mean P(real) the discriminator assigns to generated images;
            # used as a health metric and (nominally) to reweight training.
            softmax_gen = F.softmax(yl).data[:,0]
            average_softmax=np.average(cuda.to_cpu(softmax_gen))
            if math.isnan(average_softmax) :
                # Training diverged: snapshot everything for post-mortem
                # and abort the process.
                serializers.save_hdf5("%s/NaN_dcgan_model_dis.h5"%(out_model_dir),dis)
                serializers.save_hdf5("%s/NaN_dcgan_model_gen.h5"%(out_model_dir),gen)
                serializers.save_hdf5("%s/NaN_dcgan_model_retou.h5"%(out_model_dir),retou)
                serializers.save_hdf5("%s/NaN_dcgan_state_dis.h5"%(out_model_dir),o_dis)
                serializers.save_hdf5("%s/NaN_dcgan_state_gen.h5"%(out_model_dir),o_gen)
                serializers.save_hdf5("%s/NaN_dcgan_state_retou.h5"%(out_model_dir),o_retou)
                exit()
            if average_softmax < 1e-3:
                train_sample_factor = 10.0
            elif average_softmax < 1e-2:
                train_sample_factor = 4.0
            elif average_softmax > 0.4:
                train_sample_factor = 1.0
            else:
                train_sample_factor = 2.0
            # NOTE(review): the adaptive factor chosen above is immediately
            # overridden here, so it is effectively always 2.0 -- confirm
            # whether disabling the schedule was intentional.
            train_sample_factor = 2.0
            L_dis += train_sample_factor * F.softmax_cross_entropy(yl_train, Variable(xp.zeros(batchsize, dtype=np.int32)))
            #train retoucher
            # Give the retoucher a fresh generator batch when it has failed
            # to improve for a while (tolerance grows with epoch, cap 10).
            if type(x_retouch_motif)==type(None) or retouch_fail_count >= min(1+ epoch, 10):
                print "Supply new motifs to retoucher."
                x_retouch_motif = Variable(x.data)
                retouch_fail_count = 0
                last_retouch_loss = 99e99
            x3=retou(x_retouch_motif) # let the retoucher make the generated image better
            yl1st = dis(x3) # and try deceive the discriminator
            # retoucher want their image to look like those from dataset(zeros),
            # while discriminators want to classify them as from noise(ones)
            L_retou = F.softmax_cross_entropy(yl1st, Variable(xp.zeros(batchsize, dtype=np.int32)))
            L_dis += F.softmax_cross_entropy(yl1st, Variable(xp.ones(batchsize, dtype=np.int32)))

            # One optimizer step per player.
            o_gen.zero_grads()
            L_gen.backward()
            o_gen.update()
            o_retou.zero_grads()
            L_retou.backward()
            o_retou.update()
            o_dis.zero_grads()
            L_dis.backward()
            o_dis.update()

            retouch_loss = float(str((L_retou).data))
            if retouch_loss >= last_retouch_loss:
                retouch_fail_count += 1
            last_retouch_loss = min(retouch_loss,last_retouch_loss)
            #print "backward done"
            sum_l_gen += L_gen.data.get()
            sum_l_dis += L_dis.data.get()

            # Cut the computation graphs to free GPU memory; the retoucher
            # keeps refining its own last output next iteration.
            x.unchain_backward()
            x_train.unchain_backward()
            x3.unchain_backward()
            x_retouch_motif = x3
            L_gen.unchain_backward()
            L_retou.unchain_backward()
            L_dis.unchain_backward()
            print "epoch:",epoch,"iter:",i,"softmax:",average_softmax, "retouch:",retouch_fail_count, retouch_loss

            if i%image_save_interval==0:
                # Periodic snapshot: montage of generated images, each
                # retouch stage, plus model/optimizer checkpoints.
                n_retou=2
                plt.rcParams['figure.figsize'] = (16.0,16.0*n_retou)
                plt.close('all')
                vissize = 100
                z = zvis
                # NOTE(review): z aliases zvis, so this re-randomizes the
                # lower half of the persistent visualization latents.
                z[50:,:] = (xp.random.uniform(-1, 1, (50, nz), dtype=np.float32))
                z = Variable(z)
                x = gen(z, test=True)
                x_data = x.data.get()
                imgfn = '%s/vis_%d_%d.png'%(out_image_dir, epoch,i)
                x_split = F.split_axis(x,vissize,0)
                def mktitle(x1):
                    # Title = discriminator's P(real) for this single image.
                    d1 = F.softmax(dis(x1,test=True))
                    def ppr(d):
                        f = float(str(d.data[0,0]))
                        return '{:0.3}'.format(f)
                    ret = '{}'.format(ppr(d1))
                    return ret
                for i_ in range(100):
                    tmp = ((np.vectorize(clip_img)(x_data[i_,:,:,:])+1)/2).transpose(1,2,0)
                    plt.subplot(n_retou*10+9,10,1+i_%10+(i_/10)*10*(n_retou+1))
                    plt.imshow(tmp)
                    plt.axis('off')
                    plt.title(mktitle(x_split[i_]),fontsize=6)
                r_p_cnt = 0
                print "vis-retouch:",
                # Apply the retoucher repeatedly and plot each stage below
                # the raw generator output.
                for cnt_step in (n_retou-1) * [1]:
                    r_p_cnt+=1
                    for r_cnt in range(cnt_step):
                        print r_cnt,
                        sys.stdout.flush()
                        x.unchain_backward()
                        x = retou(x, test=True)
                    x3_data = x.data.get()
                    x3_split = F.split_axis(x,vissize,0)
                    for i_ in range(100):
                        tmp = ((np.vectorize(clip_img)(x3_data[i_,:,:,:])+1)/2).transpose(1,2,0)
                        plt.subplot(n_retou*10+9,10,1+i_%10+(i_/10)*10*(n_retou+1)+10*r_p_cnt)
                        plt.imshow(tmp)
                        plt.axis('off')
                        plt.title(mktitle(x3_split[i_]),fontsize=6)
                plt.suptitle(imgfn)
                plt.savefig(imgfn)
                print imgfn
                subprocess.call("cp %s ~/public_html/dcgan-%d.png"%(imgfn,args.gpu),shell=True)
                serializers.save_hdf5("%s/dcgan_model_dis.h5"%(out_model_dir),dis)
                serializers.save_hdf5("%s/dcgan_model_gen.h5"%(out_model_dir),gen)
                serializers.save_hdf5("%s/dcgan_model_retou.h5"%(out_model_dir),retou)
                serializers.save_hdf5("%s/dcgan_state_dis.h5"%(out_model_dir),o_dis)
                serializers.save_hdf5("%s/dcgan_state_gen.h5"%(out_model_dir),o_gen)
                serializers.save_hdf5("%s/dcgan_state_retou.h5"%(out_model_dir),o_retou)
                # we don't have enough disk for history
                #history_dir = 'history/%d-%d'%(epoch, i)
                #subprocess.call("mkdir -p %s "%(history_dir),shell=True)
                #subprocess.call("cp %s/*.h5 %s "%(out_model_dir,history_dir),shell=True)
        print 'epoch end', epoch, sum_l_gen/n_train, sum_l_dis/n_train
# Fragment of an RNN training loop: the loop header and the code building
# x_t / y_t are outside this chunk.
if args.gpu >= 0:
    x_t = cuda.to_gpu(x_t)
    y_t = cuda.to_gpu(y_t)
state, loss_i = model.forward_one_step(x_t, y_t, state, dropout_ratio=args.dropout)
loss += loss_i
now = time.time()
end_time += now - cur_at
iterations_count += 1
print "loss_all=" + str(loss.data)
print "{}, train_loss = {}, time = {:.4f}".format(
    iterations_count,
    loss.data / (len(train_data[i % whole_len]) - 1),
    now - cur_at
)
cur_at = now
# Backprop through the accumulated graph, truncate it, clip and step.
optimizer.zero_grads()
loss.backward()
loss.unchain_backward()
optimizer.clip_grads(grad_clip)
optimizer.update()
if (i + 1) == (whole_len * n_epochs):
    # Very last iteration: dump learned weight matrices to .npy files.
    cuda.cupy.save("l1_x_W.npy", model.l1_x.W)
    cuda.cupy.save("l1_x_b.npy", model.l1_x.b)
    cuda.cupy.save("l1_h_W.npy", model.l1_h.W)
    cuda.cupy.save("l1_h_b.npy", model.l1_h.b)
    cuda.cupy.save("l6_W.npy", model.l6.W)
    cuda.cupy.save("l6_b.npy", model.l6.b)
if ((i + 1) % whole_len) == 0:
    # End of an epoch: record training loss, then run validation.
    epoch += 1
    train_loss_all.append(loss.data.get() / len(train_data[i % whole_len]))
    for k in xrange(whole_val_len):
        val_state = make_initial_state(n_units)
        # Loop body continues beyond this chunk.
        for key, value in val_state.items():
# Fragment of a truncated-BPTT training loop: the enclosing `for i ...`
# loop and batch construction are outside this chunk.
if args.gpu >=0:
    x_batch = cuda.to_gpu(x_batch)
    y_batch = cuda.to_gpu(y_batch)
state, loss_i = model.forward_one_step(x_batch, y_batch, state, dropout_ratio=args.dropout)
accum_loss += loss_i
if (i + 1) % bprop_len == 0:  # Run truncated BPTT
    now = time.time()
    print '{}/{}, train_loss = {}, time = {:.2f}'.format((i+1)/bprop_len, jump, accum_loss.data / bprop_len, now-cur_at)
    loss_file.write('{}\n'.format(accum_loss.data / bprop_len))
    cur_at = now
    optimizer.zero_grads()
    accum_loss.backward()
    accum_loss.unchain_backward()  # truncate
    # Reset the accumulator on the same device the loss lives on.
    if args.gpu >= 0:
        accum_loss = Variable(cuda.zeros(()))
    else:
        accum_loss = Variable(np.zeros(()).astype(np.float32))
    optimizer.clip_grads(grad_clip)
    optimizer.update()
if args.enable_checkpoint:
    # Periodic model checkpoint, named by fractional epoch.
    if (i + 1) % 10000 == 0:
        fn = ('%s/charrnn_epoch_%.2f.chainermodel' % (args.checkpoint_dir, float(i)/jump))
        pickle.dump(copy.deepcopy(model).to_cpu(), open(fn, 'wb'))
if (i + 1) % jump == 0:
    epoch += 1
# Fragment of an RNN training loop (near-duplicate of an earlier chunk):
# the loop header and x_t / y_t construction are outside this view.
state, loss_i = model.forward_one_step(x_t, y_t, state, dropout_ratio=args.dropout)
loss += loss_i
now = time.time()
end_time += now - cur_at
iterations_count += 1
print 'loss_all=' + str(loss.data)
print '{}, train_loss = {}, time = {:.4f}'.format(
    iterations_count,
    loss.data / (len(train_data[i % whole_len]) - 1),
    now - cur_at)
cur_at = now
# Backprop through the accumulated graph, truncate it, clip and step.
optimizer.zero_grads()
loss.backward()
loss.unchain_backward()
optimizer.clip_grads(grad_clip)
optimizer.update()
if (i + 1) == (whole_len * n_epochs):
    # Very last iteration: dump learned weight matrices to .npy files.
    cuda.cupy.save('l1_x_W.npy', model.l1_x.W)
    cuda.cupy.save('l1_x_b.npy', model.l1_x.b)
    cuda.cupy.save('l1_h_W.npy', model.l1_h.W)
    cuda.cupy.save('l1_h_b.npy', model.l1_h.b)
    cuda.cupy.save('l6_W.npy', model.l6.W)
    cuda.cupy.save('l6_b.npy', model.l6.b)
if ((i + 1) % whole_len) == 0:
    # End of an epoch: record training loss, then run validation.
    epoch += 1
    train_loss_all.append(loss.data.get() / len(train_data[i % whole_len]))
    for k in xrange(whole_val_len):
        val_state = make_initial_state(n_units)
        # Loop body continues beyond this chunk.
        for key, value in val_state.items():
# Fragment of a truncated-BPTT training loop (variant of an earlier chunk,
# py3-style print, unconditional checkpointing): the enclosing loop and
# batch construction are outside this view.
if args.gpu >=0:
    x_batch = cuda.to_gpu(x_batch)
    y_batch = cuda.to_gpu(y_batch)
state, loss_i = model.forward_one_step(x_batch, y_batch, state, dropout_ratio=args.dropout)
accum_loss += loss_i
if (i + 1) % bprop_len == 0:  # Run truncated BPTT
    now = time.time()
    print('{}/{}, train_loss = {}, time = {:.2f}'.format((i+1)/bprop_len, jump, accum_loss.data / bprop_len, now-cur_at))
    cur_at = now
    optimizer.zero_grads()
    accum_loss.backward()
    accum_loss.unchain_backward()  # truncate
    # Reset the accumulator on the same device the loss lives on.
    if args.gpu >= 0:
        accum_loss = Variable(cuda.zeros(()))
    else:
        accum_loss = Variable(np.zeros((), dtype=np.float32))
    optimizer.clip_grads(grad_clip)
    optimizer.update()
if (i + 1) % 10000 == 0:
    # Periodic model checkpoint, named by fractional epoch.
    fn = ('%s/charrnn_epoch_%.2f.chainermodel' % (args.checkpoint_dir, float(i)/jump))
    pickle.dump(copy.deepcopy(model).to_cpu(), open(fn, 'wb'))
if (i + 1) % jump == 0:
    epoch += 1
def update_core(self):
    """Run one pix2pix-style training step (encoder / decoder /
    discriminator), and every ``eval_interval`` iterations compute
    evaluation losses on the 'test' iterator and append them to
    ``eval_log.txt``.

    Fixes vs. the previous version:
    - ``chainer.report`` used to report the *log file object* under
      'eval_loss'; it now reports the computed loss.
    - The nested eval helpers took a bogus ``self`` first parameter
      (callers passed ``self.loss_enc`` / ``self.loss_dis`` into it);
      the parameter is removed.
    - The log file is managed with ``with`` so it cannot leak on error.
    - Evaluation forward passes run with backprop disabled, since their
      graphs were never used.
    """

    def _load_batch(iterator_name):
        # Stack a minibatch of (input, target) image pairs into xp arrays
        # at the fixed 256x256 resolution; returns (Variable, ndarray).
        batch = self.get_iterator(iterator_name).next()
        batchsize = len(batch)
        in_ch = batch[0][0].shape[0]
        out_ch = batch[0][1].shape[0]
        w_in = 256   # fixed input resolution
        w_out = 256  # fixed output resolution
        x_in = xp.zeros((batchsize, in_ch, w_in, w_in)).astype("f")
        t_out = xp.zeros((batchsize, out_ch, w_out, w_out)).astype("f")
        for i in range(batchsize):
            x_in[i, :] = xp.asarray(batch[i][0])
            t_out[i, :] = xp.asarray(batch[i][1])
        return Variable(x_in), t_out

    enc_optimizer = self.get_optimizer('enc')
    dec_optimizer = self.get_optimizer('dec')
    dis_optimizer = self.get_optimizer('dis')
    enc, dec, dis = self.enc, self.dec, self.dis
    xp = enc.xp

    # --- training step on the 'main' iterator ---------------------------
    x_in, t_out = _load_batch('main')
    z = enc(x_in)
    x_out = dec(z)
    y_fake = dis(x_in, x_out)  # (input, generated): should be judged fake
    y_real = dis(x_in, t_out)  # (input, ground truth): should be judged real

    enc_optimizer.update(self.loss_enc, enc, x_out, t_out, y_fake)
    # Cut the encoder graph before the decoder update so decoder gradients
    # do not flow back into the encoder.
    for z_ in z:
        z_.unchain_backward()
    dec_optimizer.update(self.loss_dec, dec, x_out, t_out, y_fake)
    x_in.unchain_backward()
    x_out.unchain_backward()
    dis_optimizer.update(self.loss_dis, dis, y_real, y_fake)

    # --- periodic evaluation on the 'test' iterator ----------------------
    eval_interval = 10
    if self.iteration % eval_interval != 0:
        return

    def eval_enc(x_out, t_out, y_out, lam1=100, lam2=1):
        # Reconstruction (L1) + adversarial loss, mirroring loss_enc.
        batchsize, _, w, h = y_out.data.shape
        loss_rec = lam1 * (F.mean_absolute_error(x_out, t_out))
        loss_adv = lam2 * F.sum(F.softplus(-y_out)) / batchsize / w / h
        loss = loss_rec + loss_adv
        # BUG FIX: report the loss value (the file object was reported before).
        chainer.report({'eval_loss': loss}, enc)
        return loss

    def eval_dis(y_in, y_out):
        # Discriminator loss: real logits pushed up, fake logits pushed down.
        batchsize, _, w, h = y_in.data.shape
        L1 = F.sum(F.softplus(-y_in)) / batchsize / w / h
        L2 = F.sum(F.softplus(y_out)) / batchsize / w / h
        loss = L1 + L2
        chainer.report({'eval_loss': loss}, dis)
        return loss

    # No parameter updates happen here, so skip graph construction.
    with chainer.using_config('enable_backprop', False):
        x_in, t_out = _load_batch('test')
        z = enc(x_in)
        x_out = dec(z)
        y_fake = dis(x_in, x_out)
        y_real = dis(x_in, t_out)
        with open("eval_log.txt", "a") as eval_fp:
            eval_fp.write('{}\t{}\t\t{}\t\t{}\n'.format(
                self.epoch, self.iteration,
                eval_enc(x_out, t_out, y_fake),
                eval_dis(y_real, y_fake)))