Esempio n. 1
0
    def update_upscaler(self, lam1, lam2):
        """Run one GAN training step for the upscaler (generator, then dis).

        Args:
            lam1, lam2: loss-weighting coefficients forwarded to
                ``self.loss_gen``.
        """
        opt_gen = self.get_optimizer('gen_up')
        opt_dis = self.get_optimizer('dis_up')

        # xp is numpy or cupy depending on where the model's parameters live.
        xp = self.upscaler.xp

        # Each batch element is a (small_image, large_image) pair.
        batch_a = self.get_iterator('main').next()
        x_s_nn = Variable(xp.asarray([b[0] for b in batch_a]).astype('f'))
        x_l = Variable(xp.asarray([b[1] for b in batch_a]).astype('f'))

        # Generator output, and discriminator scores for the fake pair
        # (input, generated) and the real pair (input, ground truth).
        x_s_nn_l = self.upscaler.gen(x_s_nn)
        y_s_nn_l = self.upscaler.dis(x_s_nn, x_s_nn_l)
        y_l = self.upscaler.dis(x_s_nn, x_s_nn_l is None and x_l or x_l)

        self.upscaler.gen.cleargrads()
        loss_gen = self.loss_gen(self.upscaler.gen, x_s_nn_l, x_l, y_s_nn_l,
                                 lam1, lam2)
        loss_gen.backward()
        opt_gen.update()

        # Cut the graph at the inputs so the discriminator update below does
        # not backpropagate into the generator a second time.
        x_l.unchain_backward()
        x_s_nn.unchain_backward()

        self.upscaler.dis.cleargrads()
        loss_dis = self.loss_dis(self.upscaler.dis, y_l, y_s_nn_l)
        loss_dis.backward()
        opt_dis.update()
Esempio n. 2
0
    def update_downscaler(self, lam1, lam2):
        """Run one GAN training step for the downscaler (generator, then dis).

        A frozen forward pass through the (already trained) upscaler produces
        the large image that the downscaler learns to shrink back.

        Args:
            lam1, lam2: loss-weighting coefficients forwarded to
                ``self.loss_gen``.
        """
        opt_gen = self.get_optimizer('gen_down')
        opt_dis = self.get_optimizer('dis_down')

        # xp is numpy or cupy depending on where the model's parameters live.
        xp = self.downscaler.xp

        # Each batch element is a (random_small_image, small_image) pair.
        batch_b = self.get_iterator('trainB').next()
        x_s_rand = Variable(xp.asarray([b[0] for b in batch_b]).astype('f'))
        x_s = Variable(xp.asarray([b[1] for b in batch_b]).astype('f'))

        self.upscaler.gen.fix_broken_batchnorm()
        # BUG FIX: the configuration key is 'enable_backprop', not
        # 'enable_back_prop'. Chainer accepts arbitrary config names, so the
        # misspelled key was silently ignored and a full backward graph was
        # still built through the frozen upscaler.
        with chainer.using_config('train', False), chainer.using_config(
                'enable_backprop', False):
            x_sl = self.upscaler.gen(x_s_rand)
        x_sl.unchain_backward()

        # Downscale the upscaled image, then score fake and real pairs.
        x_sls = self.downscaler.gen(x_sl)
        y_sls = self.downscaler.dis(x_sl, x_sls)
        y_s = self.downscaler.dis(x_sl, x_s)

        self.downscaler.gen.cleargrads()
        loss_gen = self.loss_gen(self.downscaler.gen, x_sls, x_s, y_sls, lam1,
                                 lam2)
        loss_gen.backward()
        opt_gen.update()

        # Cut the graph so the discriminator update below does not
        # backpropagate into the generator a second time.
        x_s.unchain_backward()
        x_sls.unchain_backward()

        self.downscaler.dis.cleargrads()
        loss_dis = self.loss_dis(self.downscaler.dis, y_s, y_sls)
        loss_dis.backward()
        opt_dis.update()
Esempio n. 3
0
    def update_core(self):
        """One encoder/decoder update step (discriminator currently disabled).

        Reads a minibatch of (input, target) image pairs from the 'main'
        iterator, stacks them into fixed 256x256 buffers and updates enc and
        dec with their respective losses.
        """
        enc_optimizer = self.get_optimizer('enc')
        dec_optimizer = self.get_optimizer('dec')
        # dis_optimizer = self.get_optimizer('dis')

        enc, dec = self.enc, self.dec  #, self.dis
        xp = enc.xp

        batch = self.get_iterator('main').next()
        batchsize = len(batch)
        in_ch = batch[0][0].shape[0]
        out_ch = batch[0][1].shape[0]
        w_in = 256  # change
        w_out = 256  # change

        # Fixed-size float32 buffers for the stacked minibatch.
        x_in = xp.zeros((batchsize, in_ch, w_in, w_in)).astype("f")
        t_out = xp.zeros((batchsize, out_ch, w_out, w_out)).astype("f")

        for i in range(batchsize):
            x_in[i, :] = xp.asarray(batch[i][0])
            t_out[i, :] = xp.asarray(batch[i][1])
        x_in = Variable(x_in)

        z = enc(x_in)
        x_out = dec(z)

        # y_fake = dis(x_in, x_out)  # fake pair: input and generated output
        # y_real = dis(x_in, t_out)  # real pair: input and ground truth

        # optimizer.update(lossfun, *args) clears gradients, computes the
        # loss, backprops and applies the parameter update in one call.
        enc_optimizer.update(self.loss_enc, enc, x_out, t_out)
        # Detach the latent activations so the decoder update below does not
        # backpropagate into the encoder a second time.
        for z_ in z:
            z_.unchain_backward()
        dec_optimizer.update(self.loss_dec, dec, x_out, t_out)
        x_in.unchain_backward()
        x_out.unchain_backward()
Esempio n. 4
0
    def update_core(self):
        """Perform one generator/discriminator update on the next minibatch."""
        gen_optimizer = self.get_optimizer('gen')
        dis_optimizer = self.get_optimizer('dis')
        gen = self.model.gen
        dis = self.model.dis
        xp = gen.xp

        batch = self.get_iterator('main').next()
        batchsize = len(batch)

        # Stack the (input, target) pairs into float32 device arrays.
        x_in = Variable(xp.asarray([example[0] for example in batch]).astype('f'))
        t_out = Variable(xp.asarray([example[1] for example in batch]).astype('f'))

        # Forward: translate the input, then score the fake and real pairs.
        x_out = gen(x_in)
        y_fake = dis(x_in, x_out)
        y_real = dis(x_in, t_out)

        # Generator step.
        gen.cleargrads()
        loss_g = self.loss_gen(gen, x_out, t_out, y_fake)
        loss_g.backward()
        gen_optimizer.update()

        # Detach the inputs so the discriminator step below cannot
        # backpropagate into the generator again.
        x_in.unchain_backward()
        x_out.unchain_backward()

        # Discriminator step.
        dis.cleargrads()
        loss_d = self.loss_dis(dis, y_real, y_fake)
        loss_d.backward()
        dis_optimizer.update()
Esempio n. 5
0
def optimizeCRNN(iterNum, maxIndex, indicies):
    """Train an EvalCRNN model with RMSpropGraves and truncated BPTT.

    Runs `epoch` passes of `iterNum` minibatches each, halving the learning
    rate after every pass, then prints a held-out loss estimate.

    Args:
        iterNum: number of minibatches per epoch.
        maxIndex: symbol/vocabulary size forwarded to EvalCRNN.
        indicies: flat sequence of symbol indices used as training data.

    Returns:
        The trained inner cRNN link (``model.cRNN``).
    """
    batchSize = 1000
    model = EvalCRNN(maxIndex, 500)
    print(len(indicies), computeEntropy(maxIndex, indicies))
    learningRate = 0.001
    epoch = 3
    for j in range(epoch):
        # A fresh optimizer is set up each epoch so the halved learning rate
        # takes effect; gradients are clipped to unit norm.
        my_optimizer = optimizers.RMSpropGraves(lr=learningRate)
        my_optimizer.setup(model)
        my_optimizer.add_hook(optimizer.GradientClipping(1))

        model.cRNN.reset()

        loss = Variable(np.array([[0]]))
        for i in range(iterNum):
            # BUG FIX: time.clock() was deprecated in Python 3.3 and removed
            # in 3.8; time.perf_counter() is the documented replacement.
            t1 = time.perf_counter()
            model.zerograds()
            # Truncated BPTT: drop the graph of the previous minibatch.
            loss.unchain_backward()
            loss = model(indicies[batchSize * i:batchSize * (i + 1)],
                         iterNum * batchSize)
            loss.backward()
            t2 = time.perf_counter()

            msg = "iter: " + str(i + iterNum * j + 1) + "/" + str(iterNum * epoch)
            msgLoss = "loss: " + str(loss.data / batchSize)
            msgNorm = "grad: " + str(my_optimizer.compute_grads_norm())
            msgTime = "time: " + str(t2 - t1) + " seconds"
            print(msgLoss, msgNorm, msg, msgTime)
            my_optimizer.update()

        learningRate *= 0.50

    # Report the loss on the 10 batches following the training range.
    print(model(indicies[batchSize * (iterNum):batchSize * (iterNum + 10)]).data / (batchSize * 10))
    return model.cRNN
    def update_core(self):        
        """Run one enc/dec/dis update on a minibatch from the 'main' iterator."""
        enc_optimizer = self.get_optimizer('enc')
        dec_optimizer = self.get_optimizer('dec')
        dis_optimizer = self.get_optimizer('dis')
        
        enc, dec, dis = self.enc, self.dec, self.dis
        xp = enc.xp

        batch = self.get_iterator('main').next()
        batchsize = len(batch)
        in_ch = batch[0][0].shape[0]
        """ Edit g """
        #print("Batch size", len(batch))
        #print("Batch all", batch)
        #print("Batch -1[0]", batch[-1][0])
        #print("Batch -1[1]", batch[-1][1])
        #print("Batch -1[0][0]", batch[-1][0][0])
        """ 最後のインデックスにアクセスして、情報を取り出す """
        """ これは、バッチサイズが1のときのみ有効であるからして、気をつけること """
        # (Translation of the notes above: access the last batch index to
        #  pull out extra information; this only works with batch size 1.)
        #path_through1 = []
        #for in_contain in batch[-1][0][-1]:
            #print("IN_CONTAIN", in_contain)
        #    for c in in_contain:
        #        path_through1.append(c)
        #print("path-through len", len(path_through1))
        """ ここまで """
        # (Translation: end of the experimental block above.)

        out_ch = batch[0][1].shape[0]
        w_in = 256
        w_out = 256
        
        # Fixed-size float32 buffers for the stacked minibatch.
        x_in = xp.zeros((batchsize, in_ch, w_in, w_in)).astype("f")
        t_out = xp.zeros((batchsize, out_ch, w_out, w_out)).astype("f")
        
        for i in range(batchsize):
            x_in[i,:] = xp.asarray(batch[i][0])
            t_out[i,:] = xp.asarray(batch[i][1])
        x_in = Variable(x_in)
        
        # test=False is the old-style chainer "training mode" flag.
        z = enc(x_in, test=False)
        """ このzベクトルを変化させれば、任意の方向性に持っていくことができる """
        # (Translation: perturbing this z vector would steer the output in an
        #  arbitrary direction.)
        #print("z", z)
        """ Zを直接編集するのは危険なので、decの引数を増やして対処したほうが良さそう """
        # (Translation: editing z directly is risky; better to add an extra
        #  argument to dec instead.)
        #x_out = dec(z, path_through1, test=False)
        x_out = dec(z, test=False)

        y_fake = dis(x_in, x_out, test=False)
        y_real = dis(x_in, t_out, test=False)


        # optimizer.update(lossfun, *args) clears gradients, computes the
        # loss, backprops and applies the parameter update in one call.
        enc_optimizer.update(self.loss_enc, enc, x_out, t_out, y_fake)
        # Detach z so the decoder update does not reach the encoder again.
        for z_ in z:
            z_.unchain_backward()
        dec_optimizer.update(self.loss_dec, dec, x_out, t_out, y_fake)
        x_in.unchain_backward()
        x_out.unchain_backward()
        dis_optimizer.update(self.loss_dis, dis, y_real, y_fake)
Esempio n. 7
0
    def update_core(self):
        """One enc/dec/dis training step on fixed-size 3-channel image pairs."""
        enc_optimizer = self.get_optimizer('enc')
        dec_optimizer = self.get_optimizer('dec')
        dis_optimizer = self.get_optimizer('dis')

        enc, dec, dis = self.enc, self.dec, self.dis
        xp = enc.xp

        batch = self.get_iterator('main').next()
        batchsize = len(batch)
        #in_ch = batch[0][0].shape[0]
        #out_ch = batch[0][1].shape[0]
        # Channel counts and image size are hard-coded here rather than read
        # from the batch (the commented lines above are the data-driven form).
        in_ch = 3
        out_ch = 3
        w_in = 115
        h_in = 149
        w_out = 115
        h_out = 149

        # Fixed-size float32 buffers for the stacked minibatch.
        x_in = xp.zeros((batchsize, in_ch, w_in, h_in)).astype("f")
        t_out = xp.zeros((batchsize, out_ch, w_out, h_out)).astype("f")

        for i in range(batchsize):
            x_in[i, :] = xp.asarray(batch[i][0])
            t_out[i, :] = xp.asarray(batch[i][1])
        x_in = Variable(x_in)

        z = enc(x_in)

        x_out = dec(z)

        # Score the fake pair (input, generated) and real pair (input, target).
        y_fake = dis(x_in, x_out)
        y_real = dis(x_in, t_out)

        # optimizer.update(lossfun, *args) clears gradients, computes the
        # loss, backprops and applies the parameter update in one call.
        enc_optimizer.update(self.loss_enc, enc, x_out, t_out, y_fake)

        # Detach z so the decoder update does not reach the encoder again.
        for z_ in z:
            z_.unchain_backward()

        dec_optimizer.update(self.loss_dec, dec, x_out, t_out, y_fake)
        x_in.unchain_backward()
        x_out.unchain_backward()
        dis_optimizer.update(self.loss_dis, dis, y_real, y_fake)
Esempio n. 8
0
class Simple(LSTMBase):
    def __init__(self,
                 vocab_size,
                 dim_embed=33 * 3,
                 dim1=400,
                 dim2=400,
                 dim3=200,
                 class_size=None):
        if class_size is None:
            class_size = vocab_size
        super(Simple, self).__init__(
            embed2=L.EmbedID(vocab_size, dim_embed),
            lay2=L.LSTM(dim_embed, dim1, forget_bias_init=0),
            lay_int=L.LSTM(dim1, dim2, forget_bias_init=0),
            lin1=L.Linear(dim2, dim3),
            lin2=L.Linear(dim3, class_size),
        )
        self.vocab_size = vocab_size
        try:
            cuda.check_cuda_available()
            self.to_gpu()
            print 'run on the GPU.'
        except:
            print 'run on the CPU.'
        self.dim_embed = dim_embed
        self.optimizer = optimizers.MomentumSGD()
        self.optimizer.setup(self)
        self.loss_var = Variable(xp.zeros((), dtype=np.float32))
        self.reset_state()

    def __call__(self, xs, train):
        x_3gram = xs[0]
        sp2 = xs[1]

        x_uni = x_3gram[:, 0]
        y = Variable(x_uni, volatile=not train)
        y = self.embed2(y)
        y2 = self.lay2(y)
        y2 = self.lay_int(y2)
        y = y2
        y = self.lin1(F.dropout(y, train=train))
        y = F.relu(y)
        y = self.lin2(F.dropout(y, train=train))

        return y

    def reset_state(self):
        if self.loss_var is not None:
            self.loss_var.unchain_backward()  # for safty
        self.loss_var = Variable(xp.zeros((),
                                          dtype=xp.float32))  # reset loss_var
        self.lay2.reset_state()
        self.lay_int.reset_state()
        return
Esempio n. 9
0
class StatefulAgent(Agent):
    """Recurrent agent trained with truncated backpropagation through time."""

    def __init__(self, model, optimizer=None, gpu=-1, cutoff=None, last=False):
        """Create the agent.

        Args:
            model: recurrent model; calling it on a batch returns a loss.
            optimizer: optimizer instance (set up by the base class).
            gpu: GPU device id, or -1 for CPU.
            cutoff: window length (in steps) for truncated BPTT.
            last: if True, update only from the loss of the final step of
                each ``cutoff`` window.
        """
        super(StatefulAgent, self).__init__(model,
                                            optimizer=optimizer,
                                            gpu=gpu,
                                            last=last,
                                            cutoff=cutoff)

        # cutoff for BPTT
        self.cutoff = cutoff

        # whether to update from loss in last step only
        self.last = last

        # keep track of loss for truncated BPTT
        self.loss = Variable(self.xp.zeros((), 'float32'))

    def run(self, data, train=True, idx=None, final=False):
        """Process one step of `data`; return this step's loss as a float.

        Args:
            data: sequence of arrays for this step.
            train: whether to backprop and update (also forwarded to the
                model so dropout etc. can be switched off for evaluation).
            idx: global step index used for BPTT windowing.
            final: force an update after the last batch of the run.
        """
        # Reset recurrent state at the start of each BPTT window.
        if (idx) % self.cutoff == 0:
            self.reset()

        # BUG FIX: `train` was previously hard-coded to True here, so the
        # model never saw evaluation mode even when run(train=False).
        loss = self.model(map(lambda x: Variable(self.xp.asarray(x)), data),
                          train=train)

        if self.last:  # used in case we propagate back at end of trials only
            if ((idx + 1) % self.cutoff) == 0:
                self.loss = loss
            else:
                loss = Variable(self.xp.zeros((), 'float32'))
        else:
            self.loss += loss

        # normalize by number of datapoints in minibatch
        _loss = float(loss.data)

        # backpropagate if we reach the cutoff for truncated backprop or if
        # we processed the last batch
        if train and ((self.cutoff and
                       ((idx + 1) % self.cutoff) == 0) or final):
            self.optimizer.zero_grads()
            self.loss.backward()
            self.loss.unchain_backward()
            self.optimizer.update()
            self.loss = Variable(self.xp.zeros((), 'float32'))

        if not train:
            self.loss.unchain_backward()

        return _loss
Esempio n. 10
0
    def train(self, x):
        """Run one three-phase training step on batch x:
        reconstruction (encoder/decoder), discriminator, then generator.
        """
        # Encoder/Decoder
        h = self.encoder(x)
        xp = cuda.get_array_module(x)
        # Fresh noise in [0, 1), mapped through generator0 before decoding.
        z = Variable(
            cuda.to_gpu(
                xp.random.rand(x.shape[0], self.dim).astype(xp.float32),
                self.device))
        hz = self.generator0(z)
        x_rec = self.decoder(h, hz)
        l_rec = self.recon_loss(x, x_rec)
        self.cleargrads()
        l_rec.backward()
        self.optimizer_enc.update()
        self.optimizer_dec.update()

        # Discriminator
        h = Variable(h.data)  # disconnect: stop gradients reaching the encoder
        h.unchain_backward()
        xp = cuda.get_array_module(x)
        z = Variable(
            cuda.to_gpu(
                xp.random.rand(x.shape[0], self.dim).astype(xp.float32),
                self.device))
        hz = self.generator0(z)
        x_gen = self.generator(h, hz)
        d_x_gen = self.discriminator(x_gen, h)
        d_x_real = self.discriminator(x, h)
        l_dis = self.lsgan_loss(d_x_gen, d_x_real)
        self.cleargrads()
        l_dis.backward()
        self.optimizer_dis.update()

        # Generator
        xp = cuda.get_array_module(x)
        z = Variable(
            cuda.to_gpu(
                xp.random.rand(x.shape[0], self.dim).astype(xp.float32),
                self.device))
        hz = self.generator0(z)
        x_gen = self.generator(h, hz)
        d_x_gen = self.discriminator(x_gen, h)
        # NOTE(review): h_gen is computed but never used in l_gen below —
        # looks like leftover code, but this forward pass may have side
        # effects (e.g. batch-norm statistics); confirm before removing.
        h_gen = self.encoder(x_gen)
        l_gen = self.lsgan_loss(d_x_gen)
        self.cleargrads()
        l_gen.backward()
        self.optimizer_dec.update()
        self.optimizer_gen.update()
Esempio n. 11
0
    def update_core(self):
        """One enc/dec/dis update; loss weights lam1/lam2 come from the
        updater instance itself.
        """
        enc_optimizer = self.get_optimizer('enc')
        dec_optimizer = self.get_optimizer('dec')
        dis_optimizer = self.get_optimizer('dis')

        enc, dec, dis = self.enc, self.dec, self.dis
        xp = enc.xp

        batch = self.get_iterator('main').next()
        batchsize = len(batch)
        in_ch = batch[0][0].shape[0]
        out_ch = batch[0][1].shape[0]
        # Fixed 256x256 spatial size.
        w_in = 256
        w_out = 256

        # Fixed-size float32 buffers for the stacked minibatch.
        x_in = xp.zeros((batchsize, in_ch, w_in, w_in)).astype("f")
        t_out = xp.zeros((batchsize, out_ch, w_out, w_out)).astype("f")

        for i in range(batchsize):
            x_in[i, :] = xp.asarray(batch[i][0])
            t_out[i, :] = xp.asarray(batch[i][1])
        x_in = Variable(x_in)

        # test=False is the old-style chainer "training mode" flag.
        z = enc(x_in, test=False)
        x_out = dec(z, test=False)

        y_fake = dis(x_in, x_out, test=False)
        y_real = dis(x_in, t_out, test=False)

        # optimizer.update(lossfun, *args, **kwargs) clears gradients,
        # computes the loss, backprops and applies the update in one call.
        enc_optimizer.update(self.loss_enc,
                             enc,
                             x_out,
                             t_out,
                             y_fake,
                             lam1=self.lam1,
                             lam2=self.lam2)
        # Detach z so the decoder update does not reach the encoder again.
        for z_ in z:
            z_.unchain_backward()
        dec_optimizer.update(self.loss_dec,
                             dec,
                             x_out,
                             t_out,
                             y_fake,
                             lam1=self.lam1,
                             lam2=self.lam2)
        x_in.unchain_backward()
        x_out.unchain_backward()
        dis_optimizer.update(self.loss_dis, dis, y_real, y_fake)
Esempio n. 12
0
    def update_core(self):
        """One enc/dec/dis update on spectrogram image pairs whose spatial
        size is taken from the first sample of the minibatch.
        """
        enc_optimizer = self.get_optimizer('enc')
        dec_optimizer = self.get_optimizer('dec')
        dis_optimizer = self.get_optimizer('dis')

        enc, dec, dis = self.enc, self.dec, self.dis
        xp = enc.xp

        batch = self.get_iterator('main').next()
        batchsize = len(batch)
        in_ch = batch[0][0].shape[0]
        out_ch = batch[0][1].shape[0]
        # w_in = 256
        # w_out = 256
        # Spatial size comes from the first sample; outputs share it.
        w_in = batch[0][0].shape[1]
        h_in = batch[0][0].shape[2]
        w_out = w_in
        h_out = h_in

        x_in = xp.zeros((batchsize, in_ch, w_in, h_in)).astype("f")
        t_out = xp.zeros((batchsize, out_ch, w_out, h_out)).astype("f")

        for i in range(batchsize):
            # TODO (translated from Japanese): with batchsize >= 2 the
            # spectrogram width (time axis) can differ between samples, so
            # they cannot be merged into one ndarray and training would
            # crash; mismatching samples are forcibly skipped as a stopgap.
            # NOTE(review): a skipped sample leaves all-zero rows in both
            # x_in and t_out, so the model still trains on blank images.
            if (x_in[i, :].shape != batch[i][0].shape):
                print(
                    "skipped training_audio_sample because spectrogram shape does not match.(comes from program bug.)\r\n expected:{}, actual:{}"
                    .format(x_in[i, :].shape, batch[i][0].shape))
                continue

            x_in[i, :] = xp.asarray(batch[i][0])
            t_out[i, :] = xp.asarray(batch[i][1])
        x_in = Variable(x_in)

        z = enc(x_in)
        x_out = dec(z)

        y_fake = dis(x_in, x_out)
        y_real = dis(x_in, t_out)

        # optimizer.update(lossfun, *args) clears gradients, computes the
        # loss, backprops and applies the update in one call.
        enc_optimizer.update(self.loss_enc, enc, x_out, t_out, y_fake)
        # Detach z so the decoder update does not reach the encoder again.
        for z_ in z:
            z_.unchain_backward()
        dec_optimizer.update(self.loss_dec, dec, x_out, t_out, y_fake)
        x_in.unchain_backward()
        x_out.unchain_backward()
        dis_optimizer.update(self.loss_dis, dis, y_real, y_fake)
Esempio n. 13
0
    def update_core(self):
        """One enc/dec/dis update on 512x128 images; losses are evaluated
        only on a cropped bottom-right region of the images.
        """
        enc_optimizer = self.get_optimizer('enc')
        dec_optimizer = self.get_optimizer('dec')
        dis_optimizer = self.get_optimizer('dis')

        enc, dec, dis = self.enc, self.dec, self.dis
        xp = enc.xp

        batch = self.get_iterator('main').next()
        batchsize = len(batch)
        in_ch = batch[0][0].shape[0]
        out_ch = batch[0][1].shape[0]
        w_in_h = 512
        w_in_w = 128
        w_out_h = 512
        w_out_w = 128

        # Fixed-size float32 buffers for the stacked minibatch.
        x_in = xp.zeros((batchsize, in_ch, w_in_h, w_in_w)).astype("f")
        t_out = xp.zeros((batchsize, out_ch, w_out_h, w_out_w)).astype("f")

        for i in range(batchsize):
            x_in[i,:] = xp.asarray(batch[i][0])
            t_out[i,:] = xp.asarray(batch[i][1])
        x_in = Variable(x_in)

        # enc/dec see the full image; the crop below affects losses only.
        z = enc(x_in)
        x_out = dec(z)
        
        # Crop to the bottom third (height) and right quarter (width) before
        # the discriminator and the loss computations.
        st_in_h = int(w_in_h / 3)
        st_in_w = int(w_in_w / 4 * 3)
        #x_in = Variable(x_in.data[:,:,:,w_in_st:w_in_w])
        x_in = x_in[:,:,st_in_h*2:w_in_h,st_in_w:w_in_w]
        x_out = x_out[:,:,st_in_h*2:w_in_h,st_in_w:w_in_w]
        t_out = t_out[:,:,st_in_h*2:w_in_h,st_in_w:w_in_w]
        y_fake = dis(x_in, x_out)
        y_real = dis(x_in, t_out)


        # optimizer.update(lossfun, *args) clears gradients, computes the
        # loss, backprops and applies the update in one call.
        enc_optimizer.update(self.loss_enc, enc, x_out, t_out, y_fake)
        # Detach z so the decoder update does not reach the encoder again.
        for z_ in z:
            z_.unchain_backward()
        dec_optimizer.update(self.loss_dec, dec, x_out, t_out, y_fake)
        x_in.unchain_backward()
        x_out.unchain_backward()
        dis_optimizer.update(self.loss_dis, dis, y_real, y_fake)
Esempio n. 14
0
    def train(self, x):
        """Run one three-phase training step on batch x:
        reconstruction (encoder/decoder), discriminator, then generator.
        """
        # Encoder/Decoder
        h = self.encoder(x)
        xp = cuda.get_array_module(x)
        # Fresh noise in [0, 1), mapped through generator0 before decoding.
        z = Variable(cuda.to_gpu(xp.random.rand(x.shape[0], self.dim).astype(xp.float32), self.device))
        hz = self.generator0(z)
        x_rec = self.decoder(h, hz)
        l_rec = self.recon_loss(x, x_rec)
        self.cleargrads()
        l_rec.backward()
        self.optimizer_enc.update()
        self.optimizer_dec.update()

        # Discriminator
        h = Variable(h.data)  # disconnect: stop gradients reaching the encoder
        h.unchain_backward()
        xp = cuda.get_array_module(x)
        z = Variable(cuda.to_gpu(xp.random.rand(x.shape[0], self.dim).astype(xp.float32), self.device))
        hz = self.generator0(z)
        x_gen = self.generator(h, hz)
        d_x_gen = self.discriminator(x_gen, h)
        d_x_real = self.discriminator(x, h)
        l_dis = self.lsgan_loss(d_x_gen, d_x_real)
        self.cleargrads()
        l_dis.backward()
        self.optimizer_dis.update()
        
        # Generator
        xp = cuda.get_array_module(x)
        z = Variable(cuda.to_gpu(xp.random.rand(x.shape[0], self.dim).astype(xp.float32), self.device))
        hz = self.generator0(z)
        x_gen = self.generator(h, hz)
        d_x_gen = self.discriminator(x_gen, h)
        # NOTE(review): h_gen is computed but never used in l_gen below —
        # looks like leftover code, but this forward pass may have side
        # effects (e.g. batch-norm statistics); confirm before removing.
        h_gen = self.encoder(x_gen)
        l_gen = self.lsgan_loss(d_x_gen)
        self.cleargrads()
        l_gen.backward()
        self.optimizer_dec.update()
        self.optimizer_gen.update()
Esempio n. 15
0
    def train(self, data):
        """Train for one pass over `data` with truncated BPTT.

        Returns:
            The cumulative loss normalized by the number of batch columns
            (``data.batch_ind.shape[1]``).
        """
        # No cutoff configured: a single update per full pass.
        if not self.cutoff:
            cutoff = data.nbatches
        else:
            cutoff = self.cutoff

        self.model.predictor.reset_state()

        cumloss = self.xp.zeros((), 'float32')

        loss = Variable(self.xp.zeros((), 'float32'))

        # check if we are in train or test mode (used e.g. for dropout)
        self.model.predictor.test = False
        self.model.predictor.train = True

        for _x, _t in data:

            x = Variable(_x)
            t = Variable(_t)
            # Advance the recurrent state on every step.
            self.model.predictor(x)
            # backpropagate if we reach the cutoff for truncated backprop or
            # if we processed the last batch
            if data.step % cutoff == 0 or data.step == data.nbatches:

                # NOTE(review): the loss term is computed (via a second
                # forward pass) only on cutoff steps; intermediate steps
                # advance state but contribute no loss — confirm intended.
                loss += self.model(x, t)
                self.optimizer.zero_grads()

                loss.backward()
                loss.unchain_backward()
                self.optimizer.update()
                #self.model.predictor[0][0].U.W.data[10:,:]=0

                cumloss += loss.data
                loss = Variable(self.xp.zeros((), 'float32'))
                self.model.predictor.reset_state()

        return float(cumloss / (data.batch_ind.shape[1]))
Esempio n. 16
0
    def update_core(self):
        """One training iteration for the encoder/decoder/discriminator trio."""
        enc_optimizer = self.get_optimizer('enc')
        dec_optimizer = self.get_optimizer('dec')
        dis_optimizer = self.get_optimizer('dis')

        enc, dec, dis = self.enc, self.dec, self.dis
        xp = enc.xp

        batch = self.get_iterator('main').next()
        batchsize = len(batch)
        in_ch = batch[0][0].shape[0]
        out_ch = batch[0][1].shape[0]
        w_in = 256
        w_out = 256

        # Allocate float32 buffers, then copy each (input, target) pair in.
        x_in = xp.zeros((batchsize, in_ch, w_in, w_in)).astype("f")
        t_out = xp.zeros((batchsize, out_ch, w_out, w_out)).astype("f")
        for i, pair in enumerate(batch):
            x_in[i, :] = xp.asarray(pair[0])
            t_out[i, :] = xp.asarray(pair[1])
        x_in = Variable(x_in)

        # Forward pass through the encoder/decoder pair.
        z = enc(x_in)
        x_out = dec(z)

        # Discriminator scores for the fake and the real pair.
        y_fake = dis(x_in, x_out)
        y_real = dis(x_in, t_out)

        # Each optimizer.update(lossfun, ...) call clears gradients, computes
        # the loss, backprops and applies the parameter update.
        enc_optimizer.update(self.loss_enc, enc, x_out, t_out, y_fake)
        # Detach the latents so the decoder step cannot reach the encoder.
        for latent in z:
            latent.unchain_backward()
        dec_optimizer.update(self.loss_dec, dec, x_out, t_out, y_fake)
        x_in.unchain_backward()
        x_out.unchain_backward()
        dis_optimizer.update(self.loss_dis, dis, y_real, y_fake)
Esempio n. 17
0
    def train(self, words, steps, batchsize=100, sequence_length=10):
        """ Train the Predictor's model on words for steps number of steps. """

        whole_len = len(words)
        train_data = np.ndarray(whole_len, dtype=np.int32)
        jumps = steps * sequence_length

        # Initialize training data and maybe vocab.
        if self.vocab is None:
            vocab_initializing = True
            self.vocab = {}
        for i, word in enumerate(words):
            if vocab_initializing:
                if word not in self.vocab:
                    self.vocab[word] = len(self.vocab)
            train_data[i] = self.vocab[word]
        vocab_initializing = False

        print 'corpus length:', len(words)
        print 'self.vocab size:', len(self.vocab)

        # Initialize base model (if we need to)
        if self.model is None:
            self.model = BaseRNN(len(self.vocab), self.units)

        if self.gpu >= 0:
            cuda.get_device(self.gpu).use()
            self.model.to_self.gpu()

        optimizer = optimizers.RMSprop(lr=self.settings.learning_rate,
                                       alpha=self.settings.decay_rate,
                                       eps=1e-8)
        optimizer.setup(self.model)

        jumpsPerEpoch = whole_len / batchsize
        epoch = 0
        start_at = time.time()
        cur_at = start_at
        state = make_initial_state(self.units, batchsize=batchsize)

        if self.gpu >= 0:
            accum_loss = Variable(cuda.zeros(()))
            for _, value in state.items():
                value.data = cuda.to_self.gpu(value.data)
        else:
            accum_loss = Variable(np.zeros((), dtype=np.float32))

        print 'going to train {} iterations'.format(steps)
        for i in xrange(jumps):
            x_batch = np.array([
                train_data[(jumpsPerEpoch * j + i) % whole_len]
                for j in xrange(batchsize)
            ])
            y_batch = np.array([
                train_data[(jumpsPerEpoch * j + i + 1) % whole_len]
                for j in xrange(batchsize)
            ])

            if self.gpu >= 0:
                x_batch = cuda.to_self.gpu(x_batch)
                y_batch = cuda.to_self.gpu(y_batch)

            state, loss_i = self.model.forward_one_step(
                x_batch, y_batch, state, dropout_ratio=self.settings.dropout)
            accum_loss += loss_i

            if (i + 1) % sequence_length == 0:
                now = time.time()
                print '{}/{}, train_loss = {}, time = {:.2f}'.format(
                    (i + 1) / sequence_length, steps,
                    accum_loss.data / sequence_length, now - cur_at)
                cur_at = now

                optimizer.zero_grads()
                accum_loss.backward()
                accum_loss.unchain_backward()  # truncate
                if self.gpu >= 0:
                    accum_loss = Variable(cuda.zeros(()))
                else:
                    accum_loss = Variable(np.zeros((), dtype=np.float32))

                optimizer.clip_grads(self.settings.grad_clip)
                optimizer.update()

            if (i + 1) % jumpsPerEpoch == 0:
                epoch += 1

                if epoch >= self.settings.learning_rate_decay_after:
                    optimizer.lr *= self.settings.learning_rate_decay
                    print 'decayed self.settings.learning rate by a factor {} to {}'.format(
                        self.settings.learning_rate_decay, optimizer.lr)
def findNumEpoch(architecture, waves, trues, labels, infos, gpu_id, waveFs):
    """Train a Net on the training split and early-stop on the dev split.

    Trains with Adam on randomly drawn labelled waveform segments, evaluates
    the per-label error rate on the dev set every ``devEpoch`` epochs, and
    stops once no improvement has been seen for ``convergenceEpoch`` epochs.

    Args:
        architecture: layer specification passed to Net / totalInputLength.
        waves: list of 1-D waveform arrays.
        trues: per-sample label arrays aligned with ``waves``.
        labels: the label set.
        infos: metadata used to derive the train/dev/test index split.
        gpu_id: CUDA device id, or negative for CPU.
        waveFs: waveform sampling frequency in Hz.

    Returns:
        (bestEpoch, bestIncorrect, seed): epoch with the lowest dev error,
        that error value, and the RNG seed used for init/sampling.

    NOTE(review): int32/float32/newaxis are used unqualified — presumably
    from a star-import of numpy at module level; confirm.
    """
    if cupy is not None and gpu_id >= 0:
        xp = cupy
        cupy.cuda.Device(gpu_id).use()
    else:
        xp = np

    # Deterministic split: held-out test core, then train/dev partition.
    valIndex = coreTestIndex(infos)
    np.random.seed(0)
    insIndex, devIndex = traGroupIndex(infos, 2)
    insIndex = np.array(insIndex)
    insLabelIndexTime = makeLabelIndexTime(insIndex, labels, trues)

    # Samples drawn per label per training step, dev-eval period, and
    # early-stopping patience (in epochs).
    insLabelSize = 2**2
    devEpoch = 2**5
    convergenceEpoch = 2**5 * devEpoch

    devBatchSizeUpper = 2**8
    devSegmentSecUpper = 0.1
    devSegmentLenUpper = int(devSegmentSecUpper * waveFs)

    # Sort dev items by length so each batch holds similar-length waves.
    devIndex = sorted(devIndex, key=lambda i: len(waves[i]))
    devIndex = np.array(devIndex)
    devBatchIndex = np.array_split(
        devIndex, int(np.ceil(len(devIndex) / devBatchSizeUpper)))
    devLabelSize = np.zeros(len(labels), int32)
    for i in devIndex:
        for li, la in enumerate(labels):
            devLabelSize[li] += (trues[i] == li).sum()

    inputLength = totalInputLength(architecture)

    # Re-randomize, but record the seed so the run can be reproduced.
    np.random.seed()
    seed = np.random.randint(0, np.iinfo(int32).max)
    np.random.seed(seed)

    net = Net(len(labels), architecture, functions.elu)
    opt = optimizers.Adam(1e-4)
    # 	opt=Eve(1e-4)
    opt.setup(net)
    if gpu_id >= 0: net.to_gpu(gpu_id)

    # Per-label pools of (index, time) samples, consumed in random order and
    # reshuffled/refilled when they run low.
    remainingInsLabelIndexTime = [
        np.random.permutation(lt) for lt in insLabelIndexTime
    ]

    epoch = 0
    bestEpoch = 0
    epochIncorrect = {}
    while epoch < bestEpoch + convergenceEpoch:
        # Refill any label pool that cannot supply a full draw.
        for li, lit in enumerate(remainingInsLabelIndexTime):
            if len(lit) < insLabelSize:
                remainingInsLabelIndexTime[li] = np.concatenate(
                    (lit, np.random.permutation(insLabelIndexTime[li])))
        x, tr = makeInpTru(labels, insLabelSize, inputLength,
                           remainingInsLabelIndexTime, waves, trues)

        # Shape to NCHW-like (batch, 1, time, 1) for the conv net.
        x = x[:, newaxis, :, newaxis]
        x = xp.asarray(x)
        x = Variable(x)
        x = net.callSingle(x, True)
        tr = tr[..., newaxis, newaxis]
        tr = xp.asarray(tr)
        e = functions.softmax_cross_entropy(x, tr, normalize=True)

        net.cleargrads()
        e.backward()
        e.unchain_backward()
        opt.update()
        # 		opt.update(loss=e.data)

        # Run the dev evaluation only every devEpoch-th epoch.
        if epoch % devEpoch != devEpoch - 1:
            epoch += 1
            continue
        incorrect = xp.zeros(len(labels), int32)
        with chainer.using_config("enable_backprop", False):
            for index in devBatchIndex:
                # Longest wave in the batch determines the segment count.
                waveLen = len(waves[index[-1]])
                segmentTimes = np.array_split(
                    np.arange(waveLen),
                    int(np.ceil(waveLen / devSegmentLenUpper)))
                net.reset()
                for si, segTime in enumerate(segmentTimes):
                    t0 = segTime[0]
                    t1 = segTime[-1] + 1
                    # Zero-pad inputs; -1 marks positions with no true label.
                    x = np.zeros((len(index), t1 - t0), float32)
                    tr = -np.ones((len(index), t1 - t0), int32)
                    for xi, wi in enumerate(index):
                        if len(waves[wi]) > t0:
                            w = waves[wi][t0:t1]
                            x[xi, :len(w)] = w
                        # NOTE(review): this condition repeats the previous
                        # one and reuses `w` from it — likely one of the two
                        # was meant to differ; as written the second block
                        # only runs when the first did, so `w` is defined.
                        if len(waves[wi]) > t0:
                            tr[xi, :len(w)] = trues[wi][t0:t1]

                    x = x[:, newaxis, :, newaxis]
                    x = xp.asarray(x)
                    x = Variable(x)
                    x = net(x, False)
                    x.unchain_backward()

                    # Count per-label misclassifications on labelled frames.
                    x = xp.argmax(x.data, axis=1)
                    tr = tr[..., newaxis]
                    tr = xp.asarray(tr)
                    for li, la in enumerate(labels):
                        incorrect[li] += (x[tr == li] != li).sum()

            net.reset()
            if cupy is not None: incorrect = cupy.asnumpy(incorrect)
            # Mean of per-label error rates (balanced across labels).
            incorrect = (incorrect / devLabelSize).mean()
            print("epoch", epoch, "incorrect", incorrect)

            if len(epochIncorrect) == 0 or incorrect < min(
                [epochIncorrect[ep] for ep in epochIncorrect]):
                bestEpoch = epoch
            epochIncorrect[epoch] = incorrect
            epoch += 1

    devEpochs = np.array(sorted(epochIncorrect), int32)
    epochIncorrect = np.array([epochIncorrect[ep] for ep in devEpochs])
    bestIncorrect = epochIncorrect.min()

    return bestEpoch, bestIncorrect, seed
Esempio n. 19
0
class Bigram(LSTMBase):
    """LSTM language model mixing unigram and bigram embeddings.

    Embeds the current token (unigram) and the current token pair
    (bigram, encoded as ``tok0 * vocab_size + tok1``), runs them through
    parallel LSTM branches, and classifies over ``class_size`` outputs.
    The bigram branch keeps two interleaved stateless-LSTM states
    (odd/even call parity) so that consecutive calls alternate which
    state is advanced.
    """

    def __init__(self,
                 vocab_size,
                 dim_embed=33 * 3,
                 dim1=400,
                 dim2=400,
                 dim3=200,
                 class_size=None):
        # class_size defaults to vocab_size (predicting the next token).
        if class_size is None:
            class_size = vocab_size
        super(Bigram, self).__init__(
            embed_uni=L.EmbedID(vocab_size, dim_embed),
            # Bigram table is vocab_size**2 rows: one per ordered token pair.
            embed_bi=L.EmbedID(vocab_size * vocab_size, dim_embed),
            lay_uni=L.LSTM(dim_embed, dim1, forget_bias_init=0),
            # Stateless so the two parity states can be threaded manually.
            lay_bi=L.StatelessLSTM(dim_embed, dim1, forget_bias_init=0),
            # Takes the concatenation of unigram + both bigram states.
            lay_int=L.LSTM(dim1 * 3, dim2, forget_bias_init=0),
            lin1=L.Linear(dim2, dim3),
            lin2=L.Linear(dim3, class_size),
        )
        self.vocab_size = vocab_size
        try:
            cuda.check_cuda_available()
            self.to_gpu()
            print 'run on the GPU.'
        except:
            # NOTE(review): bare except also hides non-CUDA errors raised by
            # to_gpu(); narrowing to RuntimeError would be safer -- confirm.
            print 'run on the CPU.'
        self.dim_embed = dim_embed
        self.optimizer = optimizers.MomentumSGD()
        self.optimizer.setup(self)
        # Zero-dimensional accumulator for losses (see reset_state).
        self.loss_var = Variable(xp.zeros((), dtype=np.float32))
        self.reset_state()

    def __call__(self, xs, train):
        """One forward step.

        xs[0] is an int array of token-id triples (only columns 0 and 1
        are used here); xs[1] is unused in this method.  Returns the
        pre-softmax class scores.
        """
        x_3gram = xs[0]
        sp2 = xs[1]  # NOTE(review): unused in this method.

        # Unigram branch: embed token 0 and advance its stateful LSTM.
        x_uni = x_3gram[:, 0]
        y = Variable(x_uni, volatile=not train)
        y = self.embed_uni(y)
        y_uni = self.lay_uni(y)

        ## Bigram branch: pair (tok0, tok1) encoded as a single index.
        x_bi = x_3gram[:, 0] * self.vocab_size + x_3gram[:, 1]
        y = Variable(x_bi, volatile=not train)
        y = self.embed_bi(y)
        # Alternate which parity state is advanced on each call; the other
        # parity's hidden state is concatenated as extra context.
        if self.is_odd:
            self.c_odd, self.h_odd = self.lay_bi(self.c_odd, self.h_odd, y)
            if self.h_evn is None:
                # First odd step: even state not yet produced, use zeros.
                self.h_evn = Variable(xp.zeros_like(self.h_odd.data),
                                      volatile=not train)
            y = concat.concat((y_uni, self.h_odd, self.h_evn))
        else:
            self.c_evn, self.h_evn = self.lay_bi(self.c_evn, self.h_evn, y)
            y = concat.concat((y_uni, self.h_evn, self.h_odd))
        self.is_odd = not self.is_odd

        # Integrate the three dim1-wide branches and classify.
        y = self.lay_int(y)
        y = self.lin1(F.dropout(y, train=train))
        y = F.relu(y)
        y = self.lin2(F.dropout(y, train=train))

        return y

    def reset_state(self):
        """Clear all recurrent state and re-zero the loss accumulator."""
        if self.loss_var is not None:
            self.loss_var.unchain_backward()  # just to be safe
        self.loss_var = Variable(xp.zeros((),
                                          dtype=xp.float32))  # reset loss_var
        self.lay_uni.reset_state()
        self.is_odd = True
        self.c_odd = None
        self.c_evn = None
        self.h_odd = None
        self.h_evn = None
        self.lay_int.reset_state()
        return
Esempio n. 20
0
def train_dcgan_labeled(evol, dis, proj, epoch0=0):
    """Adversarially train the evolver / discriminator / projector trio.

    evol predicts the next frame from a window of past frames, proj
    post-processes that prediction, and dis scores (input, prediction)
    pairs.  Unless ``args.fresh_start`` is set, weights and Adam states
    are resumed from HDF5 snapshots in ``out_model_dir``.  Images and
    model snapshots are dumped every ``save_interval`` steps.  Sets the
    module-global ``epoch`` as it iterates from ``epoch0``.
    """
    global epoch
    o_evol = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_evol.setup(evol)
    o_dis = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_dis.setup(dis)
    o_proj = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_proj.setup(proj)
    if not args.fresh_start:
        serializers.load_hdf5("%s/dcgan_model_evol.h5" % (out_model_dir), evol)
        serializers.load_hdf5("%s/dcgan_state_evol.h5" % (out_model_dir),
                              o_evol)
        serializers.load_hdf5("%s/dcgan_model_dis.h5" % (out_model_dir), dis)
        serializers.load_hdf5("%s/dcgan_state_dis.h5" % (out_model_dir), o_dis)
        serializers.load_hdf5("%s/dcgan_model_proj.h5" % (out_model_dir), proj)
        serializers.load_hdf5("%s/dcgan_state_proj.h5" % (out_model_dir),
                              o_proj)

    o_evol.add_hook(chainer.optimizer.WeightDecay(0.00001))
    o_dis.add_hook(chainer.optimizer.WeightDecay(0.00001))
    o_proj.add_hook(chainer.optimizer.WeightDecay(0.00001))

    vis_process = None
    for epoch in xrange(epoch0, n_epoch):
        for train_ctr in xrange(0, n_train, batchsize):
            print "epoch:", epoch, "train:", train_ctr,
            # discriminator
            # 0: from dataset
            # 1: from noise

            # Seed prediction_movie with observed frames, then roll the
            # evolver forward to fill the remaining timesteps.
            good_movie = True
            prediction_movie = n_movie * [None]
            try:
                current_movie = load_movie()
            except:
                # NOTE(review): bare except deliberately skips any failed
                # load (best-effort), but it also hides real bugs.
                continue

            for i in range(n_timeseries - 1):
                if current_movie[i] is None:
                    good_movie = False
                else:
                    prediction_movie[i] = current_movie[i]
            if not good_movie: continue
            for i in range(n_timeseries - 1, n_movie):
                prediction_movie[i] = evolve_image(
                    evol, proj, prediction_movie[i - n_timeseries + 1:i])

            # Periodically dump prediction/observation images and snapshots.
            if train_ctr % save_interval == 0:
                for answer_mode in ['predict', 'observe']:
                    for offset in [n_timeseries, 16, 32, 64, 119]:
                        if offset >= n_movie: continue
                        img_prediction = prediction_movie[offset]
                        if answer_mode == 'observe':
                            img_prediction = current_movie[offset]
                        if img_prediction is None: continue
                        imgfn = '%s/futuresun_%d_%04d_%s+%03d.png' % (
                            out_image_dir, epoch, train_ctr, answer_mode,
                            offset)
                        plt.rcParams['figure.figsize'] = (12.0, 12.0)
                        plt.close('all')
                        plt.imshow(img_prediction, vmin=0, vmax=1.4)
                        plt.suptitle(imgfn)
                        plt.savefig(imgfn)
                        subprocess.call("cp %s ~/public_html/futuresun/" %
                                        (imgfn),
                                        shell=True)

                # we don't have enough disk for history
                history_dir = 'history/'  #%d-%d'%(epoch,  train_ctr)
                subprocess.call("mkdir -p %s " % (history_dir), shell=True)
                subprocess.call("cp %s/*.h5 %s " %
                                (out_model_dir, history_dir),
                                shell=True)

                if epoch > 0 or train_ctr > 0:
                    print 'saving model...'
                    serializers.save_hdf5(
                        "%s/dcgan_model_evol.h5" % (out_model_dir), evol)
                    serializers.save_hdf5(
                        "%s/dcgan_state_evol.h5" % (out_model_dir), o_evol)
                    serializers.save_hdf5(
                        "%s/dcgan_model_dis.h5" % (out_model_dir), dis)
                    serializers.save_hdf5(
                        "%s/dcgan_state_dis.h5" % (out_model_dir), o_dis)
                    serializers.save_hdf5(
                        "%s/dcgan_model_proj.h5" % (out_model_dir), proj)
                    serializers.save_hdf5(
                        "%s/dcgan_state_proj.h5" % (out_model_dir), o_proj)
                    print '...saved.'

            # Per-movie bookkeeping.  'matsuoka_shuzo' flags whether a
            # difficulty level keeps training (named after the motivational
            # speaker; the original comments were his catchphrases).
            movie_in = None
            movie_out = None
            movie_out_predict = None
            evol_scores = {}
            proj_scores = {}
            matsuoka_shuzo = {}
            shuzo_evoke_timestep = []
            difficulties = ['normal', 'hard']
            vis_kit = {}
            for difficulty in difficulties:
                evol_scores[difficulty] = [0.0]
                proj_scores[difficulty] = [0.0]
                matsuoka_shuzo[difficulty] = True
                vis_kit[difficulty] = None
            matsuoka_shuzo[
                'normal'] = False  # "It's no good... let's give up" -- normal mode never trains.
            if vis_process is not None:
                vis_process.join()
                vis_process = None

            # start main training routine.
            print
            next_shuzo_scale = 10.0 * (1 + epoch)
            next_shuzo_offset = 1 + abs(
                int(round(np.random.normal(scale=next_shuzo_scale))))
            for train_offset in range(0, n_movie - n_timeseries):
                for difficulty in difficulties:
                    movie_clip = current_movie
                    if not matsuoka_shuzo[difficulty]:
                        # "Why quit there..." -- this difficulty gave up.
                        continue
                    else:
                        # "Don't give up!"
                        pass

                    # 'normal' trains on observed frames; 'hard' feeds the
                    # model its own rolling predictions.
                    if difficulty == 'normal':
                        movie_clip_in = movie_clip
                    else:
                        movie_clip_in = prediction_movie
                    maybe_dat = create_batch(train_offset, movie_clip_in,
                                             movie_clip)
                    if not maybe_dat:
                        #print "Warning: skip offset", train_offset, "because of unavailable data."
                        continue
                    data_in, data_out, data_other = maybe_dat
                    movie_in = Variable(cuda.to_gpu(data_in))
                    movie_out = Variable(cuda.to_gpu(data_out))
                    movie_other = Variable(cuda.to_gpu(data_other))

                    movie_out_predict_before = evol(movie_in)
                    movie_out_predict = proj(
                        movie_out_predict_before)  # no proj

                    vis_kit[difficulty] = (movie_in.data.get(),
                                           movie_out.data.get(),
                                           movie_out_predict_before.data.get(),
                                           movie_out_predict.data.get())

                    if args.norm == 'dcgan':
                        yl = dis(movie_in, movie_out_predict)
                        L_evol = F.softmax_cross_entropy(
                            yl, Variable(xp.zeros(batchsize, dtype=np.int32)))
                        L_dis = F.softmax_cross_entropy(
                            yl, Variable(xp.ones(batchsize, dtype=np.int32)))

                        # train discriminator
                        yl_train = dis(movie_in, movie_out)
                        L_dis += F.softmax_cross_entropy(
                            yl_train,
                            Variable(xp.zeros(batchsize, dtype=np.int32)))
                    elif args.norm == 'CA':
                        L_evol = d_norm(0, dis, movie_out,
                                        movie_out_predict_before)
                        L_proj = d_norm(0, dis, movie_out, movie_out_predict)
                        L_dis = d_norm(1, dis, movie_out,
                                       movie_out_predict_before)
                        # L_dis  += d_norm(1, dis, movie_out, movie_out_predict)
                        L_dis += d_norm(0, dis, movie_out, movie_other)
                        # L_dis  += d_norm(0, dis, movie_other, movie_out)
                    else:
                        L2norm = (movie_out - movie_out_predict)**2
                        yl = F.sum(L2norm) / L2norm.data.size
                        L_evol = yl

                    # NOTE(review): L_proj is only assigned in the 'CA'
                    # branch; its uses below raise NameError for
                    # --norm dcgan or plain L2.  Confirm the script is
                    # always run with norm='CA'.
                    evol_scores[difficulty] += [
                        L_evol.data.get()
                    ]  # np.average(F.softmax(yl).data.get()[:,0])
                    proj_scores[difficulty] += [
                        L_proj.data.get()
                    ]  # np.average(F.softmax(yl).data.get()[:,0])

                    # stop learning on normal mode.
                    if difficulty == 'hard':
                        o_evol.zero_grads()
                        L_evol.backward()
                        o_evol.update()

                        o_dis.zero_grads()
                        L_dis.backward()
                        o_dis.update()

                        o_proj.zero_grads()
                        L_proj.backward()
                        o_proj.update()

                    # Drop computation graphs so memory is freed each step.
                    movie_in.unchain_backward()
                    movie_out_predict.unchain_backward()
                    movie_out_predict_before.unchain_backward()
                    movie_other.unchain_backward()
                    L_evol.unchain_backward()
                    if args.norm == 'dcgan' or args.norm == 'CA':
                        L_dis.unchain_backward()

                    sys.stdout.write(
                        '%d %6s %s: %f -> %f, %f -> %f shuzo:%s\r' %
                        (train_offset, difficulty, args.norm,
                         np.average(evol_scores['normal']),
                         np.average(proj_scores['normal']),
                         np.average(evol_scores['hard']),
                         np.average(proj_scores['hard']),
                         str(shuzo_evoke_timestep[-10:])))
                    sys.stdout.flush()

                    # update the prediction as results of learning.
                    prediction_movie[
                        train_offset + n_timeseries - 1] = evolve_image(
                            evol, proj,
                            prediction_movie[train_offset:train_offset +
                                             n_timeseries - 1])

                    # prevent too much learning from noisy prediction.
                    # if len(evol_scores['hard'])>=10 and np.average(evol_scores['hard'][-5:-1]) > 5 * np.average(evol_scores['normal']):
                    # At random intervals, reset the 'hard' scores and
                    # re-seed predictions from observed frames.
                    if train_offset == next_shuzo_offset:
                        next_shuzo_offset = train_offset + 1 + abs(
                            int(round(
                                np.random.normal(scale=next_shuzo_scale))))
                        # "Never, ever give up!"
                        # matsuoka_shuzo['hard'] = False
                        shuzo_evoke_timestep += [train_offset]
                        evol_scores['hard'] = [0.0]
                        proj_scores['hard'] = [0.0]
                        for t in range(train_offset,
                                       train_offset + n_timeseries):
                            if current_movie[t] is not None:
                                prediction_movie[t] = current_movie[t]

            print

            def visualize_vis_kit(vis_kit):
                # Render input frames, prediction, projection, and ground
                # truth side by side for each difficulty, save the figure,
                # and copy it to the public_html directory.
                print "visualizing...",
                sys.stdout.flush()
                for difficulty in difficulties:
                    if vis_kit[difficulty] is None:
                        continue
                    movie_data, movie_out_data, movie_pred_data, movie_proj_data = vis_kit[
                        difficulty]
                    imgfn = '%s/batch-%s_%d_%04d.png' % (
                        out_image_dir, difficulty, epoch, train_ctr)

                    n_col = n_timeseries + 3
                    plt.rcParams['figure.figsize'] = (1.0 * n_col,
                                                      1.0 * batchsize)
                    plt.close('all')

                    for ib in range(batchsize):
                        for j in range(n_timeseries - 1):
                            plt.subplot(batchsize, n_col, 1 + ib * n_col + j)
                            # First two channels use a signed color range;
                            # presumably velocity-like data -- TODO confirm.
                            if j < 2:
                                vmin = -1
                                vmax = 1
                            else:
                                vmin = 0
                                vmax = 1.4
                            plt.imshow(movie_data[ib, j, :, :],
                                       vmin=vmin,
                                       vmax=vmax)
                            plt.axis('off')

                        plt.subplot(batchsize, n_col,
                                    1 + ib * n_col + n_timeseries - 1)
                        plt.imshow(movie_pred_data[ib, 0, :, :],
                                   vmin=0,
                                   vmax=1.4)
                        plt.axis('off')

                        plt.subplot(batchsize, n_col,
                                    1 + ib * n_col + n_timeseries)
                        plt.imshow(movie_proj_data[ib, 0, :, :],
                                   vmin=0,
                                   vmax=1.4)
                        plt.axis('off')

                        plt.subplot(batchsize, n_col,
                                    1 + ib * n_col + n_timeseries + 2)
                        plt.imshow(movie_out_data[ib, 0, :, :],
                                   vmin=0,
                                   vmax=1.4)
                        plt.axis('off')

                    plt.suptitle(imgfn)
                    plt.savefig(imgfn)
                    subprocess.call(
                        "cp %s ~/public_html/suntomorrow-batch-%s-%s.png" %
                        (imgfn, difficulty, args.gpu),
                        shell=True)
                print "visualized.",
                sys.stdout.flush()

            # Visualization runs in a separate process so training continues.
            vis_process = Process(target=visualize_vis_kit, args=(vis_kit, ))
            vis_process.start()
Esempio n. 21
0
class RNNCharEstimator(ChainerClassifier):
    """Character-level RNN classifier built on ChainerClassifier.

    Wraps either a CharLSTM or CharIRNN network and trains it with
    truncated BPTT: losses are accumulated and backpropagated every
    ``seq_size`` mini-batches.
    """

    def __init__(self,
                 net_type='lstm',
                 net_hidden=100,
                 vocab_size=1000,
                 dropout_ratio=0.0,
                 seq_size=70,
                 grad_clip=100.0,
                 **params):
        # net_type: 'lstm' or 'irnn' -- selects the recurrent cell.
        # seq_size: truncated-BPTT window (an update every seq_size batches).
        # grad_clip: threshold passed to optimizer.clip_grads.
        ChainerClassifier.__init__(self, **params)
        self.net_hidden = net_hidden
        self.net_type = net_type
        self.vocab_size = vocab_size
        self.dropout_ratio = dropout_ratio
        self.seq_size = seq_size
        self.grad_clip = grad_clip
        self.param_names.append('vocab_size')
        self.param_names.append('net_type')
        self.param_names.append('net_hidden')
        self.param_names.append('dropout_ratio')

    def setup_network(self, n_features):
        """Instantiate the recurrent network chosen by ``self.net_type``."""
        if self.net_type == 'lstm':
            self.network = CharLSTM(self.vocab_size, self.net_hidden,
                                    self.batch_size)
        elif self.net_type == 'irnn':
            self.network = CharIRNN(self.vocab_size, self.net_hidden,
                                    self.batch_size)
        else:
            # NOTE(review): ``error`` is not defined in this block;
            # presumably a project-level helper that aborts -- confirm
            # it raises rather than returns.
            error("Unknown net_type")
        self.reset_accum_loss()

    def reset_accum_loss(self):
        """Zero the scalar loss accumulator used for truncated BPTT."""
        if self.gpu >= 0:
            self.accum_loss = Variable(cuda.zeros(()))
        else:
            self.accum_loss = Variable(np.zeros(()))

    def forward_train(self, x, t):
        """One training forward pass; returns the network's loss variable."""
        return self.network.train(x, t, dropout_ratio=self.dropout_ratio)

    def predict(self, x_data):
        """Predict a class index for each row of ``x_data``.

        Returns a 1-D numpy array of argmax class indices.
        """
        self.network.reset_state(1)
        if self.gpu >= 0:
            self.network.to_gpu()
            x_data = cuda.to_gpu(x_data)
        # Fix: the original tested ``results == None`` -- an elementwise
        # comparison once ``results`` is an ndarray -- and re-concatenated
        # on every step (quadratic).  Collect outputs and join once.
        outputs = []
        for i in xrange(x_data.shape[0]):
            x = Variable(x_data[i, :])
            y = self.network.predict(x)
            outputs.append(cuda.to_cpu(y.data))
        results = np.concatenate(outputs)
        return results.argmax(1)

    def fit_update(self, loss, batch_id):
        """Accumulate ``loss``; every ``seq_size`` batches run an update."""
        self.accum_loss += loss
        if ((batch_id + 1) % self.seq_size) == 0:  # Run Truncated BPTT
            self.optimizer.zero_grads()
            self.accum_loss.backward()
            self.accum_loss.unchain_backward()  # truncate the history graph
            self.optimizer.clip_grads(self.grad_clip)
            self.optimizer.update()
            self.reset_accum_loss()

    def make_batch(self, x_data, y_data, batch_id):
        """Build one strided mini-batch of (x, y) pairs.

        Each of the ``batch_size`` rows samples the data at offsets
        spaced ``n_samples / batch_size`` apart (Python 2 integer
        division), wrapping around with ``%``.
        """
        batch_num = self.n_samples / self.batch_size
        x_batch = np.array([
            x_data[(batch_id + batch_num * j) % self.n_samples]
            for j in xrange(self.batch_size)
        ]).reshape(self.batch_size)
        y_batch = np.array([
            y_data[(batch_id + batch_num * j) % self.n_samples]
            for j in xrange(self.batch_size)
        ])
        return x_batch, y_batch
Esempio n. 22
0
    def train(self, words, steps, batchsize=100, sequence_length=10):
        """ Train the Predictor's model on words for steps number of steps. """

        whole_len = len(words)
        train_data = np.ndarray(whole_len, dtype=np.int32)
        jumps = steps * sequence_length

        # Initialize training data and maybe vocab.
        if self.vocab is None:
            vocab_initializing = True
            self.vocab = {}
        for i, word in enumerate(words):
            if vocab_initializing:
                if word not in self.vocab:
                    self.vocab[word] = len(self.vocab)
            train_data[i] = self.vocab[word]
        vocab_initializing = False


        print 'corpus length:', len(words)
        print 'self.vocab size:', len(self.vocab)

        # Initialize base model (if we need to)
        if self.model is None:
            self.model = BaseRNN(len(self.vocab), self.units)

        if self.gpu >= 0:
            cuda.get_device(self.gpu).use()
            self.model.to_self.gpu()

        optimizer = optimizers.RMSprop(lr=self.settings.learning_rate,
                                       alpha=self.settings.decay_rate,
                                       eps=1e-8)
        optimizer.setup(self.model)

        jumpsPerEpoch = whole_len / batchsize
        epoch = 0
        start_at = time.time()
        cur_at = start_at
        state = make_initial_state(self.units, batchsize=batchsize)

        if self.gpu >= 0:
            accum_loss = Variable(cuda.zeros(()))
            for _, value in state.items():
                value.data = cuda.to_self.gpu(value.data)
        else:
            accum_loss = Variable(np.zeros((), dtype=np.float32))

        print 'going to train {} iterations'.format(steps)
        for i in xrange(jumps):
            x_batch = np.array([train_data[(jumpsPerEpoch * j + i) % whole_len]
                                for j in xrange(batchsize)])
            y_batch = np.array([train_data[(jumpsPerEpoch * j + i + 1) % whole_len]
                                for j in xrange(batchsize)])

            if self.gpu >= 0:
                x_batch = cuda.to_self.gpu(x_batch)
                y_batch = cuda.to_self.gpu(y_batch)


            state, loss_i = self.model.forward_one_step(x_batch,
                                                        y_batch,
                                                        state,
                                                        dropout_ratio=self.settings.dropout)
            accum_loss += loss_i

            if (i + 1) % sequence_length == 0:
                now = time.time()
                print '{}/{}, train_loss = {}, time = {:.2f}'.format((i+1)/sequence_length, steps, accum_loss.data / sequence_length, now-cur_at)
                cur_at = now

                optimizer.zero_grads()
                accum_loss.backward()
                accum_loss.unchain_backward()  # truncate
                if self.gpu >= 0:
                    accum_loss = Variable(cuda.zeros(()))
                else:
                    accum_loss = Variable(np.zeros((), dtype=np.float32))


                optimizer.clip_grads(self.settings.grad_clip)
                optimizer.update()

            if (i + 1) % jumpsPerEpoch == 0:
                epoch += 1

                if epoch >= self.settings.learning_rate_decay_after:
                    optimizer.lr *= self.settings.learning_rate_decay
                    print 'decayed self.settings.learning rate by a factor {} to {}'.format(self.settings.learning_rate_decay, optimizer.lr)
Esempio n. 23
0
def main():
    """Train an RNN language model with SGD and truncated BPTT.

    Traces train/dev perplexity after every epoch and test perplexity
    at the end.  Relies on module-level helpers (parse_args, load_data,
    forward, evaluate, UF, xp, bp_len, grad_clip, USE_GPU).
    """
    args = parse_args()
    init_program_state(args)
    vocab = make_vocab()
    data, batched_data = load_data(args.train, vocab, args.batch_size)
    dev, batched_dev = load_data(args.dev, vocab, 1)
    test, batched_test = load_data(args.test, vocab, 1)
    model = init_model(input_size=len(vocab),
                       embed_size=args.embed_size,
                       hidden_size=args.hidden_size,
                       output_size=len(vocab))
    optimizer = optimizers.SGD(lr=args.lr)

    # Prepare parameters, move to GPU if configured, attach optimizer.
    UF.init_model_parameters(model)
    model = UF.convert_to_GPU(USE_GPU, model)
    optimizer.setup(model)

    batchsize = args.batch_size
    n_epochs = args.epoch
    pending_loss = Variable(xp.zeros((), dtype=np.float32))
    step = 0

    for epoch_no in range(n_epochs):
        UF.trace("Training Epoch %d" % epoch_no)
        token_total = 0
        log_ppl = 0.0

        # Accumulate loss batch by batch; backpropagate and update every
        # bp_len batches (truncated BPTT).
        for batch in batched_data:
            loss, n_words = forward(model, batch)
            pending_loss += loss
            log_ppl += loss.data.reshape(())
            token_total += n_words
            if (step + 1) % bp_len == 0:
                optimizer.zero_grads()
                pending_loss.backward()
                pending_loss.unchain_backward()
                pending_loss = Variable(xp.zeros((), dtype=np.float32))

                optimizer.clip_grads(grad_clip)
                optimizer.update()
            step += 1

        # Per-token log-perplexity for this epoch.
        log_ppl /= token_total
        UF.trace("  PPL (Train)  = %.10f" % math.exp(UF.to_cpu(USE_GPU, log_ppl)))
        dev_ppl = evaluate(model, batched_dev)
        UF.trace("  PPL (Dev)    = %.10f" % math.exp(UF.to_cpu(USE_GPU, dev_ppl)))

        # Anneal the learning rate after epoch 6.
        if epoch_no > 6:
            optimizer.lr /= 1.2
            UF.trace("Reducing LR:", optimizer.lr)

    # Final held-out evaluation.
    UF.trace("Begin Testing...")
    test_ppl = evaluate(model, batched_test)
    UF.trace("  log(PPL) = %.10f" % test_ppl)
    UF.trace("  PPL      = %.10f" % math.exp(UF.to_cpu(USE_GPU, test_ppl)))
Esempio n. 24
0
    def train(self,
              train_data,
              train_input,
              test_data,
              test_input,
              n_epochs,
              filename=None,
              KL_loss=False,
              Add_training=False):
        """Train the model, validating each epoch and saving the best snapshot.

        :param train_data: data in the form n_batches x batch_size x n_steps x n_outputs
        :param train_input: per-step decoder input, same leading dims as train_data
        :param test_data: data in the form n_batches x batch_size x n_steps x n_outputs
        :param test_input: per-step decoder input for validation
        :param n_epochs: nr of training epochs
        :param filename: subdirectory of saved_models/ used for checkpoints
        :param KL_loss: if True, use KL divergence instead of mean squared error
        :param Add_training: if True, only the slow subnetwork is optimized
        :return: (train_loss, test_loss, batches_loss) numpy arrays
        """

        # keep track of loss
        train_loss = np.zeros(n_epochs)
        test_loss = np.zeros(n_epochs)
        batches_loss = np.zeros(train_data.shape[0] * n_epochs)

        # keep track of learned alphas (time constants).
        # Fix: identity comparison ``is not 'Static'`` replaced with ``!=``;
        # string identity depends on interning and is not a reliable test.
        if self.model.mode != 'Static':
            learning_alphasS = np.empty(
                (n_epochs + 1, self.model.hidden.alphaS.alpha.size))
            learning_alphasR = np.empty(
                (n_epochs + 1, self.model.hidden.alphaR.alpha.size))
            learning_alphasS[0, :] = self.model.hidden.alphaS.alpha
            learning_alphasR[0, :] = self.model.hidden.alphaR.alpha

        index = 0  # running write position into batches_loss
        # NOTE(review): magic initial best -- assumes real validation losses
        # stay below 4000; confirm for your data scale.
        best_loss = 4000

        if Add_training:
            # Restrict optimization to the slow subnetwork.
            self.optimizer.setup(self.model.slow)

        for epoch in tqdm.tqdm(xrange(n_epochs)):
            with chainer.using_config('train', True):

                n_batches = train_data.shape[0]
                batch_size = train_data.shape[1]
                n_steps = train_data.shape[2]

                for i in range(n_batches):
                    loss = Variable(self.xp.array(0, 'float32'))
                    self.model.reset_state()

                    # initialization for this batch (data0 itself is unused below)
                    data0 = Variable(train_data[i, :, 0, :])

                    self.model.hidden.initialize_state(batch_size)

                    for t in xrange(0, n_steps, 1):
                        x = Variable(train_input[i, :, t, :])
                        data = self.xp.array(train_data[i, :, t, :])
                        _loss = mean_squared_error(self.model(x),
                                                   data)  # prediction mode
                        if KL_loss:
                            _loss = self.KL_divergence(self.model(), data)
                        train_loss[epoch] += cuda.to_cpu(_loss.data)
                        loss += _loss

                    batches_loss[index] = loss.data
                    index = index + 1

                    # Clears grads of the whole link before backprop.
                    self.model.cleargrads()
                    loss.backward()
                    loss.unchain_backward()

                    if Add_training:
                        # Freeze everything except the slow pathway.
                        self.model.fast.U1.disable_update()
                        self.model.fast.W.disable_update()
                        self.model.inout.disable_update()
                        self.model.slow.W.disable_update()
                    self.optimizer.update()

            # save learning of time constants
            if self.model.mode != 'Static':
                learning_alphasS[epoch +
                                 1, :] = self.model.hidden.alphaS.alpha.data
                learning_alphasR[epoch +
                                 1, :] = self.model.hidden.alphaR.alpha.data

            # compute loss per epoch
            train_loss[epoch] /= (n_batches * batch_size * self.model.n_out)

            # validation
            with chainer.using_config('train', False):

                n_batches = test_data.shape[0]
                batch_size = test_data.shape[1]
                n_steps = test_data.shape[2]

                for i in range(n_batches):

                    self.model.reset_state()
                    # Fix: indexed with the stale ``t`` left over from the
                    # training loop, which can be out of range when test
                    # sequences are shorter; use step 0 as in training
                    # (data0 is unused either way).
                    data0 = Variable(test_data[i, :, 0, :])

                    self.model.hidden.initialize_state(batch_size)

                    for t in xrange(0, n_steps, 1):
                        x = Variable(test_input[i, :, t, :])
                        data = self.xp.array(test_data[i, :, t, :])
                        _loss = mean_squared_error(self.model(x),
                                                   data)  # prediction mode
                        if KL_loss:
                            _loss = self.KL_divergence(self.model(), data)
                        test_loss[epoch] += cuda.to_cpu(_loss.data)

            # compute loss per epoch
            test_loss[epoch] /= (n_batches * batch_size * self.model.n_out)

            # Keep the best validation snapshot (early-stopping style).
            if test_loss[epoch] < best_loss:
                best_loss = test_loss[epoch]
                self.save('saved_models/' + filename + '/best')
                np.save('saved_models/' + filename + '/conv_epoch', epoch)
            # end of training cycle

            np.save('saved_models/' + filename + '/best_loss', best_loss)
        if self.model.mode != 'Static':
            np.save('saved_models/' + filename + '/learning_alphaS',
                    learning_alphasS)
            np.save('saved_models/' + filename + '/learning_alphaR',
                    learning_alphasR)

        return train_loss, test_loss, batches_loss
Esempio n. 25
0
def main():
    """Train a character-level RNN language model with truncated BPTT.

    Parses command-line options, loads the corpus, builds (or resumes) a
    CharRNN model, optimizes it with RMSprop under gradient clipping,
    decays the learning rate once per epoch, and periodically pickles
    checkpoints to ``--checkpoint_dir``.
    """
    # arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', type=str, default='data/dazai')
    parser.add_argument('--checkpoint_dir', type=str, default='model')
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--rnn_size', type=int, default=128)
    parser.add_argument('--learning_rate', type=float, default=2e-3)
    parser.add_argument('--learning_rate_decay', type=float, default=0.97)
    parser.add_argument('--learning_rate_decay_after', type=int, default=10)
    parser.add_argument('--decay_rate', type=float, default=0.95)
    parser.add_argument('--dropout', type=float, default=0.0)
    parser.add_argument('--seq_length', type=int, default=50)
    parser.add_argument('--batchsize', type=int, default=50)
    parser.add_argument('--epochs', type=int, default=50)
    parser.add_argument('--grad_clip', type=int, default=5)
    parser.add_argument('--init_from', type=str, default='')
    # BUG FIX: argparse's type=bool makes any non-empty string (even
    # "False") truthy; parse the flag value explicitly instead.
    parser.add_argument('--enable_checkpoint',
                        type=lambda s: s.lower() not in ('false', '0', 'no', ''),
                        default=True)
    parser.add_argument('--file_name', type=str, default='input.txt')
    args = parser.parse_args()

    if not os.path.exists(args.checkpoint_dir):
        os.mkdir(args.checkpoint_dir)

    n_epochs = args.epochs
    n_units = args.rnn_size
    batchsize = args.batchsize
    bprop_len = args.seq_length
    grad_clip = args.grad_clip

    xp = cuda.cupy if args.gpu >= 0 else np

    train_data, words, vocab = load_data(args.data_dir, args.file_name)
    # Persist the vocabulary so sampling scripts can reuse it.
    with open('%s/vocab.bin' % args.data_dir, 'wb') as f:
        pickle.dump(vocab, f)

    if len(args.init_from) > 0:
        # Resume training from a previously pickled model.
        with open(args.init_from, 'rb') as f:
            model = pickle.load(f)
    else:
        model = CharRNN(len(vocab), n_units)

    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()

    optimizer = optimizers.RMSprop(lr=args.learning_rate,
                                   alpha=args.decay_rate,
                                   eps=1e-8)
    optimizer.setup(model)
    # Cap gradient magnitude to keep truncated BPTT stable.
    optimizer.add_hook(chainer.optimizer.GradientClipping(grad_clip))

    whole_len = train_data.shape[0]
    jump = whole_len // batchsize  # stride between the parallel text streams
    epoch = 0
    start_at = time.time()
    cur_at = start_at
    state = make_initial_state(n_units, batchsize=batchsize)
    # Loss accumulator for one BPTT window (same init on CPU and GPU).
    accum_loss = Variable(xp.zeros((), dtype=np.float32))
    if args.gpu >= 0:
        for key, value in state.items():
            value.data = cuda.to_gpu(value.data)

    print('going to train {} iterations'.format(jump * n_epochs / bprop_len))
    sum_perp = 0
    count = 0
    iteration = 0
    for i in range(jump * n_epochs):
        # Each of the `batchsize` streams reads the corpus at its own offset.
        x_batch = xp.array([
            train_data[(jump * j + i) % whole_len] for j in range(batchsize)
        ])
        y_batch = xp.array([
            train_data[(jump * j + i + 1) % whole_len]
            for j in range(batchsize)
        ])

        if args.gpu >= 0:
            x_batch = cuda.to_gpu(x_batch)
            y_batch = cuda.to_gpu(y_batch)

        state, loss_i = model.forward_one_step(x_batch,
                                               y_batch,
                                               state,
                                               dropout_ratio=args.dropout)
        accum_loss += loss_i
        count += 1

        if (i + 1) % bprop_len == 0:  # Run truncated BPTT
            iteration += 1
            sum_perp += accum_loss.data
            now = time.time()
            print('{}/{}, train_loss = {}, time = {:.2f}'.format(
                (i + 1) / bprop_len, jump * n_epochs / bprop_len,
                accum_loss.data / bprop_len, now - cur_at))
            cur_at = now

            model.cleargrads()
            accum_loss.backward()
            accum_loss.unchain_backward()  # truncate the graph here
            accum_loss = Variable(xp.zeros((), dtype=np.float32))
            optimizer.update()

        if (i + 1) % 1000 == 0:
            print('epoch: ', epoch)
            print('iteration: ', iteration)
            print('training perplexity: ', np.exp(float(sum_perp) / count))
            sum_perp = 0
            count = 0

        if args.enable_checkpoint and (i + 1) % 10000 == 0:
            fn = ('%s/charrnn_epoch_%.2f.chainermodel' %
                  (args.checkpoint_dir, float(i) / jump))
            # Deep-copy to CPU once, write both the named and latest files.
            cpu_model = copy.deepcopy(model).to_cpu()
            with open(fn, 'wb') as f:
                pickle.dump(cpu_model, f)
            with open('%s/latest.chainermodel' % (args.checkpoint_dir),
                      'wb') as f:
                pickle.dump(cpu_model, f)

        if (i + 1) % jump == 0:
            epoch += 1
            if epoch >= args.learning_rate_decay_after:
                optimizer.lr *= args.learning_rate_decay
                print('decayed learning rate by a factor {} to {}'.format(
                    args.learning_rate_decay, optimizer.lr))

        sys.stdout.flush()
def findNumEpoch(architecture, waves, infos, gpu_id, waveFs):
    """Train a fresh Net and return the epoch with the lowest dev error.

    Trains on the waves of the training folds (groupFold[0]) with Adam,
    measuring classification error on the development fold (groupFold[1])
    every ``devEpoch`` epochs.  Training stops once no improvement has been
    seen for ``convergenceEpoch`` consecutive epochs.

    Args:
        architecture: network architecture spec; also determines the
            receptive-field length via totalInputLength().
        waves: sequence of 1-D sample arrays, indexed by wave id.
        infos: metadata consumed by groupLabelWave() to group waves by label.
        gpu_id: CUDA device id, or negative to run on CPU.
        waveFs: sampling frequency (Hz) of the waves.

    Returns:
        (bestEpoch, bestScore, seed): best epoch index, its mean per-label
        incorrect rate on the dev fold, and the NumPy RNG seed used.
    """
    if cupy is not None and gpu_id >= 0:
        xp = cupy
        cupy.cuda.Device(gpu_id).use()
    else:
        xp = np

    inputLength = totalInputLength(architecture)
    labels = getLabels()
    numLabel = len(labels)
    # Fold split: (train folds, dev fold, test fold); the test fold is
    # unused here.
    groupFold = ((0, 1, 2), (3, ), (4, ))

    # Draw a random seed, then reseed with it so the run is reproducible
    # from the returned seed value.
    np.random.seed()
    seed = np.random.randint(0, np.iinfo(int32).max)
    np.random.seed(seed)
    net = Net(numLabel, architecture, functions.elu)
    # 	opt=Eve(1e-4)
    opt = optimizers.Adam(1e-4)
    opt.setup(net)
    if gpu_id >= 0: net.to_gpu(gpu_id)

    insLabelSize = 2**2  # training samples drawn per label per step
    devSize = 2**1  # dev waves evaluated per mini-batch
    devSegmentSecUpper = 10  # upper bound (seconds) for one dev segment

    devEpoch = 2**5  # evaluate on the dev fold every this many epochs
    convergenceEpoch = 2**5 * devEpoch  # patience before stopping
    devSegmentLenUpper = int(devSegmentSecUpper * waveFs)
    devFold = sorted(set(groupFold[1]))
    devLabelWave = groupLabelWave((devFold, ), infos)[0]
    # Flatten {label: [wave ids]} into [(label index, wave id), ...].
    devLabelWave = list(
        itertools.chain.from_iterable([[(li, i) for i in devLabelWave[la]]
                                       for li, la in enumerate(labels)]))
    # Sort by wave length so each dev batch holds similarly sized waves.
    devLabelWave = sorted(devLabelWave, key=lambda lw: len(waves[lw[1]]))
    devBatchIndex = np.array_split(np.arange(len(devLabelWave)),
                                   int(np.ceil(len(devLabelWave) / devSize)))
    # Total sample count per label, used to normalize the error rate.
    devLabelSize = np.zeros(numLabel, int32)
    for li, wi in devLabelWave:
        devLabelSize[li] += len(waves[wi])

    # Zero-pad each dev wave by half the receptive field so every sample
    # position has a full input window.
    devWaves = {}
    for li, wi in devLabelWave:
        wave = waves[wi]
        wave = np.concatenate((wave, np.zeros((inputLength - 1) // 2,
                                              float32)))
        devWaves[wi] = wave

    # Build, per label, the (wave id, time index) pairs of every training
    # sample position in the training folds.
    insFold = sorted(set(groupFold[0]))
    insLabelWave = groupLabelWave((insFold, ), infos)[0]
    insLabelWaveIndex = [[] for i in range(len(labels))]
    for li, la in enumerate(labels):
        for i in insLabelWave[la]:
            wave = waves[i]
            timeIndex = np.arange(len(wave))
            waveIndex = np.ones(len(wave), int32) * i
            index = np.stack((waveIndex, timeIndex), axis=1)
            insLabelWaveIndex[li].append(index)
        insLabelWaveIndex[li] = np.concatenate(insLabelWaveIndex[li], axis=0)

    # Shuffled per-label sample pools consumed by makeInpTru() over time.
    insRemainingLabelWave = [
        np.random.permutation(insLabelWaveIndex[li])
        for li in range(len(labels))
    ]

    epoch = 0
    bestEpoch = 0
    epochIncorrect = {}  # epoch -> mean per-label incorrect rate on dev
    while epoch < bestEpoch + convergenceEpoch:
        # One training step on a batch drawn by makeInpTru().
        x, tr = makeInpTru(insLabelWaveIndex, waves, insRemainingLabelWave,
                           inputLength, insLabelSize, numLabel)
        x = x[:, newaxis, :, newaxis]  # add channel and width axes (NCHW)
        x = xp.asarray(x)
        x = Variable(x)
        x = net.callSingle(x, True)
        tr = tr[..., newaxis, newaxis]
        tr = xp.asarray(tr)
        e = functions.softmax_cross_entropy(x, tr)

        net.cleargrads()
        e.backward()
        e.unchain_backward()
        # 		opt.update(loss=e.data)
        opt.update()

        # Only run the dev evaluation every devEpoch-th epoch.
        if epoch % devEpoch != devEpoch - 1:
            epoch += 1
            continue
        incorrect = xp.zeros(numLabel, int32)
        with chainer.using_config("enable_backprop", False):
            for bi, index in enumerate(devBatchIndex):
                waveIndex = np.array([devLabelWave[i][1] for i in index])
                tru = np.array([devLabelWave[i][0] for i in index])
                # Longest wave in this batch (waves were sorted by length).
                waveLen = len(devWaves[waveIndex[-1]])
                segmentTimes = np.array_split(
                    np.arange(waveLen),
                    int(np.ceil((waveLen) / devSegmentLenUpper)))
                net.reset()
                for si, segTime in enumerate(segmentTimes):
                    t0 = segTime[0]
                    t1 = segTime[-1] + 1
                    x = np.zeros((len(index), t1 - t0), float32)
                    # Padding positions stay -1, which matches no label below.
                    tr = -np.ones((len(index), t1 - t0), int32)
                    for xi, wi in enumerate(waveIndex):
                        if len(devWaves[wi]) <= t0: continue
                        w = devWaves[wi][t0:t1]
                        x[xi, :len(w)] = w
                        tr[xi, :len(w)] = tru[xi]
                    # The first half receptive field lacks full context;
                    # exclude it from scoring.
                    if t0 < (inputLength - 1) // 2:
                        tr[:, :(inputLength - 1) // 2 - t0] = -1

                    x = x[:, newaxis, :, newaxis]
                    x = xp.asarray(x)
                    x = Variable(x)
                    x = net(x, False)
                    x.unchain_backward()

                    x = xp.argmax(x.data, axis=1)
                    tr = tr[..., newaxis]
                    tr = xp.asarray(tr)
                    for li, la in enumerate(labels):
                        incorrect[li] += (x[tr == li] != li).sum()

            net.reset()
            if gpu_id >= 0: incorrect = cupy.asnumpy(incorrect)
            # Mean of per-label error rates (balances unequal label sizes).
            incorrect = (incorrect / devLabelSize).mean()
            print("epoch", epoch, "incorrect", incorrect)

        if len(epochIncorrect) == 0 or incorrect < epochIncorrect[bestEpoch]:
            bestEpoch = epoch
        epochIncorrect[epoch] = incorrect
        epoch += 1

    devEpochs = np.array(sorted(epochIncorrect), int32)
    bestScore = epochIncorrect[bestEpoch]
    epochIncorrect = np.array([epochIncorrect[ep] for ep in devEpochs])

    return bestEpoch, bestScore, seed
Esempio n. 27
0
    def update_core(self):
        """Run one adversarial update of encoder, decoder and discriminator.

        Pulls a batch of (2-D image, stacked 3-D volume) pairs from the
        'main' iterator, predicts a voxel volume with enc+dec, scores the
        prediction and the target with dis (conditioned on d_in), and
        updates each player only when self.check_dis allows it.
        """
        enc_optimizer = self.get_optimizer('enc')
        dec_optimizer = self.get_optimizer('dec')
        dis_optimizer = self.get_optimizer('dis')

        enc, dec, dis = self.enc, self.dec, self.dis
        xp = enc.xp

        batch = self.get_iterator('main').next()
        batchsize = len(batch)
        in_ch = batch[0][0].shape[0]
        # batch[i][1] packs two volumes; its leading dimension must split
        # evenly in two (presumably target + conditioning — see below).
        assert(batch[0][1].shape[0] % 2 == 0)

        out_ch = int(batch[0][1].shape[0] / 2)

        # Changing to voxel space
        w_in = 256  # input image width/height
        w_out = 64  # output voxel grid edge length

        x_in = xp.zeros((batchsize, in_ch, w_in, w_in)).astype(np.float32)
        t_out = xp.zeros((batchsize, out_ch, w_out, w_out, w_out)).astype(np.float32)
        d_in = xp.zeros((batchsize, out_ch, w_out, w_out, w_out)).astype(np.float32)

        for i in range(batchsize):
            x_in[i,:] = xp.asarray(batch[i][0])
            # NOTE(review): assumed layout — [1][0] is the target volume and
            # [1][1] the conditioning volume; confirm against the dataset.
            t_out[i,:] = xp.asarray(batch[i][1][0])
            d_in[i,:] = xp.asarray(batch[i][1][1])

        x_in = Variable(x_in)
        t_out = Variable(t_out)
        d_in = Variable(d_in)

        with chainer.using_config('train', True):

            # This will no longer work for pix-pix
            z = enc(x_in)
            x_out = dec(z)

            y_fake = dis(x_out, d_in)  # discriminator on the prediction
            y_real = dis(t_out, d_in)  # discriminator on the target

            # Decide which players get updated this step.
            update_dis, update_gen = self.check_dis(y_real, y_fake)

            if update_gen:
                enc_optimizer.update(self.loss_enc, enc, x_out, t_out, y_fake)
                # Detach the encoder features so the decoder update below
                # does not backprop into enc again.
                for z_ in z:
                    z_.unchain_backward()

                dec_optimizer.update(self.loss_dec, dec, x_out, t_out, y_fake)
            else:
                print("Not updating gen")


            # Detach the generator graph before the discriminator update.
            x_in.unchain_backward()
            x_out.unchain_backward()

            if update_dis :
                dis_optimizer.update(
                    self.loss_dis, dis, y_real, y_fake)
            else:
                print("Not updating disc")
Esempio n. 28
0
    def _train(self, **kwargs):
        """Train self.model on self.dataset with truncated BPTT.

        Keyword Args:
            gpu (int): GPU id, negative for CPU (default -1).
            lr, lr_decay, lr_decay_after, decay_rate, dropout, bprop_len,
            batchsize, grad_clip, epochs: the usual char-RNN
            hyper-parameters; see the defaults below.

        Side effects: pickles the (CPU copy of the) trained model to
        ``self.model_path`` every 10000 iterations and at the end.
        """
        gpu = kwargs.get("gpu", -1)
        lr = kwargs.get("lr", 2e-3)
        lr_decay = kwargs.get("lr_decay", 0.97)
        lr_decay_after = kwargs.get("lr_decay_after", 10)
        decay_rate = kwargs.get("decay_rate", 0.95)
        dropout = kwargs.get("dropout", 0.0)
        bprop_len = kwargs.get("bprop_len", 50)
        batchsize = kwargs.get("batchsize", 50)
        grad_clip = kwargs.get("grad_clip", 5)
        n_epochs = kwargs.get("epochs", 5)

        if gpu >= 0:
            cuda.get_device(gpu).use()
            self.model.to_gpu()

        optimizer = optimizers.RMSprop(lr=lr, alpha=decay_rate, eps=1e-8)
        optimizer.setup(self.model)

        train_data = self.dataset
        whole_len = train_data.shape[0]
        jump = whole_len // batchsize  # stride between parallel streams
        epoch = 0
        start_at = time.time()
        cur_at = start_at
        state = self.model.make_initial_state(batchsize=batchsize)

        if gpu >= 0:
            accum_loss = Variable(cuda.zeros(()))
            for key, value in state.items():
                value.data = cuda.to_gpu(value.data)
        else:
            accum_loss = Variable(np.zeros((), dtype=np.float32))

        print('going to train {} iterations'.format(jump * n_epochs))

        for i in range(jump * n_epochs):
            # Each of the `batchsize` streams reads at its own offset.
            x_batch = np.array([train_data[(jump * j + i) % whole_len]
                                for j in range(batchsize)])
            y_batch = np.array([train_data[(jump * j + i + 1) % whole_len]
                                for j in range(batchsize)])

            if gpu >= 0:
                x_batch = cuda.to_gpu(x_batch)
                y_batch = cuda.to_gpu(y_batch)

            state, loss_i = self.model.forward_one_step(
                x_batch, y_batch, state, dropout_ratio=dropout)
            accum_loss += loss_i

            if (i + 1) % bprop_len == 0:  # Run truncated BPTT
                now = time.time()
                sys.stderr.write(
                    '\r{}/{}, train_loss = {}, time = {:.2f}'.format(
                        (i + 1) // bprop_len, (jump * n_epochs) // bprop_len,
                        accum_loss.data / bprop_len, now - cur_at))
                sys.stderr.flush()
                cur_at = now

                optimizer.zero_grads()
                accum_loss.backward()
                accum_loss.unchain_backward()  # truncate the graph

                if gpu >= 0:
                    accum_loss = Variable(cuda.zeros(()))
                else:
                    accum_loss = Variable(np.zeros((), dtype=np.float32))

                optimizer.clip_grads(grad_clip)
                optimizer.update()

            # BUG FIX: checkpointing, epoch counting and lr decay used to
            # live inside the BPTT branch above, so epochs were only counted
            # when (i + 1) was a multiple of both jump and bprop_len, and
            # the learning rate decayed on every BPTT step past the
            # threshold instead of once per epoch.
            if (i + 1) % 10000 == 0:
                with open(self.model_path, 'wb') as f:
                    pickle.dump(copy.deepcopy(self.model).to_cpu(), f)

            if (i + 1) % jump == 0:
                epoch += 1
                if epoch >= lr_decay_after:
                    optimizer.lr *= lr_decay
                    print('decayed learning rate by a factor '
                          '{} to {}'.format(lr_decay, optimizer.lr))

            sys.stdout.flush()

        with open(self.model_path, 'wb') as f:
            pickle.dump(copy.deepcopy(self.model).to_cpu(), f)
Esempio n. 29
0
class RNNCharEstimator(ChainerClassifier):
    """Character-level RNN classifier with an LSTM or IRNN backend.

    Accumulates the loss over ``seq_size`` consecutive batches and runs
    truncated BPTT at each boundary.
    """

    def __init__(self, net_type='lstm', net_hidden=100,
                       vocab_size=1000, dropout_ratio=0.0, seq_size=70, grad_clip=100.0,
                       **params):
        ChainerClassifier.__init__(self, **params)
        self.net_hidden    = net_hidden     # hidden-layer width
        self.net_type      = net_type       # 'lstm' or 'irnn'
        self.vocab_size    = vocab_size     # input vocabulary size
        self.dropout_ratio = dropout_ratio
        self.seq_size      = seq_size       # BPTT window (in batches)
        self.grad_clip     = grad_clip      # gradient-norm clip threshold
        self.param_names.append('vocab_size')
        self.param_names.append('net_type')
        self.param_names.append('net_hidden')
        self.param_names.append('dropout_ratio')

    def setup_network(self, n_features):
        """Instantiate the recurrent network selected by ``net_type``."""
        if self.net_type == 'lstm':
            self.network = CharLSTM(self.vocab_size, self.net_hidden, self.batch_size)
        elif self.net_type == 'irnn':
            self.network = CharIRNN(self.vocab_size, self.net_hidden, self.batch_size)
        else:
            # BUG FIX: `error(...)` was an undefined name (NameError);
            # raise an explicit exception instead.
            raise ValueError("Unknown net_type")
        self.reset_accum_loss()

    def reset_accum_loss(self):
        """Zero the truncated-BPTT loss accumulator on the active device."""
        if self.gpu >= 0:
            self.accum_loss = Variable(cuda.zeros(()))
        else:
            # float32 to match the GPU accumulator and the losses added
            # to it (plain np.zeros(()) would be float64).
            self.accum_loss = Variable(np.zeros((), dtype=np.float32))

    def forward_train(self, x, t):
        """Forward pass in training mode; returns the per-batch loss."""
        return self.network.train(x, t, dropout_ratio=self.dropout_ratio)

    def predict(self, x_data):
        """Return the predicted class index for each timestep of ``x_data``."""
        self.network.reset_state(1)
        if self.gpu >= 0:
            self.network.to_gpu()
            x_data = cuda.to_gpu(x_data)
        results = None
        for i in range(x_data.shape[0]):
            x = Variable(x_data[i, :])
            y = self.network.predict(x)
            # BUG FIX: `results == None` becomes an elementwise ndarray
            # comparison once `results` is an array; identity is correct.
            if results is None:
                results = cuda.to_cpu(y.data)
            else:
                results = np.concatenate([results, cuda.to_cpu(y.data)])
        results = results.argmax(1)
        return results

    def fit_update(self, loss, batch_id):
        """Accumulate ``loss``; run truncated BPTT every ``seq_size`` batches."""
        self.accum_loss += loss
        if ((batch_id + 1) % self.seq_size) == 0:  # Run Truncated BPTT
            self.optimizer.zero_grads()
            self.accum_loss.backward()
            self.accum_loss.unchain_backward()  # truncate the graph
            self.optimizer.clip_grads(self.grad_clip)
            self.optimizer.update()
            self.reset_accum_loss()

    def make_batch(self, x_data, y_data, batch_id):
        """Gather one batch of strided samples from the data arrays."""
        # BUG FIX: floor division keeps the stride an int under Python 3's
        # true division (a float stride breaks the indexing below).
        batch_num = self.n_samples // self.batch_size
        x_batch = np.array([x_data[(batch_id + batch_num * j) % self.n_samples]
                            for j in range(self.batch_size)]).reshape(self.batch_size)
        y_batch = np.array([y_data[(batch_id + batch_num * j) % self.n_samples]
                            for j in range(self.batch_size)])
        return x_batch, y_batch
Esempio n. 30
0
def train_dcgan_labeled(evol, dis, proj, epoch0=0):
    """Adversarially train the frame-evolution GAN on solar-image movies.

    ``evol`` predicts the next frame from the preceding n_timeseries-1
    frames, ``proj`` post-processes that prediction, and ``dis`` scores
    predictions against observed frames.  Each movie is trained under two
    curricula: 'normal' (inputs are observed frames) and 'hard' (inputs are
    the model's own rolled-out predictions); only 'hard' passes update the
    weights.  HDF5 checkpoints and visualization images are written
    periodically, with rendering delegated to a background process.

    Relies on module-level globals: args, xp, out_model_dir, out_image_dir,
    n_epoch, n_train, n_movie, n_timeseries, batchsize, save_interval, and
    the helpers load_movie, create_batch, evolve_image, d_norm.

    Args:
        evol: frame-evolution (generator) network.
        dis: discriminator network.
        proj: projection network applied on top of evol's output.
        epoch0: epoch index to resume from.
    """
    global epoch
    o_evol = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_evol.setup(evol)
    o_dis = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_dis.setup(dis)
    o_proj = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_proj.setup(proj)
    if not args.fresh_start:
        # Resume model weights and optimizer states from the last checkpoint.
        serializers.load_hdf5("%s/dcgan_model_evol.h5"%(out_model_dir),evol)
        serializers.load_hdf5("%s/dcgan_state_evol.h5"%(out_model_dir),o_evol)
        serializers.load_hdf5("%s/dcgan_model_dis.h5"%(out_model_dir),dis)
        serializers.load_hdf5("%s/dcgan_state_dis.h5"%(out_model_dir),o_dis)
        serializers.load_hdf5("%s/dcgan_model_proj.h5"%(out_model_dir),proj)
        serializers.load_hdf5("%s/dcgan_state_proj.h5"%(out_model_dir),o_proj)


    o_evol.add_hook(chainer.optimizer.WeightDecay(0.00001))
    o_dis.add_hook(chainer.optimizer.WeightDecay(0.00001))
    o_proj.add_hook(chainer.optimizer.WeightDecay(0.00001))


    vis_process = None
    for epoch in xrange(epoch0,n_epoch):
        for train_ctr in xrange(0, n_train, batchsize):
            print  "epoch:", epoch,"train:",train_ctr,
            # discriminator
            # 0: from dataset
            # 1: from noise

            good_movie=True
            prediction_movie=n_movie*[None]
            try:
                current_movie = load_movie()
            except:
                # NOTE(review): bare except silently skips any movie that
                # fails to load -- consider narrowing the exception type.
                continue


            # Seed the rollout with the first n_timeseries-1 observed
            # frames, then fill the rest with the model's own predictions.
            for i in range(n_timeseries-1):
                if current_movie[i] is None:
                    good_movie=False
                else:
                    prediction_movie[i]=current_movie[i]
            if not good_movie: continue
            for i in range(n_timeseries-1,n_movie):
                prediction_movie[i] = evolve_image(evol,proj,prediction_movie[i-n_timeseries+1 : i])



            if train_ctr%save_interval==0:
                # Render predicted vs. observed frames at fixed offsets.
                for answer_mode in ['predict','observe']:
                    for offset in [n_timeseries,16,32,64,119]:
                        if offset >= n_movie: continue
                        img_prediction = prediction_movie[offset]
                        if answer_mode == 'observe':
                            img_prediction = current_movie[offset]
                        if img_prediction is None: continue
                        imgfn = '%s/futuresun_%d_%04d_%s+%03d.png'%(out_image_dir, epoch,train_ctr,answer_mode,offset)
                        plt.rcParams['figure.figsize'] = (12.0, 12.0)
                        plt.close('all')
                        plt.imshow(img_prediction,vmin=0,vmax=1.4)
                        plt.suptitle(imgfn)
                        plt.savefig(imgfn)
                        subprocess.call("cp %s ~/public_html/futuresun/"%(imgfn),shell=True)

                # we don't have enough disk for history
                history_dir = 'history/' #%d-%d'%(epoch,  train_ctr)
                subprocess.call("mkdir -p %s "%(history_dir),shell=True)
                subprocess.call("cp %s/*.h5 %s "%(out_model_dir,history_dir),shell=True)

                if epoch>0 or train_ctr>0:
                    print 'saving model...'
                    serializers.save_hdf5("%s/dcgan_model_evol.h5"%(out_model_dir),evol)
                    serializers.save_hdf5("%s/dcgan_state_evol.h5"%(out_model_dir),o_evol)
                    serializers.save_hdf5("%s/dcgan_model_dis.h5"%(out_model_dir),dis)
                    serializers.save_hdf5("%s/dcgan_state_dis.h5"%(out_model_dir),o_dis)
                    serializers.save_hdf5("%s/dcgan_model_proj.h5"%(out_model_dir),proj)
                    serializers.save_hdf5("%s/dcgan_state_proj.h5"%(out_model_dir),o_proj)
                    print '...saved.'


            movie_in = None
            movie_out = None
            movie_out_predict=None
            evol_scores = {}
            proj_scores = {}
            # matsuoka_shuzo[d]: whether curriculum d keeps training (named
            # after Shuzo Matsuoka, a famously never-give-up motivational
            # figure in Japan).
            matsuoka_shuzo = {}
            shuzo_evoke_timestep = []
            difficulties = ['normal','hard']
            vis_kit = {}
            for difficulty in difficulties:
                evol_scores[difficulty] = [0.0]
                proj_scores[difficulty] = [0.0]
                matsuoka_shuzo[difficulty] = True
                vis_kit[difficulty] = None
            # "No good, no good... let's give up": the 'normal' curriculum
            # is disabled; only 'hard' actually trains.
            matsuoka_shuzo['normal'] = False
            if vis_process is not None:
                # Wait for the previous visualization process to finish.
                vis_process.join()
                vis_process = None

            # start main training routine.
            print
            next_shuzo_scale=10.0 * (1+epoch)
            next_shuzo_offset = 1 + abs(int(round(np.random.normal(scale=next_shuzo_scale))))
            for train_offset in range(0,n_movie-n_timeseries):
                for difficulty in difficulties:
                    movie_clip = current_movie
                    if not matsuoka_shuzo[difficulty]:
                        # "Why are you giving up there..." -- curriculum off.
                        continue
                    else:
                        # "Never give up!"
                        pass

                    if difficulty == 'normal':
                        movie_clip_in = movie_clip
                    else:
                        movie_clip_in = prediction_movie
                    maybe_dat = create_batch(train_offset,movie_clip_in, movie_clip)
                    if not maybe_dat :
                        #print "Warning: skip offset", train_offset, "because of unavailable data."
                        continue
                    data_in, data_out, data_other = maybe_dat
                    movie_in =  Variable(cuda.to_gpu(data_in))
                    movie_out = Variable(cuda.to_gpu(data_out))
                    movie_other = Variable(cuda.to_gpu(data_other))

                    movie_out_predict_before = evol(movie_in)
                    movie_out_predict = proj(movie_out_predict_before) # no proj

                    # Snapshot host arrays for the visualization process.
                    vis_kit[difficulty] = (movie_in.data.get(),
                                          movie_out.data.get(),
                                          movie_out_predict_before.data.get(),
                                          movie_out_predict.data.get())


                    if args.norm == 'dcgan':
                        yl = dis(movie_in,movie_out_predict)
                        L_evol = F.softmax_cross_entropy(yl, Variable(xp.zeros(batchsize, dtype=np.int32)))
                        L_dis  = F.softmax_cross_entropy(yl, Variable(xp.ones(batchsize, dtype=np.int32)))

                        # train discriminator
                        yl_train = dis(movie_in,movie_out)
                        L_dis += F.softmax_cross_entropy(yl_train, Variable(xp.zeros(batchsize, dtype=np.int32)))
                    elif args.norm == 'CA':
                        L_evol = d_norm(0, dis, movie_out, movie_out_predict_before)
                        L_proj = d_norm(0, dis, movie_out, movie_out_predict)
                        L_dis  = d_norm(1, dis, movie_out, movie_out_predict_before)
                        # L_dis  += d_norm(1, dis, movie_out, movie_out_predict)
                        L_dis  += d_norm(0, dis, movie_out, movie_other)
                        # L_dis  += d_norm(0, dis, movie_other, movie_out)
                    else:
                        L2norm = (movie_out - movie_out_predict)**2
                        yl = F.sum(L2norm) / L2norm.data.size
                        L_evol = yl


                    # NOTE(review): L_proj is only assigned in the 'CA'
                    # branch above; with args.norm == 'dcgan' or the L2
                    # fallback the next line raises NameError -- confirm
                    # that only 'CA' reaches this path.
                    evol_scores[difficulty] += [L_evol.data.get()] # np.average(F.softmax(yl).data.get()[:,0])
                    proj_scores[difficulty] += [L_proj.data.get()] # np.average(F.softmax(yl).data.get()[:,0])


                    # stop learning on normal mode.
                    if difficulty == 'hard':
                        o_evol.zero_grads()
                        L_evol.backward()
                        o_evol.update()

                        o_dis.zero_grads()
                        L_dis.backward()
                        o_dis.update()

                        o_proj.zero_grads()
                        L_proj.backward()
                        o_proj.update()

                    # Free the computation graph before the next offset.
                    movie_in.unchain_backward()
                    movie_out_predict.unchain_backward()
                    movie_out_predict_before.unchain_backward()
                    movie_other.unchain_backward()
                    L_evol.unchain_backward()
                    if args.norm == 'dcgan' or args.norm == 'CA':
                        L_dis.unchain_backward()

                    sys.stdout.write('%d %6s %s: %f -> %f, %f -> %f shuzo:%s\r'%(train_offset,difficulty, args.norm,
                                                                    np.average(evol_scores['normal']), np.average(proj_scores['normal']),
                                                                    np.average(evol_scores['hard']),   np.average(proj_scores['hard']),
                                                                    str(shuzo_evoke_timestep[-10:])))
                    sys.stdout.flush()

                    # update the prediction as results of learning.
                    prediction_movie[train_offset+n_timeseries-1] = evolve_image(evol,proj,prediction_movie[train_offset: train_offset+n_timeseries-1])

                    # prevent too much learning from noisy prediction.
                    # if len(evol_scores['hard'])>=10 and np.average(evol_scores['hard'][-5:-1]) > 5 * np.average(evol_scores['normal']):
                    if train_offset == next_shuzo_offset:
                        # At random intervals, re-seed the rollout with
                        # observed frames so 'hard' inputs do not drift.
                        next_shuzo_offset = train_offset + 1 + abs(int(round(np.random.normal(scale=next_shuzo_scale))))
                        # "Absolutely never give up!"
                        # matsuoka_shuzo['hard'] = False
                        shuzo_evoke_timestep += [train_offset]
                        evol_scores['hard']=[0.0]
                        proj_scores['hard']=[0.0]
                        for t in range(train_offset, train_offset+n_timeseries):
                            if current_movie[t] is not None:
                                prediction_movie[t]=current_movie[t]


            print
            def visualize_vis_kit(vis_kit):
                # Render one grid per difficulty: input frames, raw
                # prediction, projected prediction, and ground truth.
                print "visualizing...",
                sys.stdout.flush()
                for difficulty in difficulties:
                    if vis_kit[difficulty] is None:
                        continue
                    movie_data, movie_out_data, movie_pred_data, movie_proj_data = vis_kit[difficulty]
                    imgfn = '%s/batch-%s_%d_%04d.png'%(out_image_dir,difficulty, epoch,train_ctr)

                    n_col=n_timeseries+3
                    plt.rcParams['figure.figsize'] = (1.0*n_col,1.0*batchsize)
                    plt.close('all')

                    for ib in range(batchsize):
                        for j in range(n_timeseries-1):
                            plt.subplot(batchsize,n_col,1 + ib*n_col + j)
                            if j < 2:
                                vmin=-1; vmax=1
                            else:
                                vmin=0; vmax=1.4
                            plt.imshow(movie_data[ib,j,:,:],vmin=vmin,vmax=vmax)
                            plt.axis('off')

                        plt.subplot(batchsize,n_col,1 + ib*n_col + n_timeseries-1)
                        plt.imshow(movie_pred_data[ib,0,:,:],vmin=0,vmax=1.4)
                        plt.axis('off')

                        plt.subplot(batchsize,n_col,1 + ib*n_col + n_timeseries)
                        plt.imshow(movie_proj_data[ib,0,:,:],vmin=0,vmax=1.4)
                        plt.axis('off')

                        plt.subplot(batchsize,n_col,1 + ib*n_col + n_timeseries+2)
                        plt.imshow(movie_out_data[ib,0,:,:],vmin=0,vmax=1.4)
                        plt.axis('off')

                    plt.suptitle(imgfn)
                    plt.savefig(imgfn)
                    subprocess.call("cp %s ~/public_html/suntomorrow-batch-%s-%s.png"%(imgfn,difficulty,args.gpu),shell=True)
                print "visualized.",
                sys.stdout.flush()


            # Hand the snapshots to a background process so rendering does
            # not block training.
            vis_process = Process(target=visualize_vis_kit, args=(vis_kit,))
            vis_process.start()
Esempio n. 31
0
n_epochs = 50
batch_size = 100
bprop_len = 50
train_data = corpus.train_data
whole_len = train_data.shape[0]
jump = whole_len / batch_size

accum_loss = Variable(np.zeros((), dtype=np.float32))
for i in xrange(jump * n_epochs):
	x_batch = np.array([train_data[(jump * j + i) % whole_len] for j in xrange(batch_size)])
	y_batch = np.array([train_data[(jump * j + i + 1) % whole_len] for j in xrange(batch_size)])
	print x_batch
	loss = net.trainOneStep(x_batch, y_batch)
	accum_loss += loss
	if (i + 1) % bprop_len == 0:
		print "i is %d / %d, loss is %f" % (i + 1, jump * n_epochs, accum_loss.data / bprop_len)
		optimizer.zero_grads()
		accum_loss.backward()
		accum_loss.unchain_backward() 
		accum_loss = Variable(np.zeros((), dtype=np.float32))
		optimizer.clip_grads(5.0)
		optimizer.update()


print corpus.decode(net.predict([corpus.encode("Before we proceed")], num=100)[0])
print corpus.decode(net.predict([corpus.encode("My lord")], num=100)[0])


# serializers.save_npz("model/shakespeare.mod", net)
def evaluate(architecture, waves, infos, gpu_id, waveFs, fileParam):
    """Evaluate a trained Net on the dev fold and return a confusion matrix.

    Loads model parameters from ``fileParam``, streams every dev-fold
    waveform through the network in length-sorted mini-batches, and tallies
    per-sample predictions against the true labels.

    Args:
        architecture: architecture spec consumed by ``Net`` and
            ``totalInputLength``.
        waves: collection of 1-D waveforms, indexed by wave id.
        infos: metadata consumed by ``groupLabelWave`` to map folds to waves.
        gpu_id: CUDA device id; a negative value runs on CPU (NumPy).
        waveFs: waveform sampling rate, used to size evaluation segments.
        fileParam: path of the HDF5 file holding serialized parameters.

    Returns:
        int32 ndarray of shape (numLabel, numLabel) where entry
        ``[true, predicted]`` counts individual time samples.
    """
    # Pick the array module: CuPy on the requested device, else NumPy.
    if cupy is not None and gpu_id >= 0:
        xp = cupy
        cupy.cuda.Device(gpu_id).use()
    else:
        xp = np

    # inputLength: presumably the model's receptive field in samples —
    # TODO confirm against totalInputLength's definition.
    inputLength = totalInputLength(architecture)
    labels = getLabels()
    numLabel = len(labels)
    # Fold split over wave groups; only groupFold[2] (the dev fold) is used here.
    groupFold = ((0, 1, 2), (3, ), (4, ))

    devSize = 2**1              # evaluation mini-batch size
    devSegmentSecUpper = 10     # upper bound per processed segment, seconds

    net = Net(numLabel, architecture, functions.elu)
    serializers.load_hdf5(fileParam, net)
    if gpu_id >= 0: net.to_gpu(gpu_id)

    devSegmentLenUpper = int(devSegmentSecUpper * waveFs)
    devFold = sorted(set(groupFold[2]))
    devLabelWave = groupLabelWave((devFold, ), infos)[0]
    # Flatten {label: [wave ids]} into (label index, wave id) pairs.
    devLabelWave = list(
        itertools.chain.from_iterable([[(li, i) for i in devLabelWave[la]]
                                       for li, la in enumerate(labels)]))
    # Sort by length so each mini-batch contains similarly sized waves.
    devLabelWave = sorted(devLabelWave, key=lambda lw: len(waves[lw[1]]))
    devBatchIndex = np.array_split(np.arange(len(devLabelWave)),
                                   int(np.ceil(len(devLabelWave) / devSize)))
    # Samples per label — used for the final sanity check.
    devLabelSize = np.zeros(numLabel, int32)
    for li, wi in devLabelWave:
        devLabelSize[li] += len(waves[wi])

    # Right-pad each wave by half the input window so every original
    # sample gets a prediction.
    devWaves = {}
    for li, wi in devLabelWave:
        wave = waves[wi]
        wave = np.concatenate((wave, np.zeros((inputLength - 1) // 2,
                                              float32)))
        devWaves[wi] = wave

    with chainer.using_config("enable_backprop", False):
        confusion = np.zeros((numLabel, numLabel), int32)
        for bi, index in enumerate(devBatchIndex):
            waveIndex = np.array([devLabelWave[i][1] for i in index])
            tru = np.array([devLabelWave[i][0] for i in index])
            # Batches are length-sorted, so the last wave is the longest.
            waveLen = len(devWaves[waveIndex[-1]])
            segmentTimes = np.array_split(
                np.arange(waveLen), int(np.ceil(
                    (waveLen) / devSegmentLenUpper)))
            net.reset()  # clear recurrent/internal state between batches
            for si, segTime in enumerate(segmentTimes):
                t0 = segTime[0]
                t1 = segTime[-1] + 1
                # x: zero-padded inputs; tr: targets, -1 means "ignore".
                x = np.zeros((len(index), t1 - t0), float32)
                tr = -np.ones((len(index), t1 - t0), int32)
                for xi, wi in enumerate(waveIndex):
                    if len(devWaves[wi]) <= t0: continue
                    w = devWaves[wi][t0:t1]
                    x[xi, :len(w)] = w
                    tr[xi, :len(w)] = tru[xi]
                # Mask the warm-up region lacking full left context.
                if t0 < (inputLength - 1) // 2:
                    tr[:, :(inputLength - 1) // 2 - t0] = -1

                x = x[:, newaxis, :, newaxis]  # lift to 4-D network input
                x = xp.asarray(x)
                x = Variable(x)
                x = net(x, False)
                x.unchain_backward()

                # Per-sample predicted label; copy back to host if on GPU.
                x = xp.argmax(x.data, axis=1)
                if gpu_id >= 0: x = cupy.asnumpy(x)
                x = x.flatten()
                tr = tr.flatten()
                for xi, ti in zip(x[tr >= 0], tr[tr >= 0]):
                    confusion[ti, xi] += 1

        net.reset()
        # Every labeled sample must be counted exactly once.
        assert (np.sum(confusion, axis=1) == devLabelSize).all()
        return confusion
Esempio n. 33
0
def train_dcgan_labeled(gen, retou, dis, epoch0=0):
    """Jointly train generator, 'retoucher' and discriminator (DCGAN-style).

    The discriminator classifies images as dataset (class 0) vs. generated
    (class 1).  The retoucher repeatedly refines generated images to push
    them toward class 0, while the discriminator is also trained to reject
    the retouched output.  Periodically visualizes samples and checkpoints
    all models/optimizers.

    Relies on module-level globals: args, xp, nz, n_epoch, n_train,
    batchsize, image_save_interval, out_model_dir, out_image_dir,
    load_dataset, clip_img — assumed defined elsewhere in this file.

    Args:
        gen: generator network, z-vector -> image.
        retou: retoucher network, image -> refined image.
        dis: discriminator network producing 2-class logits.
        epoch0: epoch index to resume from.
    """
    o_gen = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_retou = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_dis = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_gen.setup(gen)
    o_retou.setup(retou)
    o_dis.setup(dis)
    # Resume model and optimizer state unless a fresh start was requested.
    if not args.fresh_start:
        serializers.load_hdf5("%s/dcgan_model_dis.h5"%(out_model_dir),dis)
        serializers.load_hdf5("%s/dcgan_model_gen.h5"%(out_model_dir),gen)
        serializers.load_hdf5("%s/dcgan_model_retou.h5"%(out_model_dir),retou)
        serializers.load_hdf5("%s/dcgan_state_dis.h5"%(out_model_dir),o_dis)
        serializers.load_hdf5("%s/dcgan_state_gen.h5"%(out_model_dir),o_gen)
        serializers.load_hdf5("%s/dcgan_state_retou.h5"%(out_model_dir),o_retou)


    o_gen.add_hook(chainer.optimizer.WeightDecay(0.00001))
    o_retou.add_hook(chainer.optimizer.WeightDecay(0.00001))
    o_dis.add_hook(chainer.optimizer.WeightDecay(0.00001))

    # Noise bank for visualization: rows 0-49 stay fixed across saves so
    # progress is comparable; rows 50-99 are re-randomized at each save.
    zvis = (xp.random.uniform(-1, 1, (100, nz), dtype=np.float32))

    # Retoucher bookkeeping: current batch of images being refined, how
    # many consecutive steps failed to improve, and the best loss so far.
    x_retouch_motif = None
    retouch_fail_count = 0
    last_retouch_loss = 1.2e99

    for epoch in xrange(epoch0,n_epoch):
        print "epoch:", epoch
        perm = np.random.permutation(n_train)
        sum_l_dis = np.float32(0)
        sum_l_gen = np.float32(0)

        for i in xrange(0, n_train, batchsize):
            print "train:",i
            # discriminator
            # 0: from dataset
            # 1: from noise

            #print "load image start ", i
            x_train = load_dataset()
            #print "load image done"

            # train generator: gen wants dis to label its output as class 0
            # (dataset); dis wants to label it class 1 (noise).
            z = Variable(xp.random.uniform(-1, 1, (batchsize, nz), dtype=np.float32))
            x = gen(z)
            yl = dis(x)

            L_gen = F.softmax_cross_entropy(yl, Variable(xp.zeros(batchsize, dtype=np.int32)))
            L_dis = F.softmax_cross_entropy(yl, Variable(xp.ones(batchsize, dtype=np.int32)))

            # train discriminator
            x_train = Variable(cuda.to_gpu(x_train))
            yl_train = dis(x_train)

            # Average probability that dis assigns "dataset" to generated
            # images; NaN here means training diverged — dump state and quit.
            softmax_gen = F.softmax(yl).data[:,0]
            average_softmax=np.average(cuda.to_cpu(softmax_gen))
            if math.isnan(average_softmax) :
                serializers.save_hdf5("%s/NaN_dcgan_model_dis.h5"%(out_model_dir),dis)
                serializers.save_hdf5("%s/NaN_dcgan_model_gen.h5"%(out_model_dir),gen)
                serializers.save_hdf5("%s/NaN_dcgan_model_retou.h5"%(out_model_dir),retou)
                serializers.save_hdf5("%s/NaN_dcgan_state_dis.h5"%(out_model_dir),o_dis)
                serializers.save_hdf5("%s/NaN_dcgan_state_gen.h5"%(out_model_dir),o_gen)
                serializers.save_hdf5("%s/NaN_dcgan_state_retou.h5"%(out_model_dir),o_retou)
                exit()
            # NOTE(review): the if/elif ladder below is dead code — its
            # result is unconditionally overwritten by 2.0 right after.
            # Looks like a debugging override that was left in; confirm
            # before removing either part.
            if average_softmax < 1e-3:
                train_sample_factor = 10.0
            elif average_softmax < 1e-2:
                train_sample_factor = 4.0
            elif average_softmax > 0.4:
                train_sample_factor = 1.0
            else:
                train_sample_factor = 2.0
            train_sample_factor = 2.0

            # Weighted real-sample term: dis should label dataset images 0.
            L_dis += train_sample_factor * F.softmax_cross_entropy(yl_train, Variable(xp.zeros(batchsize, dtype=np.int32)))




            #train retoucher
            # Hand the retoucher a fresh batch of generated images when it
            # has stalled too long (threshold grows with epoch, capped at 10).
            if type(x_retouch_motif)==type(None) or retouch_fail_count >= min(1+ epoch, 10):
                print "Supply new motifs to retoucher."
                x_retouch_motif = Variable(x.data)
                retouch_fail_count = 0
                last_retouch_loss = 99e99

            x3=retou(x_retouch_motif)  # let the retoucher make the generated image better
            yl1st = dis(x3)   # and try deceive the discriminator

            # retoucher want their image to look like those from dataset(zeros),
            # while discriminators want to classify them as from noise(ones)
            L_retou = F.softmax_cross_entropy(yl1st, Variable(xp.zeros(batchsize, dtype=np.int32)))
            L_dis  += F.softmax_cross_entropy(yl1st, Variable(xp.ones(batchsize, dtype=np.int32)))

            # Update each network on its own loss.
            o_gen.zero_grads()
            L_gen.backward()
            o_gen.update()

            o_retou.zero_grads()
            L_retou.backward()
            o_retou.update()

            o_dis.zero_grads()
            L_dis.backward()
            o_dis.update()

            # Pull the retouch loss to host as a Python float and track
            # whether the retoucher is still improving.
            retouch_loss = float(str((L_retou).data))
            if retouch_loss >= last_retouch_loss:
                retouch_fail_count += 1
            last_retouch_loss = min(retouch_loss,last_retouch_loss)

            #print "backward done"

            sum_l_gen += L_gen.data.get()
            sum_l_dis += L_dis.data.get()

            # Cut computation graphs to free memory; the retouched batch
            # becomes the next iteration's motif (iterative refinement).
            x.unchain_backward()
            x_train.unchain_backward()
            x3.unchain_backward()
            x_retouch_motif = x3

            L_gen.unchain_backward()
            L_retou.unchain_backward()
            L_dis.unchain_backward()



            print "epoch:",epoch,"iter:",i,"softmax:",average_softmax, "retouch:",retouch_fail_count, retouch_loss

            # Periodic visualization + checkpoint.
            if i%image_save_interval==0:
                n_retou=2

                plt.rcParams['figure.figsize'] = (16.0,16.0*n_retou)
                plt.close('all')


                vissize = 100
                z = zvis
                z[50:,:] = (xp.random.uniform(-1, 1, (50, nz), dtype=np.float32))
                z = Variable(z)
                x = gen(z, test=True)
                x_data = x.data.get()
                imgfn = '%s/vis_%d_%d.png'%(out_image_dir, epoch,i)

                x_split = F.split_axis(x,vissize,0)


                def mktitle(x1):
                    # Title each tile with dis's "dataset" probability.
                    d1 =  F.softmax(dis(x1,test=True))
                    def ppr(d):
                        f = float(str(d.data[0,0]))
                        return '{:0.3}'.format(f)
                    ret = '{}'.format(ppr(d1))
                    return ret

                # Top rows: raw generator output.
                for i_ in range(100):
                    tmp = ((np.vectorize(clip_img)(x_data[i_,:,:,:])+1)/2).transpose(1,2,0)
                    plt.subplot(n_retou*10+9,10,1+i_%10+(i_/10)*10*(n_retou+1))
                    plt.imshow(tmp)
                    plt.axis('off')
                    plt.title(mktitle(x_split[i_]),fontsize=6)

                # Following rows: successive retoucher passes over the same
                # images, plotted beneath their originals.
                r_p_cnt = 0
                print "vis-retouch:",
                for cnt_step in (n_retou-1) * [1]:
                    r_p_cnt+=1
                    for r_cnt in range(cnt_step):
                        print r_cnt,
                        sys.stdout.flush()
                        x.unchain_backward()
                        x = retou(x, test=True)
                    x3_data = x.data.get()
                    x3_split = F.split_axis(x,vissize,0)

                    for i_ in range(100):
                        tmp = ((np.vectorize(clip_img)(x3_data[i_,:,:,:])+1)/2).transpose(1,2,0)

                        plt.subplot(n_retou*10+9,10,1+i_%10+(i_/10)*10*(n_retou+1)+10*r_p_cnt)
                        plt.imshow(tmp)
                        plt.axis('off')
                        plt.title(mktitle(x3_split[i_]),fontsize=6)
                plt.suptitle(imgfn)
                plt.savefig(imgfn)
                print imgfn

                subprocess.call("cp %s ~/public_html/dcgan-%d.png"%(imgfn,args.gpu),shell=True)

                serializers.save_hdf5("%s/dcgan_model_dis.h5"%(out_model_dir),dis)
                serializers.save_hdf5("%s/dcgan_model_gen.h5"%(out_model_dir),gen)
                serializers.save_hdf5("%s/dcgan_model_retou.h5"%(out_model_dir),retou)
                serializers.save_hdf5("%s/dcgan_state_dis.h5"%(out_model_dir),o_dis)
                serializers.save_hdf5("%s/dcgan_state_gen.h5"%(out_model_dir),o_gen)
                serializers.save_hdf5("%s/dcgan_state_retou.h5"%(out_model_dir),o_retou)

                # we don't have enough disk for history
                #history_dir = 'history/%d-%d'%(epoch,  i)
                #subprocess.call("mkdir -p %s "%(history_dir),shell=True)
                #subprocess.call("cp %s/*.h5 %s "%(out_model_dir,history_dir),shell=True)


        print 'epoch end', epoch, sum_l_gen/n_train, sum_l_dis/n_train
Esempio n. 34
0
     # Fragment: interior of a training loop whose header lies outside this
     # view.  Move this step's tensors to the GPU when enabled.
     if args.gpu >= 0:
         x_t = cuda.to_gpu(x_t)
         y_t = cuda.to_gpu(y_t)
     state, loss_i = model.forward_one_step(x_t, y_t, state, dropout_ratio=args.dropout)
     loss += loss_i
 # Per-iteration bookkeeping, then a full backward/clip/update pass.
 now = time.time()
 end_time += now - cur_at
 iterations_count += 1
 print "loss_all=" + str(loss.data)
 print "{}, train_loss = {}, time = {:.4f}".format(
     iterations_count, loss.data / (len(train_data[i % whole_len]) - 1), now - cur_at
 )
 cur_at = now
 optimizer.zero_grads()
 loss.backward()
 loss.unchain_backward()
 optimizer.clip_grads(grad_clip)
 optimizer.update()
 # At the very last iteration, dump learned weights as .npy files.
 if (i + 1) == (whole_len * n_epochs):
     cuda.cupy.save("l1_x_W.npy", model.l1_x.W)
     cuda.cupy.save("l1_x_b.npy", model.l1_x.b)
     cuda.cupy.save("l1_h_W.npy", model.l1_h.W)
     cuda.cupy.save("l1_h_b.npy", model.l1_h.b)
     cuda.cupy.save("l6_W.npy", model.l6.W)
     cuda.cupy.save("l6_b.npy", model.l6.b)
 # Epoch boundary: record training loss and begin validation
 # (the validation body is truncated below this fragment).
 if ((i + 1) % whole_len) == 0:
     epoch += 1
     train_loss_all.append(loss.data.get() / len(train_data[i % whole_len]))
     for k in xrange(whole_val_len):
         val_state = make_initial_state(n_units)
         for key, value in val_state.items():
Esempio n. 35
0
    # Fragment: body of a char-RNN training loop (header outside this view).
    # Move the batch to the GPU when enabled.
    if args.gpu >=0:
        x_batch = cuda.to_gpu(x_batch)
        y_batch = cuda.to_gpu(y_batch)

    state, loss_i = model.forward_one_step(x_batch, y_batch, state, dropout_ratio=args.dropout)
    accum_loss   += loss_i

    if (i + 1) % bprop_len == 0:  # Run truncated BPTT
        now = time.time()
        print '{}/{}, train_loss = {}, time = {:.2f}'.format((i+1)/bprop_len, jump, accum_loss.data / bprop_len, now-cur_at)
        loss_file.write('{}\n'.format(accum_loss.data / bprop_len))
        cur_at = now

        optimizer.zero_grads()
        accum_loss.backward()
        accum_loss.unchain_backward()  # truncate
        # Restart the accumulator on the right device.
        if args.gpu >= 0:
            accum_loss = Variable(cuda.zeros(()))
        else:
            accum_loss = Variable(np.zeros(()).astype(np.float32))

        optimizer.clip_grads(grad_clip)
        optimizer.update()

    # Optional periodic checkpoint of the whole model via pickle.
    if args.enable_checkpoint:
        if (i + 1) % 10000 == 0:
            fn = ('%s/charrnn_epoch_%.2f.chainermodel' % (args.checkpoint_dir, float(i)/jump))
            pickle.dump(copy.deepcopy(model).to_cpu(), open(fn, 'wb'))

    if (i + 1) % jump == 0:
        epoch += 1
Esempio n. 36
0
     # Fragment: interior of a training loop whose header lies outside this
     # view.  One forward step accumulating the loss.
     state, loss_i = model.forward_one_step(x_t,
                                            y_t,
                                            state,
                                            dropout_ratio=args.dropout)
     loss += loss_i
 # Per-iteration bookkeeping, then a full backward/clip/update pass.
 now = time.time()
 end_time += now - cur_at
 iterations_count += 1
 print 'loss_all=' + str(loss.data)
 print '{}, train_loss = {}, time = {:.4f}'.format(
     iterations_count, loss.data / (len(train_data[i % whole_len]) - 1),
     now - cur_at)
 cur_at = now
 optimizer.zero_grads()
 loss.backward()
 loss.unchain_backward()
 optimizer.clip_grads(grad_clip)
 optimizer.update()
 # At the very last iteration, dump learned weights as .npy files.
 if (i + 1) == (whole_len * n_epochs):
     cuda.cupy.save('l1_x_W.npy', model.l1_x.W)
     cuda.cupy.save('l1_x_b.npy', model.l1_x.b)
     cuda.cupy.save('l1_h_W.npy', model.l1_h.W)
     cuda.cupy.save('l1_h_b.npy', model.l1_h.b)
     cuda.cupy.save('l6_W.npy', model.l6.W)
     cuda.cupy.save('l6_b.npy', model.l6.b)
 # Epoch boundary: record training loss and begin validation
 # (the validation body is truncated below this fragment).
 if ((i + 1) % whole_len) == 0:
     epoch += 1
     train_loss_all.append(loss.data.get() / len(train_data[i % whole_len]))
     for k in xrange(whole_val_len):
         val_state = make_initial_state(n_units)
         for key, value in val_state.items():
Esempio n. 37
0
    # Fragment: body of a char-RNN training loop (header outside this view).
    # Move the batch to the GPU when enabled.
    if args.gpu >=0:
        x_batch = cuda.to_gpu(x_batch)
        y_batch = cuda.to_gpu(y_batch)

    state, loss_i = model.forward_one_step(x_batch, y_batch, state, dropout_ratio=args.dropout)
    accum_loss   += loss_i

    if (i + 1) % bprop_len == 0:  # Run truncated BPTT
        now = time.time()
        print('{}/{}, train_loss = {}, time = {:.2f}'.format((i+1)/bprop_len, jump, accum_loss.data / bprop_len, now-cur_at))
        cur_at = now

        optimizer.zero_grads()
        accum_loss.backward()
        accum_loss.unchain_backward()  # truncate
        # Restart the accumulator on the right device.
        if args.gpu >= 0:
            accum_loss = Variable(cuda.zeros(()))
        else:
            accum_loss = Variable(np.zeros((), dtype=np.float32))

        optimizer.clip_grads(grad_clip)
        optimizer.update()

    # Periodic checkpoint of the whole model via pickle.
    if (i + 1) % 10000 == 0:
        fn = ('%s/charrnn_epoch_%.2f.chainermodel' % (args.checkpoint_dir, float(i)/jump))
        pickle.dump(copy.deepcopy(model).to_cpu(), open(fn, 'wb'))

    if (i + 1) % jump == 0:
        epoch += 1
Esempio n. 38
0
    def update_core(self):
        """One training iteration for the encoder/decoder/discriminator trio.

        Pulls a batch from the 'main' iterator, updates enc, dec and dis in
        sequence, and every 10 iterations evaluates on a batch from the
        'test' iterator, appending the losses to eval_log.txt.
        """
        enc_optimizer = self.get_optimizer('enc')
        dec_optimizer = self.get_optimizer('dec')
        dis_optimizer = self.get_optimizer('dis')

        enc, dec, dis = self.enc, self.dec, self.dis
        xp = enc.xp

        batch = self.get_iterator('main').next()
        batchsize = len(batch)
        # Channel counts come from the first sample's (input, target) pair.
        in_ch = batch[0][0].shape[0]
        out_ch = batch[0][1].shape[0]
        w_in = 256   # fixed input resolution
        w_out = 256  # fixed output resolution

        # Assemble the batch into (B, C, H, W) float32 arrays.
        x_in = xp.zeros((batchsize, in_ch, w_in, w_in)).astype("f")
        t_out = xp.zeros((batchsize, out_ch, w_out, w_out)).astype("f")

        for i in range(batchsize):
            x_in[i, :] = xp.asarray(batch[i][0])
            t_out[i, :] = xp.asarray(batch[i][1])
        x_in = Variable(x_in)

        z = enc(x_in)
        x_out = dec(z)

        y_fake = dis(x_in, x_out)
        y_real = dis(x_in, t_out)

        # Update each network, cutting the graph between updates so each
        # loss only backpropagates into its own network.
        enc_optimizer.update(self.loss_enc, enc, x_out, t_out, y_fake)
        for z_ in z:
            z_.unchain_backward()
        dec_optimizer.update(self.loss_dec, dec, x_out, t_out, y_fake)
        x_in.unchain_backward()
        x_out.unchain_backward()
        dis_optimizer.update(self.loss_dis, dis, y_real, y_fake)

        eval_interval = 10
        if self.iteration % eval_interval == 0:
            eval_fp = open("eval_log.txt", "a")

            # Forward pass on a test batch (same assembly as above).
            batch = self.get_iterator("test").next()
            batchsize = len(batch)
            in_ch = batch[0][0].shape[0]
            out_ch = batch[0][1].shape[0]
            w_in = 256
            w_out = 256

            x_in = xp.zeros((batchsize, in_ch, w_in, w_in)).astype("f")
            t_out = xp.zeros((batchsize, out_ch, w_out, w_out)).astype("f")

            for i in range(batchsize):
                x_in[i, :] = xp.asarray(batch[i][0])
                t_out[i, :] = xp.asarray(batch[i][1])
            x_in = Variable(x_in)

            z = enc(x_in)
            x_out = dec(z)

            y_fake = dis(x_in, x_out)
            y_real = dis(x_in, t_out)

            # NOTE(review): the `self` parameter of both helpers receives
            # self.loss_enc / self.loss_dis at the call sites below but is
            # never used inside them — it is a placeholder, not an instance.
            def eval_enc(self, enc, x_out, t_out, y_out, lam1=100, lam2=1):
                # L1 reconstruction term plus adversarial softplus term.
                batchsize, _, w, h = y_out.data.shape
                loss_rec = lam1 * (F.mean_absolute_error(x_out, t_out))
                loss_adv = lam2 * F.sum(F.softplus(-y_out)) / batchsize / w / h
                loss = loss_rec + loss_adv
                # NOTE(review): this reports the file object, not the loss —
                # likely intended {'eval_loss': loss}; confirm before fixing.
                chainer.report({'eval_loss': eval_fp}, enc)
                return loss

            def eval_dis(self, dis, y_in, y_out):
                # Standard GAN discriminator loss on real vs. fake logits.
                batchsize, _, w, h = y_in.data.shape
                L1 = F.sum(F.softplus(-y_in)) / batchsize / w / h
                L2 = F.sum(F.softplus(y_out)) / batchsize / w / h
                loss = L1 + L2
                # NOTE(review): same issue — reports the file object, not
                # the loss value.
                chainer.report({'eval_loss': eval_fp}, dis)
                return loss

            eval_fp.write('{}\t{}\t\t{}\t\t{}\n'.format(
                self.epoch, self.iteration,
                eval_enc(self.loss_enc, enc, x_out, t_out, y_fake),
                eval_dis(self.loss_dis, dis, y_real, y_fake)))
            eval_fp.close()