Example #1
    def __fit_one(self, link, content_layers, style_patches):
        xp = self.xp
        link.zerograds()
        layers = self.model(link.x)
        if self.keep_color:
            trans_layers = self.model(util.gray(link.x))
        else:
            trans_layers = layers
        loss_info = []
        loss = Variable(xp.zeros((), dtype=np.float32))
        for name, content_layer in content_layers:
            layer = layers[name]
            content_loss = self.content_weight * F.mean_squared_error(
                layer, Variable(content_layer.data))
            loss_info.append(('content_' + name, float(content_loss.data)))
            loss += content_loss
        for name, style_patch, style_patch_norm in style_patches:
            patch = trans_layers[name]
            near, size, size2 = util.nearest_neighbor_patch(
                patch, style_patch, style_patch_norm)
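            # patch-based style term: squared patch activations minus twice the
            # summed nearest-neighbor responses, normalized by the patch counts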
            style_loss = self.style_weight * (
                F.sum(F.square(patch)) * size2 / size - 2 * F.sum(near) / size)

            loss_info.append(('style_' + name, float(style_loss.data)))
            loss += style_loss
        tv_loss = self.tv_weight * util.total_variation(link.x)
        loss_info.append(('tv', float(tv_loss.data)))
        loss += tv_loss
        loss.backward()
        self.optimizer.update()
        return loss_info
Example #2
    def __fit_one(self, link, content_layers, style_grams):
        xp = self.xp
        link.zerograds()
        layers = self.model(link.x)
        if self.keep_color:
            trans_layers = self.model(util.gray(link.x))
        else:
            trans_layers = layers
        loss_info = []
        loss = Variable(xp.zeros((), dtype=np.float32))
        for name, content_layer in content_layers:
            layer = layers[name]
            content_loss = self.content_weight * F.mean_squared_error(layer, Variable(content_layer.data))
            loss_info.append(('content_' + name, float(content_loss.data)))
            loss += content_loss
        for name, style_gram in style_grams:
            gram = util.gram_matrix(trans_layers[name])
            style_loss = self.style_weight * F.mean_squared_error(gram, Variable(style_gram.data))
            loss_info.append(('style_' + name, float(style_loss.data)))
            loss += style_loss
        tv_loss = self.tv_weight * util.total_variation(link.x)
        loss_info.append(('tv', float(tv_loss.data)))
        loss += tv_loss
        loss.backward()
        self.optimizer.update()
        return loss_info
Example #3
def optimizeCRNN(iterNum,maxIndex,indicies):
    batchSize = 1000
    model = EvalCRNN(maxIndex,500)
    print(len(indicies),computeEntropy(maxIndex,indicies))
    learningRate = 0.001
    epoch = 3 
    for j in range(epoch):
        
        my_optimizer = optimizers.RMSpropGraves(lr = learningRate)
        my_optimizer.setup(model) 
        my_optimizer.add_hook(optimizer.GradientClipping(1))
        
        model.cRNN.reset()
        
        loss = Variable(np.array([[0]]))
        for i in range(iterNum):
            t1 = time.clock()
            model.zerograds()
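            # detach the previous iteration's loss from the computational graph (truncated BPTT)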
            loss.unchain_backward()
            loss = model(indicies[batchSize*i:batchSize*(i+1)],iterNum*batchSize)
            loss.backward()
            t2 = time.clock()
            
            msg = "iter: " + str(i + iterNum * j + 1) + "/" + str(iterNum * epoch) 
            msgLoss = "loss: " + str(loss.data/batchSize)
            msgNorm = "grad: " + str(my_optimizer.compute_grads_norm())
            msgTime = "time: " + str(t2 - t1) + " seconds"
            print(msgLoss,msgNorm,msg,msgTime)
            my_optimizer.update()

        learningRate *= 0.50

    print(model(indicies[batchSize*(iterNum):batchSize*(iterNum+10)]).data/(batchSize*10))
    return model.cRNN
Example #4
    def __fit_one(self, link, content4_2, style3_2,style4_2):
        xp = self.xp
        link.zerograds()
        layer3_2,layer4_2 = self.model(link.x)
        if self.keep_color:
            #trans_layers = self.model(util.gray(link.x))
            print "don't keep color!"
        loss_info = []
        loss = Variable(xp.zeros((), dtype=np.float32))
        #layer = layers[name]
        content_loss = self.content_weight * F.mean_squared_error(layer4_2, Variable(content4_2))
        loss_info.append(('content_', float(content_loss.data)))
        loss += content_loss

        style_patch, style_patch_norm =  style3_2
        near,size,size2 = util.nearest_neighbor_patch(layer3_2, style_patch, style_patch_norm)
        style_loss = self.style_weight * (F.sum(F.square(layer3_2))*size2/size-2*F.sum(near)/size) 
        loss_info.append(('style_', float(style_loss.data)))
        loss+=style_loss
        
        style_patch, style_patch_norm =  style4_2
        near,size,size2 = util.nearest_neighbor_patch(layer4_2, style_patch, style_patch_norm)
        style_loss = self.style_weight *1.5* (F.sum(F.square(layer4_2))*size2/size-2*F.sum(near)/size) 
        loss_info.append(('style_', float(style_loss.data)))
        loss+= style_loss

        tv_loss = self.tv_weight * util.total_variation(link.x)
        loss_info.append(('tv', float(tv_loss.data)))
        loss+=tv_loss
        loss.backward()
        self.optimizer.update()
        return loss_info
Example #5
    def step(self, perm, batch_index, mode, epoch):
        if mode == 'train':
            data, first_words, label = self.read_batch(perm, batch_index,
                                                       self.train_data, mode)
            train = True
        else:
            data, first_words, label = self.read_batch(perm, batch_index,
                                                       self.test_data, mode)
            train = False

        data = Variable(cuda.to_gpu(data))
        state = {
            name: Variable(
                self.xp.zeros((self.batchsize, 1024), dtype=self.xp.float32))
            for name in ('c1', 'h1')
        }
        loss = Variable(cuda.cupy.asarray(0.0).astype(np.float32))
        acc = 0.0

        ### image-encoder ###
        h = self.enc(data, train=train, test=not train)
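        # re-wrap the encoder output through .data to cut the graph, so gradients
        # are not propagated back into the image encoder (only the decoder is updated)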
        h = h.data
        h = Variable(h)

        ### first LSTM ###
        state, _ = self.dec(h, state, train=train, test=not train, image=True)
        ### input <SOS> ###
        state, y = self.dec(Variable(cuda.to_gpu(first_words)),
                            state,
                            train=train,
                            test=not train)
        loss += F.softmax_cross_entropy(y, Variable(cuda.to_gpu(label.T[1])))
        acc += F.accuracy(y,
                          Variable(cuda.to_gpu(label.T[1])),
                          ignore_label=-1).data.get()

        for cur_word, next_word in zip(label.T[1:-1], label.T[2:]):
            state, y = self.dec(Variable(cuda.to_gpu(cur_word)),
                                state,
                                train=train,
                                test=not train)
            loss += F.softmax_cross_entropy(y,
                                            Variable(cuda.to_gpu(next_word)))
            acc += F.accuracy(y,
                              Variable(cuda.to_gpu(next_word)),
                              ignore_label=-1).data.get()

        if mode == 'train':
            self.dec.cleargrads()
            loss.backward()
            self.o_dec.update()

        return {
            "prediction": 0,
            "current_loss": loss.data.get() / (label.T.shape[0]),
            "current_accuracy": acc / (label.T.shape[0]),
        }
Example #6
def generate_image(img_orig, img_style, width, nw, nh, max_iter, lr, img_gen=None):
    batch_size = img_orig.shape[0]
    mid_orig = nn.forward(Variable(img_orig, volatile=True))
    style_mats = [get_matrix(y) for y in nn.forward(Variable(img_style, volatile=True))]

    if img_gen is None:
        if args.gpu >= 0:
            img_gen_ = xp.random.uniform(-20,20,(3,width,width),dtype=np.float32)
            img_gen = xp.random.uniform(-20,20,(batch_size,3,width,width),dtype=np.float32)
            img_gen[:,:,:,:] = img_gen_
        else:
            img_gen_ = np.random.uniform(-20,20,(3,width,width)).astype(np.float32)
            img_gen = np.random.uniform(-20,20,(batch_size,3,width,width)).astype(np.float32)
            img_gen[:,:,:,:] = img_gen_
    x = Variable(img_gen)
    xg = xp.zeros_like(x.data)
    optimizer = optimizers.Adam(alpha=lr)
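    # early Chainer-style setup: the optimizer operates directly on the
    # (parameter array, gradient buffer) pair rather than on a Link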
    optimizer.setup((img_gen,xg))
    for i in range(max_iter):

        x = Variable(img_gen)
        y = nn.forward(x)

        optimizer.zero_grads()
        L = Variable(xp.zeros((), dtype=np.float32))
        for l in range(len(y)):
            gogh_matrix = get_matrix(y[l])

            L1 = np.float32(args.lam) * np.float32(nn.alpha[l])*F.mean_squared_error(y[l], Variable(mid_orig[l].data))
            L2 = np.float32(nn.beta[l])*F.mean_squared_error(gogh_matrix, Variable(style_mats[l].data))/np.float32(len(y))
            L += L1+L2

            if i%100==0:
                print (i,l,L1.data,L2.data)

        L.backward()
        xg += x.grad
        optimizer.update()

        tmp_shape = img_gen.shape
        if args.gpu >= 0:
            img_gen += Clip().forward(img_gen).reshape(tmp_shape) - img_gen
        else:
            def clip(x):
                return -120 if x<-120 else (136 if x>136 else x)
            img_gen += np.vectorize(clip)(img_gen).reshape(tmp_shape) - img_gen

        if i%50==0:
            for j in range(img_gen.shape[0]):
                save_image(img_gen[j], W, nw[j], nh[j], args.out_dir+"_%d/im_%05d.png"%(j,i))

    for j in range(img_gen.shape[0]):
        save_image(img_gen[j], W, nw[j], nh[j], args.out_dir+"_last/im_%d.png"%(j))
Example #7
def generate_image(img_orig, img_style, width, nw, nh, max_iter, lr, img_gen=None):
    batch_size = img_orig.shape[0]
    mid_orig = nn.forward(Variable(img_orig, volatile=True))
    style_mats = [get_matrix(y) for y in nn.forward(Variable(img_style, volatile=True))]

    if img_gen is None:
        if args.gpu >= 0:
            img_gen_ = xp.random.uniform(-20,20,(3,width,width),dtype=np.float32)
            img_gen = xp.random.uniform(-20,20,(batch_size,3,width,width),dtype=np.float32)
            img_gen[:,:,:,:] = img_gen_
        else:
            img_gen_ = np.random.uniform(-20,20,(3,width,width)).astype(np.float32)
            img_gen = np.random.uniform(-20,20,(batch_size,3,width,width)).astype(np.float32)
            img_gen[:,:,:,:] = img_gen_
    x = Variable(img_gen)
    xg = xp.zeros_like(x.data)
    optimizer = optimizers.Adam(alpha=lr)
    optimizer.setup((img_gen,xg))
    for i in range(max_iter):

        x = Variable(img_gen)
        y = nn.forward(x)

        optimizer.zero_grads()
        L = Variable(xp.zeros((), dtype=np.float32))
        for l in range(len(y)):
            gogh_matrix = get_matrix(y[l])

            L1 = np.float32(args.lam) * np.float32(nn.alpha[l])*F.mean_squared_error(y[l], Variable(mid_orig[l].data))
            L2 = np.float32(nn.beta[l])*F.mean_squared_error(gogh_matrix, Variable(style_mats[l].data))/np.float32(len(y))
            L += L1+L2

            if i%100==0:
                print(i, l, L1.data, L2.data)

        L.backward()
        xg += x.grad
        optimizer.update()

        tmp_shape = img_gen.shape
        if args.gpu >= 0:
            img_gen += Clip().forward(img_gen).reshape(tmp_shape) - img_gen
        else:
            def clip(x):
                return -120 if x<-120 else (136 if x>136 else x)
            img_gen += np.vectorize(clip)(img_gen).reshape(tmp_shape) - img_gen

        if i%50==0:
            for j in range(img_gen.shape[0]):
                save_image(img_gen[j], W, nw[j], nh[j], args.out_dir+"_%d/im_%05d.png"%(j,i))

    for j in range(img_gen.shape[0]):
        save_image(img_gen[j], W, nw[j], nh[j], args.out_dir+"_last/im_%d.png"%(j))
Example #8
class StatefulAgent(Agent):
    def __init__(self, model, optimizer=None, gpu=-1, cutoff=None, last=False):

        super(StatefulAgent, self).__init__(model,
                                            optimizer=optimizer,
                                            gpu=gpu,
                                            last=last,
                                            cutoff=cutoff)

        # cutoff for BPTT
        self.cutoff = cutoff

        # whether to update from loss in last step only
        self.last = last

        # keep track of loss for truncated BPTT
        self.loss = Variable(self.xp.zeros((), 'float32'))

    def run(self, data, train=True, idx=None, final=False):

        if (idx) % self.cutoff == 0:
            self.reset()

        loss = self.model(map(lambda x: Variable(self.xp.asarray(x)), data),
                          train=True)

        if self.last:  # used in case we propagate back at end of trials only
            if ((idx + 1) % self.cutoff) == 0:
                self.loss = loss
            else:
                loss = Variable(self.xp.zeros((), 'float32'))
        else:
            self.loss += loss

        # normalize by number of datapoints in minibatch
        _loss = float(loss.data)

        # backpropagate if we reach the cutoff for truncated backprop or if we processed the last batch
        if train and ((self.cutoff and
                       ((idx + 1) % self.cutoff) == 0) or final):
            self.optimizer.zero_grads()
            self.loss.backward()
            self.loss.unchain_backward()
            self.optimizer.update()
            self.loss = Variable(self.xp.zeros((), 'float32'))

        if not train:
            self.loss.unchain_backward()

        return _loss
Example #9
def generate_image(img_orig, img_style, width, nw, nh, max_iter, lr, img_gen=None):
    with chainer.using_config('enable_backprop', True):
        mid_orig = nn.forward(Variable(img_orig))
    with chainer.using_config('enable_backprop', True):
        style_mats = [get_matrix(y) for y in nn.forward(Variable(img_style))]

    if img_gen is None:
        if args.gpu >= 0:
            img_gen = xp.random.uniform(-20,20,(1,3,width,width),dtype=np.float32)
        else:
            img_gen = np.random.uniform(-20,20,(1,3,width,width)).astype(np.float32)
    img_gen = chainer.links.Parameter(img_gen)
    optimizer = optimizers.Adam(alpha=lr)
    optimizer.setup(img_gen)
    for i in range(max_iter):
        img_gen.zerograds()

        x = img_gen.W
        y = nn.forward(x)

        L = Variable(xp.zeros((), dtype=np.float32))
        for l in range(len(y)):
            ch = y[l].data.shape[1]
            wd = y[l].data.shape[2]
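            # Gram matrix of the layer activations (style representation),
            # normalized by channels * spatial positions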
            gogh_y = F.reshape(y[l], (ch,wd**2))
            gogh_matrix = F.matmul(gogh_y, gogh_y, transb=True)/np.float32(ch*wd**2)

            L1 = np.float32(args.lam) * np.float32(nn.alpha[l])*F.mean_squared_error(y[l], Variable(mid_orig[l].data))
            L2 = np.float32(nn.beta[l])*F.mean_squared_error(gogh_matrix, Variable(style_mats[l].data))/np.float32(len(y))
            L += L1+L2

            if i%100==0:
                print(i,l,L1.data,L2.data)

        L.backward()
        img_gen.W.grad = x.grad
        optimizer.update()

        tmp_shape = x.data.shape
        if args.gpu >= 0:
            img_gen.W.data += Clip().forward(img_gen.W.data).reshape(tmp_shape) - img_gen.W.data
        else:
            def clip(x):
                return -120 if x<-120 else (136 if x>136 else x)
            img_gen.W.data += np.vectorize(clip)(img_gen.W.data).reshape(tmp_shape) - img_gen.W.data

        if i%50==0:
            save_image(img_gen.W.data, W, nw, nh, i)
Example #10
def generate_image(img_orig, img_style, width, nw, nh, max_iter, lr, img_gen=None):
    mid_orig = nn.forward(Variable(img_orig, volatile=True))
    style_mats = [get_matrix(y) for y in nn.forward(Variable(img_style, volatile=True))]

    if img_gen is None:
        if args.gpu >= 0:
            img_gen = xp.random.uniform(-20,20,(1,3,width,width),dtype=np.float32)
        else:
            img_gen = np.random.uniform(-20,20,(1,3,width,width)).astype(np.float32)
    x = Variable(img_gen)
    xg = xp.zeros_like(x.data)
    optimizer = optimizers.Adam(alpha=lr)
    optimizer.setup((img_gen,xg))
    for i in range(max_iter):

        x = Variable(img_gen)
        y = nn.forward(x)

        optimizer.zero_grads()
        L = Variable(xp.zeros((), dtype=np.float32))
        for l in range(len(y)):
            ch = y[l].data.shape[1]
            wd = y[l].data.shape[2]
            gogh_y = F.reshape(y[l], (ch,wd**2))
            gogh_matrix = F.matmul(gogh_y, gogh_y, transb=True)/np.float32(ch*wd**2)

            L1 = np.float32(args.lam) * np.float32(nn.alpha[l])*F.mean_squared_error(y[l], Variable(mid_orig[l].data))
            L2 = np.float32(nn.beta[l])*F.mean_squared_error(gogh_matrix, Variable(style_mats[l].data))/np.float32(len(y))
            L += L1+L2

            if i%100==0:
                print(i, l, L1.data, L2.data)

        L.backward()
        xg += x.grad
        optimizer.update()

        tmp_shape = img_gen.shape
        if args.gpu >= 0:
            img_gen += Clip().forward(img_gen).reshape(tmp_shape) - img_gen
        else:
            def clip(x):
                return -120 if x<-120 else (136 if x>136 else x)
            img_gen += np.vectorize(clip)(img_gen).reshape(tmp_shape) - img_gen

        if i%3000==0:
            save_image(img_gen, W, nw, nh, i)
Example #11
    def step(self,perm,batch_index,mode,epoch): 
        if mode=='train':
            data, first_words, label=self.read_batch(perm,batch_index,self.train_data,mode)
            train = True
        else :
            data, first_words, label=self.read_batch(perm,batch_index,self.test_data,mode)
            train = False

        data = Variable(cuda.to_gpu(data))
        state = {name: Variable(self.xp.zeros((self.batchsize, 1024),dtype=self.xp.float32)) for name in ('c1', 'h1')}
        loss=Variable(cuda.cupy.asarray(0.0).astype(np.float32))
        acc=0.0

        ### image-encoder ###
        h = self.enc(data, train=train, test=not train)
        h=h.data
        h=Variable(h)


        ### first LSTM ###
        state,_ = self.dec(h, state,train=train, test=not train, image=True)
        ### input <SOS> ###
        state,y = self.dec(Variable(cuda.to_gpu(first_words)), state,train=train, test=not train)
        loss += F.softmax_cross_entropy(y, Variable(cuda.to_gpu(label.T[1])))
        acc += F.accuracy(y, Variable(cuda.to_gpu(label.T[1])), ignore_label=-1).data.get()

        for cur_word,next_word in zip(label.T[1:-1],label.T[2:]):
            state,y = self.dec(Variable(cuda.to_gpu(cur_word)), state,train=train, test=not train)
            loss += F.softmax_cross_entropy(y, Variable(cuda.to_gpu(next_word)))
            acc += F.accuracy(y, Variable(cuda.to_gpu(next_word)), ignore_label=-1).data.get()

        if mode=='train':
            self.dec.cleargrads()    
            loss.backward()
            self.o_dec.update()


        return {"prediction": 0,
                "current_loss": loss.data.get()/(label.T.shape[0]),
                "current_accuracy": acc/(label.T.shape[0]),
                }
Example #12
    def train(self, data):

        if not self.cutoff:
            cutoff = data.nbatches
        else:
            cutoff = self.cutoff

        self.model.predictor.reset_state()

        cumloss = self.xp.zeros((), 'float32')

        loss = Variable(self.xp.zeros((), 'float32'))

        # check if we are in train or test mode (used e.g. for dropout)
        self.model.predictor.test = False
        self.model.predictor.train = True

        for _x, _t in data:

            x = Variable(_x)
            t = Variable(_t)
            self.model.predictor(x)
            # backpropagate if we reach the cutoff for truncated backprop or if we processed the last batch
            if data.step % cutoff == 0 or data.step == data.nbatches:

                loss += self.model(x, t)
                self.optimizer.zero_grads()

                loss.backward()
                loss.unchain_backward()
                self.optimizer.update()
                #self.model.predictor[0][0].U.W.data[10:,:]=0

                cumloss += loss.data
                loss = Variable(self.xp.zeros((), 'float32'))
                self.model.predictor.reset_state()

        return float(cumloss / (data.batch_ind.shape[1]))
Example #13
    def experience_replay(self, time):
        if self.initial_exploration < time:
            replay_goal = min(len(self.goal_history), self.goal_replay_size)
            replay_all = min(replay_goal,
                             self.replay_size - self.goal_replay_size)
            # print "REPLAYING {} good and {} all".format(replay_goal, replay_all)

            replay_index = random.sample(range(len(self.history)), replay_all)
            goal_replay_index = random.sample(range(len(self.goal_history)),
                                              replay_goal)
            r_episodes = [deepcopy(self.history[id]) for id in replay_index] + \
                [deepcopy(self.goal_history[id]) for id in goal_replay_index]

            # # Can be harmful
            # # randomly decide length of episodes
            # for episode in r_episodes:
            #     length = random.randint(1, len(episode.actions))
            #     episode.actions = episode.actions[:length]
            #     episode.rewards = episode.rewards[:length]

            # update target model
            if self.initial_exploration == time + 1:
                self.optimizer.zero_grads()

            loss = Variable(np.asarray(np.float32(0.0)))
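            # accumulate the loss over all replayed episodes, then do a single backward pass and update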
            for episode in r_episodes:
                loss += self.get_loss(episode)
            loss.backward()
            self.optimizer.update()
            self.target_model_update(time, soft_update=False)

            # set model to original state
            if self.history[-1].ended:
                self.model.set_state([-1])
            else:
                self.model.set_state([-1] + self.history[-1].actions)
Example #14
    def train(self, data):
        self.model.predictor.reset_state()

        cumloss = self.xp.zeros((), 'float32')

        loss = Variable(self.xp.zeros((), 'float32'))

        # check if we are in train or test mode (e.g. for dropout)
        self.model.predictor.test = False
        self.model.predictor.train = True

        for _x, _t in data:

            x = Variable(self.xp.asarray(_x))
            t = Variable(self.xp.asarray(_t))

            loss = self.model(x, t)
            cumloss += loss.data

            self.optimizer.zero_grads()
            loss.backward()
            self.optimizer.update()

        return float(cumloss / data.nbatches)
Example #15
def main():
    # arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_dir', type=str, default='data/dazai')
    parser.add_argument('--checkpoint_dir', type=str, default='model')
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--rnn_size', type=int, default=128)
    parser.add_argument('--learning_rate', type=float, default=2e-3)
    parser.add_argument('--learning_rate_decay', type=float, default=0.97)
    parser.add_argument('--learning_rate_decay_after', type=int, default=10)
    parser.add_argument('--decay_rate', type=float, default=0.95)
    parser.add_argument('--dropout', type=float, default=0.0)
    parser.add_argument('--seq_length', type=int, default=50)
    parser.add_argument('--batchsize', type=int, default=50)
    parser.add_argument('--epochs', type=int, default=50)
    parser.add_argument('--grad_clip', type=int, default=5)
    parser.add_argument('--init_from', type=str, default='')
    parser.add_argument('--enable_checkpoint', type=bool, default=True)
    parser.add_argument('--file_name', type=str, default='input.txt')
    args = parser.parse_args()

    if not os.path.exists(args.checkpoint_dir):
        os.mkdir(args.checkpoint_dir)

    n_epochs = args.epochs
    n_units = args.rnn_size
    batchsize = args.batchsize
    bprop_len = args.seq_length
    grad_clip = args.grad_clip

    xp = cuda.cupy if args.gpu >= 0 else np

    train_data, words, vocab = load_data(args.data_dir, args.file_name)
    pickle.dump(vocab, open('%s/vocab.bin' % args.data_dir, 'wb'))

    if len(args.init_from) > 0:
        model = pickle.load(open(args.init_from, 'rb'))
    else:
        model = CharRNN(len(vocab), n_units)

    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()

    optimizer = optimizers.RMSprop(lr=args.learning_rate,
                                   alpha=args.decay_rate,
                                   eps=1e-8)
    #optimizer = chainer.optimizers.SGD(lr=1.0)
    optimizer.setup(model)
    optimizer.add_hook(
        chainer.optimizer.GradientClipping(grad_clip))  # set an upper bound on the gradients

    whole_len = train_data.shape[0]
    #jump         = whole_len / batchsize
    jump = int(whole_len / batchsize)
    epoch = 0
    start_at = time.time()
    cur_at = start_at
    state = make_initial_state(n_units, batchsize=batchsize)
    if args.gpu >= 0:
        accum_loss = Variable(xp.zeros(()).astype(np.float32))
        for key, value in state.items():
            value.data = cuda.to_gpu(value.data)
    else:
        accum_loss = Variable(xp.zeros(()).astype(np.float32))

    print('going to train {} iterations'.format(jump * n_epochs / bprop_len))
    sum_perp = 0
    count = 0
    iteration = 0
    for i in range(jump * n_epochs):
        x_batch = xp.array([
            train_data[(jump * j + i) % whole_len] for j in range(batchsize)
        ])
        y_batch = xp.array([
            train_data[(jump * j + i + 1) % whole_len]
            for j in range(batchsize)
        ])

        if args.gpu >= 0:
            x_batch = cuda.to_gpu(x_batch)
            y_batch = cuda.to_gpu(y_batch)

        state, loss_i = model.forward_one_step(x_batch,
                                               y_batch,
                                               state,
                                               dropout_ratio=args.dropout)
        accum_loss += loss_i
        count += 1

        if (i + 1) % bprop_len == 0:  # Run truncated BPTT
            iteration += 1
            sum_perp += accum_loss.data
            now = time.time()
            #print('{}/{}, train_loss = {}, time = {:.2f}'.format((i+1)/bprop_len, jump, accum_loss.data / bprop_len, now-cur_at))
            print('{}/{}, train_loss = {}, time = {:.2f}'.format(
                (i + 1) / bprop_len, jump * n_epochs / bprop_len,
                accum_loss.data / bprop_len, now - cur_at))
            cur_at = now

            model.cleargrads()
            #optimizer.zero_grads()
            accum_loss.backward()
            accum_loss.unchain_backward()  # truncate
            #accum_loss = Variable(xp.zeros(()).astype(np.float32))
            if args.gpu >= 0:
                accum_loss = Variable(xp.zeros(()).astype(np.float32))
                #accum_loss = Variable(cuda.zeros(()))
            else:
                accum_loss = Variable(np.zeros((), dtype=np.float32))
            #optimizer.clip_grads(grad_clip)
            optimizer.update()

        if (i + 1) % 1000 == 0:
            print('epoch: ', epoch)
            print('iteration: ', iteration)
            print('training perplexity: ', np.exp(float(sum_perp) / count))
            sum_perp = 0
            count = 0

        if args.enable_checkpoint:
            if (i + 1) % 10000 == 0:
                fn = ('%s/charrnn_epoch_%.2f.chainermodel' %
                      (args.checkpoint_dir, float(i) / jump))
                pickle.dump(copy.deepcopy(model).to_cpu(), open(fn, 'wb'))
                pickle.dump(
                    copy.deepcopy(model).to_cpu(),
                    open('%s/latest.chainermodel' % (args.checkpoint_dir),
                         'wb'))

        if (i + 1) % jump == 0:
            epoch += 1

            if epoch >= args.learning_rate_decay_after:
                optimizer.lr *= args.learning_rate_decay
                print('decayed learning rate by a factor {} to {}'.format(
                    args.learning_rate_decay, optimizer.lr))

        sys.stdout.flush()
Example #16
    def train(self, words, steps, batchsize=100, sequence_length=10):
        """ Train the Predictor's model on words for steps number of steps. """

        whole_len = len(words)
        train_data = np.ndarray(whole_len, dtype=np.int32)
        jumps = steps * sequence_length

        # Initialize training data and maybe vocab.
        vocab_initializing = False
        if self.vocab is None:
            vocab_initializing = True
            self.vocab = {}
        for i, word in enumerate(words):
            if vocab_initializing:
                if word not in self.vocab:
                    self.vocab[word] = len(self.vocab)
            train_data[i] = self.vocab[word]
        vocab_initializing = False

        print 'corpus length:', len(words)
        print 'self.vocab size:', len(self.vocab)

        # Initialize base model (if we need to)
        if self.model is None:
            self.model = BaseRNN(len(self.vocab), self.units)

        if self.gpu >= 0:
            cuda.get_device(self.gpu).use()
            self.model.to_gpu()

        optimizer = optimizers.RMSprop(lr=self.settings.learning_rate,
                                       alpha=self.settings.decay_rate,
                                       eps=1e-8)
        optimizer.setup(self.model)

        jumpsPerEpoch = whole_len / batchsize
        epoch = 0
        start_at = time.time()
        cur_at = start_at
        state = make_initial_state(self.units, batchsize=batchsize)

        if self.gpu >= 0:
            accum_loss = Variable(cuda.zeros(()))
            for _, value in state.items():
                value.data = cuda.to_gpu(value.data)
        else:
            accum_loss = Variable(np.zeros((), dtype=np.float32))

        print 'going to train {} iterations'.format(steps)
        for i in xrange(jumps):
            x_batch = np.array([
                train_data[(jumpsPerEpoch * j + i) % whole_len]
                for j in xrange(batchsize)
            ])
            y_batch = np.array([
                train_data[(jumpsPerEpoch * j + i + 1) % whole_len]
                for j in xrange(batchsize)
            ])

            if self.gpu >= 0:
                x_batch = cuda.to_gpu(x_batch)
                y_batch = cuda.to_gpu(y_batch)

            state, loss_i = self.model.forward_one_step(
                x_batch, y_batch, state, dropout_ratio=self.settings.dropout)
            accum_loss += loss_i

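            # run truncated BPTT every sequence_length steps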
            if (i + 1) % sequence_length == 0:
                now = time.time()
                print '{}/{}, train_loss = {}, time = {:.2f}'.format(
                    (i + 1) / sequence_length, steps,
                    accum_loss.data / sequence_length, now - cur_at)
                cur_at = now

                optimizer.zero_grads()
                accum_loss.backward()
                accum_loss.unchain_backward()  # truncate
                if self.gpu >= 0:
                    accum_loss = Variable(cuda.zeros(()))
                else:
                    accum_loss = Variable(np.zeros((), dtype=np.float32))

                optimizer.clip_grads(self.settings.grad_clip)
                optimizer.update()

            if (i + 1) % jumpsPerEpoch == 0:
                epoch += 1

                if epoch >= self.settings.learning_rate_decay_after:
                    optimizer.lr *= self.settings.learning_rate_decay
                    print 'decayed learning rate by a factor {} to {}'.format(
                        self.settings.learning_rate_decay, optimizer.lr)
Example #17
    def update_core(self):
        optimizer_sd = self.get_optimizer('main')
        optimizer_enc = self.get_optimizer('enc')
        optimizer_dec = self.get_optimizer('dec')
        optimizer_dis = self.get_optimizer('dis')
        xp = self.seed.xp

        step = self.iteration % self.args.iter
        osem_step = step % self.args.osem
        if step == 0:
            batch = self.get_iterator('main').next()
            self.prImg, self.rev, self.patient_id, self.slice = self.converter(batch, self.device)
            print(self.prImg.shape)
            self.n_reconst += 1
            self.recon_freq = 1
            if ".npy" in self.args.model_image:
                self.seed.W.array = xp.reshape(xp.load(self.args.model_image),(1,1,self.args.crop_height,self.args.crop_width))
            elif ".dcm" in self.args.model_image:
                ref_dicom = dicom.read_file(self.args.model_image, force=True)
                img = xp.array(ref_dicom.pixel_array+ref_dicom.RescaleIntercept)
                img = (2*(xp.clip(img,self.args.HU_base,self.args.HU_base+self.args.HU_range)-self.args.HU_base)/self.args.HU_range-1.0).astype(np.float32)
                self.seed.W.array = xp.reshape(img,(1,1,self.args.crop_height,self.args.crop_width))
            else:
#                initializers.Uniform(scale=0.5)(self.seed.W.array)
                initializers.HeNormal()(self.seed.W.array)
            self.initial_seed = self.seed.W.array.copy()
#            print(xp.min(self.initial_seed),xp.max(self.initial_seed),xp.mean(self.initial_seed))

        ## for seed array
        arr = self.seed()
        HU = self.var2HU(arr)
        raw = self.HU2raw(HU)

        self.seed.cleargrads()
        loss_seed = Variable(xp.array([0.0],dtype=np.float32))
        # conjugate correction using system matrix
        if self.args.lambda_sd > 0:
            self.seed.W.grad = xp.zeros_like(self.seed.W.array)
            loss_sd = 0
            for i in range(len(self.prImg)):
                if self.rev[i]:
                    rec_sd = F.exp(-F.sparse_matmul(self.prMats[osem_step],F.reshape(raw[i,:,::-1,::-1],(-1,1)))) ##
                else:
                    rec_sd = F.exp(-F.sparse_matmul(self.prMats[osem_step],F.reshape(raw[i],(-1,1)))) ##
                if self.args.log:
                    loss_sd += F.mean_squared_error(F.log(rec_sd),F.log(self.prImg[i][osem_step]))
                else:
                    loss_sd += F.mean_squared_error(rec_sd,self.prImg[i][osem_step])
                if self.args.system_matrix:
                    gd = F.sparse_matmul( self.conjMats[osem_step], rec_sd-self.prImg[i][osem_step], transa=True)
                    if self.rev[i]:
                        self.seed.W.grad[i] -= self.args.lambda_sd * F.reshape(gd, (1,self.args.crop_height,self.args.crop_width)).array[:,::-1,::-1]    # / logrep.shape[0] ?
                    else:
                        self.seed.W.grad[i] -= self.args.lambda_sd * F.reshape(gd, (1,self.args.crop_height,self.args.crop_width)).array    # / logrep.shape[0] ?

            if not self.args.system_matrix:
                (self.args.lambda_sd *loss_sd).backward()
            chainer.report({'loss_sd': loss_sd/len(self.prImg)}, self.seed)

        if self.args.lambda_tvs > 0:
            loss_tvs = losses.total_variation(arr, tau=self.args.tv_tau, method=self.args.tv_method)
            loss_seed += self.args.lambda_tvs * loss_tvs
            chainer.report({'loss_tvs': loss_tvs}, self.seed)

        if self.args.lambda_advs>0:
            L_advs = F.average( (self.dis(arr)-1.0)**2 )
            loss_seed += self.args.lambda_advs * L_advs
            chainer.report({'loss_advs': L_advs}, self.seed)

        ## generator output
        arr_n = losses.add_noise(arr,self.args.noise_gen)
        if self.args.no_train_seed:
            arr_n.unchain()
        if not self.args.decoder_only:
            arr_n = self.encoder(arr_n)
        gen = self.decoder(arr_n) # range = [-1,1]

        ## generator loss
        loss_gen = Variable(xp.array([0.0],dtype=np.float32))
        plan, plan_ae = None, None
        if self.args.lambda_ae1>0 or self.args.lambda_ae2>0:
            plan = losses.add_noise(Variable(self.converter(self.get_iterator('planct').next(), self.device)), self.args.noise_dis)
            plan_enc = self.encoder(plan)
            plan_ae = self.decoder(plan_enc)
            loss_ae1 = F.mean_absolute_error(plan,plan_ae)
            loss_ae2 = F.mean_squared_error(plan,plan_ae)
            if self.args.lambda_reg>0:
                loss_reg_ae = losses.loss_func_reg(plan_enc[-1],'l2')
                chainer.report({'loss_reg_ae': loss_reg_ae}, self.seed)
                loss_gen += self.args.lambda_reg * loss_reg_ae
            loss_gen += self.args.lambda_ae1 * loss_ae1 + self.args.lambda_ae2 * loss_ae2
            chainer.report({'loss_ae1': loss_ae1}, self.seed)
            chainer.report({'loss_ae2': loss_ae2}, self.seed)
        if self.args.lambda_tv > 0:
            L_tv = losses.total_variation(gen, tau=self.args.tv_tau, method=self.args.tv_method)
            loss_gen += self.args.lambda_tv * L_tv
            chainer.report({'loss_tv': L_tv}, self.seed)
        if self.args.lambda_adv>0:
            L_adv = F.average( (self.dis(gen)-1.0)**2 )
            loss_gen += self.args.lambda_adv * L_adv
            chainer.report({'loss_adv': L_adv}, self.seed)
        ## regularisation on the latent space
        if self.args.lambda_reg>0:
            loss_reg = losses.loss_func_reg(arr_n[-1],'l2')
            chainer.report({'loss_reg': loss_reg}, self.seed)
            loss_gen += self.args.lambda_reg * loss_reg

        self.encoder.cleargrads()
        self.decoder.cleargrads()
        loss_gen.backward()
        loss_seed.backward()
        chainer.report({'loss_gen': loss_gen}, self.seed)
        optimizer_enc.update()
        optimizer_dec.update()
        optimizer_sd.update()

        chainer.report({'grad_sd': F.average(F.absolute(self.seed.W.grad))}, self.seed)
        if hasattr(self.decoder, 'latent_fc'):
            chainer.report({'grad_gen': F.average(F.absolute(self.decoder.latent_fc.W.grad))}, self.seed)

        # reconstruction consistency for NN
        if (step % self.recon_freq == 0) and self.args.lambda_nn>0:
            self.encoder.cleargrads()
            self.decoder.cleargrads()
            self.seed.cleargrads()
            gen.grad = xp.zeros_like(gen.array)

            HU_nn = self.var2HU(gen)
            raw_nn = self.HU2raw(HU_nn)
            loss_nn = 0
            for i in range(len(self.prImg)):
                if self.rev[i]:
                    rec_nn = F.exp(-F.sparse_matmul(self.prMats[osem_step],F.reshape(raw_nn[i,:,::-1,::-1],(-1,1))))
                else:
                    rec_nn = F.exp(-F.sparse_matmul(self.prMats[osem_step],F.reshape(raw_nn[i],(-1,1))))
                loss_nn += F.mean_squared_error(rec_nn,self.prImg[i][osem_step])
                if self.args.system_matrix:
                    gd_nn = F.sparse_matmul( rec_nn-self.prImg[i][osem_step], self.conjMats[osem_step], transa=True )
                    if self.rev[i]:
                        gen.grad[i] -= self.args.lambda_nn * F.reshape(gd_nn, (1,self.args.crop_height,self.args.crop_width)).array[:,::-1,::-1]
                    else:
                        gen.grad[i] -= self.args.lambda_nn * F.reshape(gd_nn, (1,self.args.crop_height,self.args.crop_width)).array
            chainer.report({'loss_nn': loss_nn/len(self.prImg)}, self.seed)
            if self.args.system_matrix:
                gen.backward()
            else:
                (self.args.lambda_nn * loss_nn).backward()

            if not self.args.no_train_seed:
                optimizer_sd.update()
            if not self.args.no_train_enc:
                optimizer_enc.update()
            if not self.args.no_train_dec:
                optimizer_dec.update()

            if self.seed.W.grad is not None:
                chainer.report({'grad_sd_consistency': F.average(F.absolute(self.seed.W.grad))}, self.seed)
            if hasattr(self.decoder, 'latent_fc'):
                chainer.report({'grad_gen_consistency': F.average(F.absolute(self.decoder.latent_fc.W.grad))}, self.seed)
            elif hasattr(self.decoder, 'ul'):
                chainer.report({'grad_gen_consistency': F.average(F.absolute(self.decoder.ul.c1.c.W.grad))}, self.seed)

        chainer.report({'seed_diff': F.mean_absolute_error(self.initial_seed,self.seed.W)/F.mean_absolute_error(self.initial_seed,xp.zeros_like(self.initial_seed))}, self.seed)

        # clip seed to [-1,1]
        if self.args.clip:
            self.seed.W.array = xp.clip(self.seed.W.array,a_min=-1.0, a_max=1.0)

        # adjust consistency loss update frequency
        self.recon_freq = max(1,int(round(self.args.max_reconst_freq * (step-self.args.reconst_freq_decay_start) / (self.args.iter+1-self.args.reconst_freq_decay_start))))

        ## for discriminator
        fake = None
        if self.args.dis_freq > 0 and ( (step+1) % self.args.dis_freq == 0) and (self.args.lambda_gan+self.args.lambda_adv+self.args.lambda_advs>0):
            # get mini-batch
            if plan is None:
                plan = self.converter(self.get_iterator('planct').next(), self.device)
                plan = losses.add_noise(Variable(plan),self.args.noise_dis)
            
            # create fake
            if self.args.lambda_gan>0:
                if self.args.decoder_only:
                    fake_seed = xp.random.uniform(-1,1,(1,self.args.latent_dim)).astype(np.float32)
                else:
                    fake_seed = self.encoder(xp.random.uniform(-1,1,(1,1,self.args.crop_height,self.args.crop_width)).astype(np.float32))
                fake = self.decoder(fake_seed)
                # decoder
                self.decoder.cleargrads()
                loss_gan = F.average( (self.dis(fake)-1.0)**2 )
                chainer.report({'loss_gan': loss_gan}, self.seed)
                loss_gan *= self.args.lambda_gan
                loss_gan.backward()
                optimizer_dec.update(loss=loss_gan)
                fake_copy = self._buffer.query(fake.array)
            if self.args.lambda_nn>0:
                fake_copy = self._buffer.query(self.converter(self.get_iterator('mvct').next(), self.device))
            if (step+1) % (self.args.iter // 30):
                fake_copy = Variable(self._buffer.query(gen.array))
            # discriminator
            L_real = F.average( (self.dis(plan)-1.0)**2 )
            L_fake = F.average( self.dis(fake_copy)**2 )
            loss_dis = 0.5*(L_real+L_fake)
            self.dis.cleargrads()
            loss_dis.backward()
            optimizer_dis.update()
            chainer.report({'loss_dis': (L_real+L_fake)/2}, self.seed)


        if ((self.iteration+1) % self.args.vis_freq == 0) or  ((step+1)==self.args.iter):
            for i in range(self.args.batchsize):
                outlist=[]
                if not self.args.no_train_seed and not self.args.decoder_only:
                    outlist.append((self.seed()[i],"0sd"))
                if plan_ae is not None:
                    outlist.append((plan[i],'2pl'))
                    outlist.append((plan_ae[i],'3ae'))
                if self.args.lambda_nn>0 or self.args.lambda_adv>0:
                    if self.args.decoder_only:
                        gen_img = self.decoder([self.seed()])
                    else:
                        gen_img = self.decoder(self.encoder(self.seed()))
                    outlist.append((gen_img[i],'1gn'))
                if fake is not None:
                    outlist.append((fake[i],'4fa'))
                for out,typ in outlist:
                    out.to_cpu()
                    HU = (((out+1)/2 * self.args.HU_range)+self.args.HU_base).array  # [-1000=air,0=water,>1000=bone]
                    print("type: ",typ,"HU:",np.min(HU),np.mean(HU),np.max(HU))
                    #visimg = np.clip((out.array+1)/2,0,1) * 255.0
                    b,r = -self.args.HU_range_vis//2,self.args.HU_range_vis
                    visimg = (np.clip(HU,b,b+r)-b)/r * 255.0
                    fn = 'n{:0>5}_iter{:0>6}_p{}_z{}_{}'.format(self.n_reconst,step+1,self.patient_id[i],self.slice[i],typ)
                    write_image(np.uint8(visimg),os.path.join(self.args.out,fn+'.jpg'))
                    if (step+1)==self.args.iter or (not self.args.no_save_dcm):
                        #np.save(os.path.join(self.args.out,fn+'.npy'),HU[0])
                        write_dicom(os.path.join(self.args.out,fn+'.dcm'),HU[0])
Example #18
list_sentences = [np.array(row, np.int32) for row in list_sentences]


opt1 = SGD_Embedid() # use stochastic gradient descent
opt2 = SGD() # use stochastic gradient descent
opt1.setup(model1) # initialize the optimizer
opt2.setup(model2) # initialize the optimizer
opt1.tuples[0][1].fill(0)
opt2.zero_grads()
random.shuffle(list_sentences)
list_minibatch = []
for i, sentence in enumerate(list_sentences):
    list_minibatch.append(sentence)
    if len(list_minibatch) == BATCH_SIZE:
        accum_loss_total = Variable(np.zeros((), dtype=np.float32)) # initial value of the accumulated loss
        uniq_sentence = np.zeros((), np.int32)
        for batch_sentence in list_minibatch:
            accum_loss_total += forward(batch_sentence) # compute the loss
            uniq_sentence = np.append(uniq_sentence, batch_sentence)
        accum_loss_total.backward() # backpropagation
        opt1.clip_grads(10) # clip overly large gradients
        opt2.clip_grads(10) # clip overly large gradients
        uniq_sentence = np.unique(uniq_sentence)
        opt1.update(uniq_sentence) # update the parameters
        opt2.update() # update the parameters
        opt1.zero_grads(uniq_sentence) # reset the gradients
        opt2.zero_grads() # reset the gradients
        list_minibatch = []
    if i % 1000 == 999:
        break
Example #19
    def _train(self, **kwargs):
            gpu = -1 if "gpu" not in kwargs else kwargs["gpu"]
            lr = 2e-3 if "lr" not in kwargs else kwargs["lr"]
            lr_decay = 0.97 if "lr_decay" not in kwargs else kwargs["lr_decay"]
            lr_decay_after=10 if "lr_decay_after" not in kwargs else kwargs["lr_decay_after"]
            decay_rate = 0.95 if "decay_rate" not in kwargs else kwargs["decay_rate"]
            dropout = 0.0 if "dropout" not in kwargs else kwargs["dropout"]
            bprop_len   = 50 if "bprop_len" not in kwargs else kwargs["bprop_len"]
            batchsize   = 50 if "batchsize" not in kwargs else kwargs["batchsize"]
            grad_clip   = 5 if "grad_clip" not in kwargs else kwargs["grad_clip"]
            n_epochs  = 5 if "epochs" not in kwargs else kwargs["epochs"]
            if gpu >= 0:
                cuda.get_device(gpu).use()
                self.model.to_gpu()

            optimizer = optimizers.RMSprop(lr=lr, alpha=decay_rate, eps=1e-8)
            optimizer.setup(self.model)

            train_data = self.dataset
            whole_len    = train_data.shape[0]
            jump         = whole_len // batchsize
            epoch        = 0
            start_at     = time.time()
            cur_at       = start_at
            state        = self.model.make_initial_state(batchsize=batchsize)

            if gpu >= 0:
                accum_loss   = Variable(cuda.zeros(()))
                for key, value in state.items():
                    value.data = cuda.to_gpu(value.data)#plist
            else:
                accum_loss   = Variable(np.zeros((), dtype=np.float32))

            print ('going to train {} iterations'.format(jump * n_epochs))

            for i in range(jump * n_epochs):
                x_batch = np.array([train_data[(jump * j + i) % whole_len]
                                    for j in range(batchsize)])
                y_batch = np.array([train_data[(jump * j + i + 1) % whole_len]
                                    for j in range(batchsize)])

                if gpu >=0:
                    x_batch = cuda.to_gpu(x_batch)
                    y_batch = cuda.to_gpu(y_batch)

                state, loss_i = self.model.forward_one_step(x_batch, y_batch, state, dropout_ratio=dropout)
                accum_loss   += loss_i

                if (i + 1) % bprop_len == 0:  # Run truncated BPTT
                    now = time.time()
                    sys.stderr.write('\r{}/{}, train_loss = {}, time = {:.2f}'.format((i+1)//bprop_len,(jump*n_epochs)//bprop_len, accum_loss.data / bprop_len, now-cur_at))
                    sys.stderr.flush()
                    cur_at = now

                    optimizer.zero_grads()
                    accum_loss.backward()
                    accum_loss.unchain_backward()  # truncate

                    if gpu >= 0:
                        accum_loss = Variable(cuda.zeros(()))
                    else:
                        accum_loss = Variable(np.zeros((), dtype=np.float32))

                    optimizer.clip_grads(grad_clip)
                    optimizer.update()

                    if (i + 1) % 10000 == 0:
                        pickle.dump(copy.deepcopy(self.model).to_cpu(), open(self.model_path, 'wb'))

                    if (i + 1) % jump == 0:
                        epoch += 1

                    if epoch >= lr_decay_after:
                        optimizer.lr *= lr_decay
                        print ('decayed learning rate by a factor {} to {}'.format(lr_decay, optimizer.lr))
                sys.stdout.flush()

            pickle.dump(copy.deepcopy(self.model).to_cpu(), open(self.model_path, 'wb'))
Example #20
def train_encoder(model,
                  dictionary: corpora.Dictionary,
                  sentence_file: str,
                  model_dir: str,
                  epoch_size: int = 100,
                  batch_size: int = 30,
                  dropout: bool = True,
                  gpu: int = -1) -> None:
    if gpu >= 0:
        model.to_gpu()
        print(model.xp)

    # setup SGD optimizer
    opt = optimizers.SGD()
    opt.setup(model)

    # optimizer hooks
    clip_threshold = 5.0
    print("set optimizer clip threshold: {}".format(clip_threshold))
    opt.add_hook(chainer.optimizer.GradientClipping(clip_threshold))

    # load conversation sentences
    sentences = load_sentence(sentence_file)
    data_size = len(sentences)

    print("data size: {}".format(data_size))
    for epoch in range(epoch_size):
        print("epoch {}".format(epoch))

        indexes = np.random.permutation(data_size)
        epoch_loss = 0  # int

        for bat_i in range(0, data_size, batch_size):
            forward_start_time = datetime.now()
            batch_loss = Variable(model.xp.zeros((), dtype=model.xp.float32))

            for index in indexes[bat_i:bat_i + batch_size]:
                input_words = sentences[index]
                # convert the tokens into a list of ids
                input_words_with_s = tokens2ids(input_words,
                                                dictionary,
                                                verbose=False)

                # forward pass
                try:
                    new_loss = model(input_words_with_s,
                                     dropout=dropout,
                                     state=None,
                                     train=True)
                    if model.xp.isnan(new_loss.data):
                        sys.exit(1)

                    batch_loss += new_loss
                except Exception:
                    print(index, input_words_with_s)
                    import traceback
                    traceback.print_exc()

            # average over the batch size
            batch_size_array = model.xp.array(batch_size,
                                              dtype=model.xp.float32)
            # if gpu:
            #     batch_size_array = cuda.to_gpu(batch_size_array)
            batch_loss = batch_loss / Variable(batch_size_array)
            epoch_loss += batch_loss.data

            # timing
            forward_end_time = datetime.now()

            # optimization step
            opt_start_time = datetime.now()
            model.zerograds()
            batch_loss.backward()
            opt.update()
            opt_end_time = datetime.now()

            forward_delta = forward_end_time - forward_start_time
            opt_delta = opt_end_time - opt_start_time

            print_fmt = ("epoch {} batch {}: "
                         "loss {}, grad L2 norm: {}, forward {}, optimizer {}")
            print(
                print_fmt.format(
                    epoch,
                    int(bat_i / batch_size),
                    batch_loss.data,
                    opt.compute_grads_norm(),
                    forward_delta,
                    opt_delta,
                ))
            # save
            if ((bat_i / batch_size) + 1) % 100 == 0:
                serializers.save_npz(os.path.join(model_dir, "model.npz"),
                                     model)
            if ((bat_i / batch_size) + 1) % 1000 == 0:
                serializers.save_npz(
                    os.path.join(
                        model_dir, "model_{}_{}_{}.npz".format(
                            epoch,
                            int(bat_i / batch_size) + 1,
                            datetime.now().strftime("%Y%m%d-%H%M%S"))), model)
        print("finish epoch {}, loss {}".format(epoch,
                                                epoch_loss / epoch_size))
        # save
        serializers.save_npz(os.path.join(model_dir, "model.npz"), model)
        serializers.save_npz(
            os.path.join(
                model_dir, "model_{}_{}_{}.npz".format(
                    epoch,
                    int(bat_i / batch_size) + 1,
                    datetime.now().strftime("%Y%m%d-%H%M%S"))), model)
Example #21
def main():
    args      = parse_args()
    init_program_state(args)
    vocab     = make_vocab()
    data, batched_data = load_data(args.train, vocab, args.batch_size)
    dev , batched_dev  = load_data(args.dev, vocab, 1)
    test, batched_test = load_data(args.test, vocab, 1)
    model = init_model(input_size = len(vocab),
            embed_size   = args.embed_size,
            hidden_size  = args.hidden_size,
            output_size  = len(vocab))
    optimizer = optimizers.SGD(lr=args.lr)
    
    # Begin Training
    UF.init_model_parameters(model)
    model = UF.convert_to_GPU(USE_GPU, model)
    optimizer.setup(model)
    
    batchsize  = args.batch_size
    epoch      = args.epoch
    accum_loss = Variable(xp.zeros((), dtype=np.float32))
    counter    = 0
    # For each epoch..
    for ep in range(epoch):
        UF.trace("Training Epoch %d" % ep)
        total_tokens = 0
        log_ppl      = 0.0
        
        # For each batch, do forward & backward computations
        for i, batch in enumerate(batched_data):
            loss, nwords  = forward(model, batch)
            accum_loss   += loss
            log_ppl      += loss.data.reshape(())
            # Tracing...
            total_tokens += nwords
#            UF.trace('  %d/%d = %.5f' % (min(i*batchsize, len(data)), len(data), loss.data.reshape(())*batchsize))
            # Counting
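            # every bp_len batches, backpropagate the accumulated loss and truncate the graph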
            if (counter+1) % bp_len == 0:
                optimizer.zero_grads()
                accum_loss.backward()
                accum_loss.unchain_backward()
                accum_loss = Variable(xp.zeros((), dtype=np.float32))
                
                optimizer.clip_grads(grad_clip)
                optimizer.update()
            counter += 1
        # Counting Perplexity
        log_ppl /= total_tokens
        UF.trace("  PPL (Train)  = %.10f" % math.exp(UF.to_cpu(USE_GPU, log_ppl)))
        dev_ppl = evaluate(model, batched_dev)
        UF.trace("  PPL (Dev)    = %.10f" % math.exp(UF.to_cpu(USE_GPU, dev_ppl)))

        # Reducing learning rate
        if ep > 6:
            optimizer.lr /= 1.2
            UF.trace("Reducing LR:", optimizer.lr)

    # Begin Testing
    UF.trace("Begin Testing...")
    test_ppl = evaluate(model, batched_test)
    UF.trace("  log(PPL) = %.10f" % test_ppl)
    UF.trace("  PPL      = %.10f" % math.exp(UF.to_cpu(USE_GPU, test_ppl)))
Example #22
def train_encoder_decoder(model,
                          dictionary: corpora.Dictionary,
                          conversation_file: str,
                          decoder_model_dir: str,
                          epoch_size: int = 100,
                          batch_size: int = 30,
                          dropout: bool = False,
                          gpu: int = -1) -> None:
    if gpu >= 0:
        model.to_gpu()
        print(model.xp)

    # setup SGD optimizer
    # opt = optimizers.SGD()
    opt = optimizers.Adam()
    opt.setup(model)

    # optimizer hooks
    clip_threshold = 5.0
    print("set optimizer clip threshold: {}".format(clip_threshold))
    opt.add_hook(chainer.optimizer.GradientClipping(clip_threshold))

    # load conversation sentences
    conversation = load_conversation(conversation_file, dictionary)
    data_size = len(conversation)

    print("data size: {}".format(data_size))
    for epoch in range(epoch_size):
        print("running epoch {}".format(epoch))
        indexes = np.random.permutation(range(data_size))
        epoch_loss = 0  # int

        for bat_i in range(0, data_size, batch_size):
            forward_start_time = datetime.now()
            batch_loss = Variable(model.xp.zeros((), dtype=np.float32))  # loss accumulated over the minibatch

            for index in indexes[bat_i:bat_i + batch_size]:
                pair_words = conversation[index]

                # encoder input words
                orig_words = pair_words[0][:-1]  # remove END_SYMBOL
                reply_words = pair_words[1]
                if orig_words:
                    assert orig_words[-1] is not config.END_SYMBOL
                input_words_with_s = tokens2ids(orig_words, dictionary)
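                # run the encoder over the input word ids; its final state seeds the decoder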
                ys, state = model.predictor.forward([
                    Variable(model.xp.array([word], dtype=model.xp.int32))
                    for word in input_words_with_s
                ],
                                                    state=None,
                                                    dropout=dropout,
                                                    train=True)

                # decode
                assert reply_words[0] == config.END_SYMBOL
                assert reply_words[-1] == config.END_SYMBOL
                output_words_with_s = tokens2ids(reply_words, dictionary)
                try:
                    new_loss = model(
                        output_words_with_s,
                        state=state,  # use the encoder's final state as the decoder's initial state
                        dropout=dropout,
                        train=True)
                    batch_loss += new_loss
                except Exception:
                    print(index, input_words_with_s)
                    import traceback
                    traceback.print_exc()
            # average the loss over the minibatch
            batch_size_array = model.xp.array(batch_size,
                                              dtype=model.xp.float32)
            batch_loss = batch_loss / Variable(batch_size_array)
            epoch_loss += batch_loss.data

            # timing
            forward_end_time = datetime.now()

            # optimization
            opt_start_time = datetime.now()
            model.zerograds()
            batch_loss.backward()
            opt.update()
            opt_end_time = datetime.now()

            forward_delta = forward_end_time - forward_start_time
            opt_delta = opt_end_time - opt_start_time
            # print(
            #     ("decoder epoch {} batch {}: loss {}, "
            #      "forward {}, optimizer {},").format(
            #         epoch,
            #         int(bat_i / batch_size),
            #         batch_loss.data,
            #         forward_delta,
            #         opt_delta,
            #     )
            # )
            print_fmt = ("epoch {} batch {}: "
                         "loss {}, grad L2 norm: {}, forward {}, optimizer {}")
            print(
                print_fmt.format(
                    epoch,
                    int(bat_i / batch_size),
                    batch_loss.data,
                    opt.compute_grads_norm(),
                    forward_delta,
                    opt_delta,
                ))
            # save
            if ((bat_i / batch_size) + 1) % 100 == 0:
                serializers.save_npz(
                    os.path.join(decoder_model_dir, "model.npz"), model)
            if ((bat_i / batch_size) + 1) % 1000 == 0:
                serializers.save_npz(
                    os.path.join(
                        decoder_model_dir, "model_{}_{}_{}.npz".format(
                            epoch,
                            int(bat_i / batch_size) + 1,
                            datetime.now().strftime("%Y%m%d-%H%M%S"))), model)
        print("finish epoch {}, loss {}".format(
            epoch, epoch_loss / math.ceil(data_size / batch_size)))
        # save
        serializers.save_npz(os.path.join(decoder_model_dir, "model.npz"),
                             model)
        serializers.save_npz(
            os.path.join(
                decoder_model_dir, "model_{}_{}_{}.npz".format(
                    epoch,
                    int(bat_i / batch_size) + 1,
                    datetime.now().strftime("%Y%m%d-%H%M%S"))), model)
Ejemplo n.º 23
0
def generate_image(img_orig,
                   img_style,
                   width,
                   nw,
                   nh,
                   max_iter,
                   lr,
                   img_gen=None):
    # feature maps of the original image from the network
    mid_orig = nn.forward(Variable(img_orig))
    # shape of style_mats is (4, ch, ch)
    style_mats = [get_matrix(y) for y in nn.forward(Variable(img_style))]
    # set the initial image
    if args.initial_img != 'nothing':
        img_gen = image_resize(args.initial_img, W)

    if img_gen is None:

        img_gen = np.random.uniform(-20, 20,
                                    (1, 3, width, width)).astype(np.float32)
    img_gen = chainer.links.Parameter(img_gen)
    optimizer = optimizers.Adam(alpha=lr)
    #optimize the img_gen
    optimizer.setup(img_gen)
    for i in range(max_iter):
        img_gen.zerograds()

        x = img_gen.W
        # y is the list of feature maps produced by the network for the input img_gen
        y = nn.forward(x)

        L = Variable(xp.zeros((), dtype=np.float32))
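        # accumulate the content (L1) and style (L2) losses over all feature layers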
        for l in range(len(y)):
            ch = y[l].data.shape[1]
            wd = y[l].data.shape[2]
            gogh_y = F.reshape(y[l], (ch, wd**2))

            # correlation (Gram) matrix of y
            gogh_matrix = F.matmul(gogh_y, gogh_y, transb=True) / np.float32(
                ch * wd**2)
            # difference between y and mid_orig
            # deeper layers may be more important for capturing object shapes
            L1 = (l + 1) * 0.3 * np.float32(args.lam) * np.float32(
                nn.alpha[l]) * F.mean_squared_error(y[l],
                                                    Variable(mid_orig[l].data))
            # difference between gogh_matrix and style_mats (both are Gram/correlation matrices)
            L2 = np.float32(nn.beta[l]) * F.mean_squared_error(
                gogh_matrix, Variable(style_mats[l].data)) / np.float32(len(y))
            L += L1 + L2

            if i % 100 == 0:
                print(i, l, L1.data, L2.data)

        L.backward()
        img_gen.W.grad = x.grad
        optimizer.update()

        tmp_shape = x.data.shape
        if args.gpu >= 0:
            img_gen.W.data += Clip().forward(
                img_gen.W.data).reshape(tmp_shape) - img_gen.W.data
        else:

            def clip(x):
                return -120 if x < -120 else (136 if x > 136 else x)

            img_gen.W.data += np.vectorize(clip)(
                img_gen.W.data).reshape(tmp_shape) - img_gen.W.data

        if i % 50 == 0:
            save_image(img_gen.W.data, W, nw, nh, i)

        if int(i) == int(max_iter - 1):
            chainer.serializers.save_npz('final_img.npz', img_gen)
Ejemplo n.º 24
0
pg0=get_batch6.GET_BATCH6(band_num=BAND_BUNKATU0, seq_num=SEQUENCE_LEN0,n_delay=NDELAY,npoint=NPOINT0,fs0=FS0,fc0=FC1, gain0=GAIN1, q0=Q1)
a1=pg0.a1
b1=pg0.b1

loss   = Variable(np.zeros((), dtype=np.float32))
losses =[]

NUMBER_ITERATION=501

for i in range(NUMBER_ITERATION):
	
	x,y = pg0.get1()  # get train data
	loss, state =  rnn.compute_loss(model, x, y, state)  # do one sequence while batch bands
	model.cleargrads()
	loss.backward()
	optimizer.update()
	
	losses.append(loss.data /(SEQUENCE_LEN0 * 1.0))  # total loss  while one BAND_BUNKATU0
	
	state = rnn.make_initial_state( batchsize=BAND_BUNKATU0 )  	# clear for next batch-sequence-input
	
	if i%20==0:
		plt.plot(losses,"b")
		plt.yscale('log')
		plt.title('loss')
		plt.pause(1.0)
		print "loss.data (%06d)="%i, loss.data / (SEQUENCE_LEN0 * 1.0)
	##if i%100==0:  # save model parameter in the directory model20  every 100 
	##	serializers.save_npz('model20/%06d_my.model.npz'%i, model)
Ejemplo n.º 25
0
    def train(self,
              train_data,
              train_input,
              test_data,
              test_input,
              n_epochs,
              filename=None,
              KL_loss=False,
              Add_training=False):
        """
        
        :param train_data: data in the form n_batches x batch_size x n_steps x n_outputs
        :param test_data: data in the form n_batches x batch_size x n_steps x n_outputs
        :param n_epochs: nr of training epochs
        :param dec_input: this is the input to the decoder, which can modulate input dynamics; size: step_size x n_inputs
        :return:
        """

        # keep track of loss
        train_loss = np.zeros(n_epochs)
        test_loss = np.zeros(n_epochs)
        batches_loss = np.zeros(train_data.shape[0] * n_epochs)

        # keep track of learned alphas and weights
        if self.model.mode != 'Static':
            learning_alphasS = np.empty(
                (n_epochs + 1, self.model.hidden.alphaS.alpha.size))
            learning_alphasR = np.empty(
                (n_epochs + 1, self.model.hidden.alphaR.alpha.size))
            learning_alphasS[0, :] = self.model.hidden.alphaS.alpha
            learning_alphasR[0, :] = self.model.hidden.alphaR.alpha

#        saved_U_fast = np.zeros((n_epochs,self.model.n_fast, self.model.n_slow+self.model.n_inout))
#        saved_U_inout= np.zeros((n_epochs, self.model.n_inout, self.model.n_fast))
#        saved_W_fast = np.zeros((n_epochs, self.model.n_fast, self.model.n_fast))
#        saved_W_inout= np.zeros((n_epochs, self.model.n_inout, self.model.n_inout))

        index = 0  # index into the batch-wise loss array
        best_loss = 4000

        if Add_training:
            self.optimizer.setup(self.model.slow)

    #self.model.inout.W.W.data = np.zeros((25,25))  #NO RECURRENT CONNECTION IN OUTPUT LAYER!

        for epoch in tqdm.tqdm(xrange(n_epochs)):
            #for epoch in xrange(n_epochs):
            with chainer.using_config('train', True):

                n_batches = train_data.shape[0]
                batch_size = train_data.shape[1]
                n_steps = train_data.shape[2]

                for i in range(n_batches):
                    #print('Sample number %i' %i)
                    loss = Variable(self.xp.array(0, 'float32'))
                    self.model.reset_state()

                    #initialization for this batch
                    data0 = Variable(train_data[i, :, 0, :])

                    self.model.hidden.initialize_state(batch_size)
                    #self.model.readout.initialize_state(batch_size)

                    for t in xrange(0, n_steps, 1):
                        x = Variable(train_input[i, :, t, :])
                        data = self.xp.array(train_data[i, :, t, :])
                        _loss = mean_squared_error(self.model(x),
                                                   data)  # prediction mode
                        if KL_loss:
                            _loss = self.KL_divergence(self.model(), data)
                        #print _loss
                        train_loss[epoch] += cuda.to_cpu(_loss.data)
                        loss += _loss

                    batches_loss[index] = loss.data
                    index = index + 1

                    self.model.cleargrads()  # clears the gradients of every link in the model
                    loss.backward()
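                    # detach the unrolled graph for this batch to free memory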
                    loss.unchain_backward()

                    #self.model.inout.W.disable_update() #NO RECURRENT CONNECTIONS IN OUTPUT LAYER!
                    #if self.model.mode == 'Static':
                    #    self.model.hidden.alphaS.disable_update()
                    #    self.model.hidden.alphaR.disable_update()

                    if Add_training:  # freeze these links during additional training (re-enable with enable_update())
                        self.model.fast.U1.disable_update()
                        self.model.fast.W.disable_update()
                        self.model.inout.disable_update()
                        self.model.slow.W.disable_update()
                    self.optimizer.update()
                    #print 'UPDATE'

#            saved_U_fast[epoch,:,:] = self.model.fast.U.W.data
#            saved_U_inout[epoch,:,:]= self.model.inout.U.W.data
#            saved_W_fast[epoch,:,:] = self.model.fast.W.W.data
#            saved_W_inout[epoch, :,:]= self.model.inout.W.W.data
#
#            #save learning of time constants
            if self.model.mode != 'Static':
                learning_alphasS[epoch +
                                 1, :] = self.model.hidden.alphaS.alpha.data
                learning_alphasR[epoch +
                                 1, :] = self.model.hidden.alphaR.alpha.data

            # compute loss per epoch
            train_loss[epoch] /= (n_batches * batch_size * self.model.n_out)

            # save model at some epoch
            #epochs_save = np.linspace(0, n_epochs-n_epochs/10, num=10, dtype=int)
            #if epoch in epochs_save:
            #    thisname = 'model_at_epoch_%i' %epoch
            #    self.save('saved_models/'+filename+'/'+thisname)

            # validation
            with chainer.using_config('train', False):

                n_batches = test_data.shape[0]
                batch_size = test_data.shape[1]
                n_steps = test_data.shape[2]
                #                assert(n_steps == n_clamp+n_pred)

                for i in range(n_batches):

                    self.model.reset_state()
                    data0 = Variable(test_data[i, :, 0, :])

                    self.model.hidden.initialize_state(batch_size)
                    #                    self.model.readout.initialize_state(batch_size)

                    for t in xrange(0, n_steps, 1):
                        x = Variable(test_input[i, :, t, :])
                        data = self.xp.array(test_data[i, :, t, :])
                        _loss = mean_squared_error(self.model(x),
                                                   data)  # prediction mode
                        if KL_loss:
                            _loss = self.KL_divergence(self.model(), data)
                        test_loss[epoch] += cuda.to_cpu(_loss.data)

            # compute loss per epoch
            test_loss[epoch] /= (n_batches * batch_size * self.model.n_out)

            # keep the parameters with the best validation loss (early stopping)
            if test_loss[epoch] < best_loss:
                best_loss = test_loss[epoch]
                self.save('saved_models/' + filename + '/best')
                np.save('saved_models/' + filename + '/conv_epoch', epoch)
            # end of training cycle

            np.save('saved_models/' + filename + '/best_loss', best_loss)
        if self.model.mode != 'Static':
            np.save('saved_models/' + filename + '/learning_alphaS',
                    learning_alphasS)
            np.save('saved_models/' + filename + '/learning_alphaR',
                    learning_alphasR)
#        np.save('saved_U_fast', saved_U_fast)
#        np.save('saved_W_fast', saved_W_fast)
#        np.save('saved_U_inout', saved_U_inout)
#        np.save('saved_W_inout', saved_W_inout)
#        np.save('saved_models/'+filename+'/saved_alphas_fast', learning_alphas_fast)
#        np.save('saved_models/'+filename+'/saved_alphas_slow', learning_alphas_slow)
#        np.save('saved_models/'+filename+'/saved_alphas_inout', learning_alphas_inout)
#

        return train_loss, test_loss, batches_loss
Ejemplo n.º 26
0
    def train(self, words, steps, batchsize=100, sequence_length=10):
        """ Train the Predictor's model on words for steps number of steps. """

        whole_len = len(words)
        train_data = np.ndarray(whole_len, dtype=np.int32)
        jumps = steps * sequence_length

        # Initialize training data and maybe vocab.
        vocab_initializing = False
        if self.vocab is None:
            vocab_initializing = True
            self.vocab = {}
        for i, word in enumerate(words):
            if vocab_initializing:
                if word not in self.vocab:
                    self.vocab[word] = len(self.vocab)
            train_data[i] = self.vocab[word]


        print 'corpus length:', len(words)
        print 'self.vocab size:', len(self.vocab)

        # Initialize base model (if we need to)
        if self.model is None:
            self.model = BaseRNN(len(self.vocab), self.units)

        if self.gpu >= 0:
            cuda.get_device(self.gpu).use()
            self.model.to_gpu()

        optimizer = optimizers.RMSprop(lr=self.settings.learning_rate,
                                       alpha=self.settings.decay_rate,
                                       eps=1e-8)
        optimizer.setup(self.model)

        jumpsPerEpoch = whole_len / batchsize
        epoch = 0
        start_at = time.time()
        cur_at = start_at
        state = make_initial_state(self.units, batchsize=batchsize)

        if self.gpu >= 0:
            accum_loss = Variable(cuda.zeros(()))
            for _, value in state.items():
                value.data = cuda.to_gpu(value.data)
        else:
            accum_loss = Variable(np.zeros((), dtype=np.float32))

        print 'going to train {} iterations'.format(steps)
        for i in xrange(jumps):
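            # build one next-word prediction minibatch; each of the batchsize streams reads from its own offset (j * jumpsPerEpoch) in the corpus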
            x_batch = np.array([train_data[(jumpsPerEpoch * j + i) % whole_len]
                                for j in xrange(batchsize)])
            y_batch = np.array([train_data[(jumpsPerEpoch * j + i + 1) % whole_len]
                                for j in xrange(batchsize)])

            if self.gpu >= 0:
                x_batch = cuda.to_gpu(x_batch)
                y_batch = cuda.to_gpu(y_batch)


            state, loss_i = self.model.forward_one_step(x_batch,
                                                        y_batch,
                                                        state,
                                                        dropout_ratio=self.settings.dropout)
            accum_loss += loss_i

            if (i + 1) % sequence_length == 0:
                now = time.time()
                print '{}/{}, train_loss = {}, time = {:.2f}'.format((i+1)/sequence_length, steps, accum_loss.data / sequence_length, now-cur_at)
                cur_at = now

                optimizer.zero_grads()
                accum_loss.backward()
                accum_loss.unchain_backward()  # truncate
                if self.gpu >= 0:
                    accum_loss = Variable(cuda.zeros(()))
                else:
                    accum_loss = Variable(np.zeros((), dtype=np.float32))


                optimizer.clip_grads(self.settings.grad_clip)
                optimizer.update()

            if (i + 1) % jumpsPerEpoch == 0:
                epoch += 1

                if epoch >= self.settings.learning_rate_decay_after:
                    optimizer.lr *= self.settings.learning_rate_decay
                    print 'decayed self.settings.learning rate by a factor {} to {}'.format(self.settings.learning_rate_decay, optimizer.lr)
Ejemplo n.º 27
0
def generate_image(img_orig,
                   img_style,
                   width,
                   nw,
                   nh,
                   max_iter,
                   lr,
                   img_gen=None):
    mid_orig = nn.forward(Variable(img_orig, volatile=True))
    style_mats = [
        get_matrix(y) for y in nn.forward(Variable(img_style, volatile=True))
    ]

    if img_gen is None:
        if args.gpu >= 0:
            img_gen = xp.random.uniform(-20,
                                        20, (1, 3, width, width),
                                        dtype=np.float32)
        else:
            img_gen = np.random.uniform(-20, 20, (1, 3, width, width)).astype(
                np.float32)
    x = Variable(img_gen)
    xg = xp.zeros_like(x.data)
    optimizer = optimizers.Adam(alpha=lr)
    optimizer.setup((img_gen, xg))
    for i in range(max_iter):

        x = Variable(img_gen)
        y = nn.forward(x)

        optimizer.zero_grads()
        L = Variable(xp.zeros((), dtype=np.float32))
        for l in range(len(y)):
            ch = y[l].data.shape[1]
            wd = y[l].data.shape[2]
            gogh_y = F.reshape(y[l], (ch, wd**2))
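            # Gram (feature-correlation) matrix of the generated image, normalized by ch * wd**2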
            gogh_matrix = F.matmul(gogh_y, gogh_y, transb=True) / np.float32(
                ch * wd**2)

            L1 = np.float32(args.lam) * np.float32(
                nn.alpha[l]) * F.mean_squared_error(y[l],
                                                    Variable(mid_orig[l].data))
            L2 = np.float32(nn.beta[l]) * F.mean_squared_error(
                gogh_matrix, Variable(style_mats[l].data)) / np.float32(len(y))
            L += L1 + L2

            if i % 100 == 0:
                print i, l, L1.data, L2.data

        L.backward()
        xg += x.grad
        optimizer.update()

        tmp_shape = img_gen.shape
        if args.gpu >= 0:
            img_gen += Clip().forward(img_gen).reshape(tmp_shape) - img_gen
        else:

            def clip(x):
                return -120 if x < -120 else (136 if x > 136 else x)

            img_gen += np.vectorize(clip)(img_gen).reshape(tmp_shape) - img_gen

        if i % 3000 == 0:
            save_image(img_gen, W, nw, nh, i)
Ejemplo n.º 28
0
         y_t = cuda.to_gpu(y_t)
     state, loss_i = model.forward_one_step(x_t,
                                            y_t,
                                            state,
                                            dropout_ratio=args.dropout)
     loss += loss_i
 now = time.time()
 end_time += now - cur_at
 iterations_count += 1
 print 'loss_all=' + str(loss.data)
 print '{}, train_loss = {}, time = {:.4f}'.format(
     iterations_count, loss.data / (len(train_data[i % whole_len]) - 1),
     now - cur_at)
 cur_at = now
 optimizer.zero_grads()
 loss.backward()
 loss.unchain_backward()
 optimizer.clip_grads(grad_clip)
 optimizer.update()
 if (i + 1) == (whole_len * n_epochs):
     cuda.cupy.save('l1_x_W.npy', model.l1_x.W)
     cuda.cupy.save('l1_x_b.npy', model.l1_x.b)
     cuda.cupy.save('l1_h_W.npy', model.l1_h.W)
     cuda.cupy.save('l1_h_b.npy', model.l1_h.b)
     cuda.cupy.save('l6_W.npy', model.l6.W)
     cuda.cupy.save('l6_b.npy', model.l6.b)
 if ((i + 1) % whole_len) == 0:
     epoch += 1
     train_loss_all.append(loss.data.get() / len(train_data[i % whole_len]))
     for k in xrange(whole_val_len):
         val_state = make_initial_state(n_units)
Ejemplo n.º 29
0
class RNNCharEstimator(ChainerClassifier):
    def __init__(self,
                 net_type='lstm',
                 net_hidden=100,
                 vocab_size=1000,
                 dropout_ratio=0.0,
                 seq_size=70,
                 grad_clip=100.0,
                 **params):
        ChainerClassifier.__init__(self, **params)
        self.net_hidden = net_hidden
        self.net_type = net_type
        self.vocab_size = vocab_size
        self.dropout_ratio = dropout_ratio
        self.seq_size = seq_size
        self.grad_clip = grad_clip
        self.param_names.append('vocab_size')
        self.param_names.append('net_type')
        self.param_names.append('net_hidden')
        self.param_names.append('dropout_ratio')

    def setup_network(self, n_features):
        if self.net_type == 'lstm':
            self.network = CharLSTM(self.vocab_size, self.net_hidden,
                                    self.batch_size)
        elif self.net_type == 'irnn':
            self.network = CharIRNN(self.vocab_size, self.net_hidden,
                                    self.batch_size)
        else:
            error("Unknown net_type")
        self.reset_accum_loss()

    def reset_accum_loss(self):
        if self.gpu >= 0:
            self.accum_loss = Variable(cuda.zeros(()))
        else:
            self.accum_loss = Variable(np.zeros((), dtype=np.float32))

    def forward_train(self, x, t):
        return self.network.train(x, t, dropout_ratio=self.dropout_ratio)

    def predict(self, x_data):
        self.network.reset_state(1)
        if self.gpu >= 0:
            self.network.to_gpu()
            x_data = cuda.to_gpu(x_data)
        results = None
        for i in xrange(x_data.shape[0]):
            x = Variable(x_data[i, :])
            y = self.network.predict(x)
            if results is None:
                results = cuda.to_cpu(y.data)
            else:
                results = np.concatenate([results, cuda.to_cpu(y.data)])
        results = results.argmax(1)
        return results

    def fit_update(self, loss, batch_id):
        self.accum_loss += loss
        if ((batch_id + 1) % self.seq_size) == 0:  # Run Truncated BPTT
            self.optimizer.zero_grads()
            self.accum_loss.backward()
            self.accum_loss.unchain_backward()  # truncate
            self.optimizer.clip_grads(self.grad_clip)
            self.optimizer.update()
            self.reset_accum_loss()

    def make_batch(self, x_data, y_data, batch_id):
        batch_num = self.n_samples / self.batch_size
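        # interleave samples so each of the batch_size parallel streams walks a different stride of the data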
        x_batch = np.array([
            x_data[(batch_id + batch_num * j) % self.n_samples]
            for j in xrange(self.batch_size)
        ]).reshape(self.batch_size)
        y_batch = np.array([
            y_data[(batch_id + batch_num * j) % self.n_samples]
            for j in xrange(self.batch_size)
        ])
        return x_batch, y_batch
Ejemplo n.º 30
0
def train_encoder(
    model,
    dictionary: corpora.Dictionary,
    sentence_file: str,
    model_dir: str,
    epoch_size: int=100,
    batch_size: int=30,
    dropout: bool=True,
    gpu: int=-1
) -> None:
    if gpu >= 0:
        model.to_gpu()
        print(model.xp)

    # setup SGD optimizer
    opt = optimizers.SGD()
    opt.setup(model)

    # optimizer hooks
    clip_threshold = 5.0
    print("set optimizer clip threshold: {}".format(clip_threshold))
    opt.add_hook(chainer.optimizer.GradientClipping(clip_threshold))

    # load conversation sentences
    sentences = load_sentence(sentence_file)
    data_size = len(sentences)

    print("data size: {}".format(data_size))
    for epoch in range(epoch_size):
        print("epoch {}".format(epoch))

        indexes = np.random.permutation(data_size)
        epoch_loss = 0  # int

        for bat_i in range(0, data_size, batch_size):
            forward_start_time = datetime.now()
            batch_loss = Variable(model.xp.zeros((), dtype=model.xp.float32))
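            # accumulate per-sentence losses over this minibatch; averaged below before backward()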

            for index in indexes[bat_i:bat_i + batch_size]:
                input_words = sentences[index]
                # convert the tokens to a list of ids
                input_words_with_s = tokens2ids(
                    input_words,
                    dictionary,
                    verbose=False
                )

                # forward pass
                try:
                    new_loss = model(
                        input_words_with_s,
                        dropout=dropout,
                        state=None,
                        train=True
                    )
                    if model.xp.isnan(new_loss.data):
                        sys.exit(1)

                    batch_loss += new_loss
                except Exception:
                    print(index, input_words_with_s)
                    import traceback
                    traceback.print_exc()

            # average the loss over the minibatch
            batch_size_array = model.xp.array(
                batch_size,
                dtype=model.xp.float32
            )
            # if gpu:
            #     batch_size_array = cuda.to_gpu(batch_size_array)
            batch_loss = batch_loss / Variable(batch_size_array)
            epoch_loss += batch_loss.data

            # timing
            forward_end_time = datetime.now()

            # optimization
            opt_start_time = datetime.now()
            model.zerograds()
            batch_loss.backward()
            opt.update()
            opt_end_time = datetime.now()

            forward_delta = forward_end_time - forward_start_time
            opt_delta = opt_end_time - opt_start_time

            print_fmt = (
                "epoch {} batch {}: "
                "loss {}, grad L2 norm: {}, forward {}, optimizer {}"
            )
            print(print_fmt.format(
                epoch,
                int(bat_i / batch_size),
                batch_loss.data,
                opt.compute_grads_norm(),
                forward_delta,
                opt_delta,
            ))
            # save
            if ((bat_i / batch_size) + 1) % 100 == 0:
                serializers.save_npz(
                    os.path.join(
                        model_dir,
                        "model.npz"
                    ),
                    model
                )
            if ((bat_i / batch_size) + 1) % 1000 == 0:
                serializers.save_npz(
                    os.path.join(
                        model_dir,
                        "model_{}_{}_{}.npz".format(
                            epoch,
                            int(bat_i / batch_size) + 1,
                            datetime.now().strftime("%Y%m%d-%H%M%S")
                        )
                    ),
                    model
                )
        print("finish epoch {}, loss {}".format(
            epoch,
            epoch_loss / epoch_size
        ))
        # save
        serializers.save_npz(
            os.path.join(
                model_dir,
                "model.npz"
            ),
            model
        )
        serializers.save_npz(
            os.path.join(
                model_dir,
                "model_{}_{}_{}.npz".format(
                    epoch,
                    int(bat_i / batch_size) + 1,
                    datetime.now().strftime("%Y%m%d-%H%M%S")
                )
            ),
            model
        )
Ejemplo n.º 31
0
list_sentences = readcsv("./files/list_id20151207.csv")
list_sentences = [np.array(row, np.int32) for row in list_sentences]

opt1 = SGD_Embedid()  # use stochastic gradient descent
opt2 = SGD()  # use stochastic gradient descent
opt1.setup(model1)  # initialize the optimizer
opt2.setup(model2)  # initialize the optimizer
opt1.tuples[0][1].fill(0)
opt2.zero_grads()
random.shuffle(list_sentences)
list_minibatch = []
for i, sentence in enumerate(list_sentences):
    list_minibatch.append(sentence)
    if len(list_minibatch) == BATCH_SIZE:
        accum_loss_total = Variable(np.zeros((), dtype=np.float32))  # initial value of the accumulated loss
        uniq_sentence = np.zeros((), np.int32)
        for batch_sentence in list_minibatch:
            accum_loss_total += forward(batch_sentence)  # compute the loss
            uniq_sentence = np.append(uniq_sentence, batch_sentence)
        accum_loss_total.backward()  # backpropagation
        opt1.clip_grads(10)  # clip overly large gradients
        opt2.clip_grads(10)  # clip overly large gradients
        uniq_sentence = np.unique(uniq_sentence)
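        # update and reset gradients only for the embedding ids that appeared in this minibatch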
        opt1.update(uniq_sentence)  # update the parameters
        opt2.update()  # update the parameters
        opt1.zero_grads(uniq_sentence)  # reset the gradients
        opt2.zero_grads()  # reset the gradients
        list_minibatch = []
    if i % 1000 == 999:
        break
Ejemplo n.º 32
0
def train_encoder_decoder(
    model,
    dictionary: corpora.Dictionary,
    conversation_file: str,
    decoder_model_dir: str,
    epoch_size: int=100,
    batch_size: int=30,
    dropout: bool=False,
    gpu: int=-1
) -> None:
    if gpu >= 0:
        model.to_gpu()
        print(model.xp)

    # setup SGD optimizer
    # opt = optimizers.SGD()
    opt = optimizers.Adam()
    opt.setup(model)

    # optimizer hooks
    clip_threshold = 5.0
    print("set optimizer clip threshold: {}".format(clip_threshold))
    opt.add_hook(chainer.optimizer.GradientClipping(clip_threshold))

    # load conversation sentences
    conversation = load_conversation(conversation_file, dictionary)
    data_size = len(conversation)

    print("data size: {}".format(data_size))
    for epoch in range(epoch_size):
        print("running epoch {}".format(epoch))
        indexes = np.random.permutation(range(data_size))
        epoch_loss = 0  # int

        for bat_i in range(0, data_size, batch_size):
            forward_start_time = datetime.now()
            batch_loss = Variable(model.xp.zeros((), dtype=np.float32))  # loss accumulated over the minibatch

            for index in indexes[bat_i:bat_i + batch_size]:
                pair_words = conversation[index]

                # encoder input words
                orig_words = pair_words[0][:-1]  # remove END_SYMBOL
                reply_words = pair_words[1]
                if orig_words:
                    assert orig_words[-1] is not config.END_SYMBOL
                input_words_with_s = tokens2ids(orig_words, dictionary)
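                # run the encoder over the input word ids; its final state seeds the decoder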
                ys, state = model.predictor.forward(
                    [Variable(
                        model.xp.array(
                            [word],
                            dtype=model.xp.int32
                        )
                    ) for word in input_words_with_s],
                    state=None,
                    dropout=dropout,
                    train=True
                )

                # decode
                assert reply_words[0] == config.END_SYMBOL
                assert reply_words[-1] == config.END_SYMBOL
                output_words_with_s = tokens2ids(reply_words, dictionary)
                try:
                    new_loss = model(
                        output_words_with_s,
                        state=state,  # use the encoder's final state as the decoder's initial state
                        dropout=dropout,
                        train=True
                    )
                    batch_loss += new_loss
                except Exception:
                    print(index, input_words_with_s)
                    import traceback
                    traceback.print_exc()
            # average the loss over the minibatch
            batch_size_array = model.xp.array(
                batch_size,
                dtype=model.xp.float32
            )
            batch_loss = batch_loss / Variable(batch_size_array)
            epoch_loss += batch_loss.data

            # timing
            forward_end_time = datetime.now()

            # optimization
            opt_start_time = datetime.now()
            model.zerograds()
            batch_loss.backward()
            opt.update()
            opt_end_time = datetime.now()

            forward_delta = forward_end_time - forward_start_time
            opt_delta = opt_end_time - opt_start_time
            # print(
            #     ("decoder epoch {} batch {}: loss {}, "
            #      "forward {}, optimizer {},").format(
            #         epoch,
            #         int(bat_i / batch_size),
            #         batch_loss.data,
            #         forward_delta,
            #         opt_delta,
            #     )
            # )
            print_fmt = (
                "epoch {} batch {}: "
                "loss {}, grad L2 norm: {}, forward {}, optimizer {}"
            )
            print(print_fmt.format(
                epoch,
                int(bat_i / batch_size),
                batch_loss.data,
                opt.compute_grads_norm(),
                forward_delta,
                opt_delta,
            ))
            # save
            if ((bat_i / batch_size) + 1) % 100 == 0:
                serializers.save_npz(
                    os.path.join(
                        decoder_model_dir,
                        "model.npz"
                    ),
                    model
                )
            if ((bat_i / batch_size) + 1) % 1000 == 0:
                serializers.save_npz(
                    os.path.join(
                        decoder_model_dir,
                        "model_{}_{}_{}.npz".format(
                            epoch,
                            int(bat_i / batch_size) + 1,
                            datetime.now().strftime("%Y%m%d-%H%M%S")
                        )
                    ),
                    model
                )
        print("finish epoch {}, loss {}".format(
            epoch,
            epoch_loss / math.ceil(data_size / batch_size)
        ))
        # save
        serializers.save_npz(
            os.path.join(
                decoder_model_dir,
                "model.npz"
            ),
            model
        )
        serializers.save_npz(
            os.path.join(
                decoder_model_dir,
                "model_{}_{}_{}.npz".format(
                    epoch,
                    int(bat_i / batch_size) + 1,
                    datetime.now().strftime("%Y%m%d-%H%M%S")
                )
            ),
            model
        )
Ejemplo n.º 33
0
    if args.gpu >=0:
        x_batch = cuda.to_gpu(x_batch)
        y_batch = cuda.to_gpu(y_batch)

    state, loss_i = model.forward_one_step(x_batch, y_batch, state, dropout_ratio=args.dropout)
    accum_loss   += loss_i

    if (i + 1) % bprop_len == 0:  # Run truncated BPTT
        now = time.time()
        print '{}/{}, train_loss = {}, time = {:.2f}'.format((i+1)/bprop_len, jump, accum_loss.data / bprop_len, now-cur_at)
        loss_file.write('{}\n'.format(accum_loss.data / bprop_len))
        cur_at = now

        optimizer.zero_grads()
        accum_loss.backward()
        accum_loss.unchain_backward()  # truncate
        if args.gpu >= 0:
            accum_loss = Variable(cuda.zeros(()))
        else:
            accum_loss = Variable(np.zeros(()).astype(np.float32))

        optimizer.clip_grads(grad_clip)
        optimizer.update()

    if args.enable_checkpoint:
        if (i + 1) % 10000 == 0:
            fn = ('%s/charrnn_epoch_%.2f.chainermodel' % (args.checkpoint_dir, float(i)/jump))
            pickle.dump(copy.deepcopy(model).to_cpu(), open(fn, 'wb'))

    if (i + 1) % jump == 0:
Ejemplo n.º 34
0
class RNNCharEstimator(ChainerClassifier):
    def __init__(self, net_type='lstm', net_hidden=100,
                       vocab_size=1000, dropout_ratio=0.0, seq_size=70, grad_clip=100.0,
                       **params):
        ChainerClassifier.__init__(self, **params)
        self.net_hidden    = net_hidden
        self.net_type      = net_type
        self.vocab_size    = vocab_size
        self.dropout_ratio = dropout_ratio
        self.seq_size      = seq_size
        self.grad_clip     = grad_clip
        self.param_names.append('vocab_size')
        self.param_names.append('net_type')
        self.param_names.append('net_hidden')
        self.param_names.append('dropout_ratio')

    def setup_network(self, n_features):
        if self.net_type == 'lstm':
            self.network = CharLSTM(self.vocab_size, self.net_hidden, self.batch_size)
        elif self.net_type == 'irnn':
            self.network = CharIRNN(self.vocab_size, self.net_hidden, self.batch_size)
        else:
            error("Unknown net_type")
        self.reset_accum_loss()

    def reset_accum_loss(self):
        if self.gpu >= 0:
            self.accum_loss = Variable(cuda.zeros(()))
        else:
            self.accum_loss = Variable(np.zeros((), dtype=np.float32))

    def forward_train(self, x, t):
        return self.network.train(x, t, dropout_ratio=self.dropout_ratio)

    def predict(self, x_data):
        self.network.reset_state(1)
        if self.gpu >= 0:
            self.network.to_gpu()
            x_data = cuda.to_gpu(x_data)
        results = None
        for i in xrange(x_data.shape[0]):
            x = Variable(x_data[i,:])
            y = self.network.predict(x)
            if results is None:
                results = cuda.to_cpu(y.data)
            else:
                results = np.concatenate([results, cuda.to_cpu(y.data)])
        results = results.argmax(1)
        return results

    def fit_update(self, loss, batch_id):
        self.accum_loss += loss
        if ((batch_id + 1) % self.seq_size) == 0: # Run Truncated BPTT
            self.optimizer.zero_grads()
            self.accum_loss.backward()
            self.accum_loss.unchain_backward()  # truncate
            self.optimizer.clip_grads(self.grad_clip)
            self.optimizer.update()
            self.reset_accum_loss()

    def make_batch(self, x_data, y_data, batch_id):
        batch_num = self.n_samples / self.batch_size
        x_batch = np.array([x_data[(batch_id + batch_num * j) % self.n_samples]
                            for j in xrange(self.batch_size)]).reshape(self.batch_size)
        y_batch = np.array([y_data[(batch_id + batch_num * j) % self.n_samples]
                            for j in xrange(self.batch_size)])
        return x_batch, y_batch
Ejemplo n.º 35
0
                        for j in range(batchsize)])

    if args.gpu >=0:
        x_batch = cuda.to_gpu(x_batch)
        y_batch = cuda.to_gpu(y_batch)

    state, loss_i = model.forward_one_step(x_batch, y_batch, state, dropout_ratio=args.dropout)
    accum_loss   += loss_i

    if (i + 1) % bprop_len == 0:  # Run truncated BPTT
        now = time.time()
        print('{}/{}, train_loss = {}, time = {:.2f}'.format((i+1)/bprop_len, jump, accum_loss.data / bprop_len, now-cur_at))
        cur_at = now

        optimizer.zero_grads()
        accum_loss.backward()
        accum_loss.unchain_backward()  # truncate
        if args.gpu >= 0:
            accum_loss = Variable(cuda.zeros(()))
        else:
            accum_loss = Variable(np.zeros((), dtype=np.float32))

        optimizer.clip_grads(grad_clip)
        optimizer.update()

    if (i + 1) % 10000 == 0:
        fn = ('%s/charrnn_epoch_%.2f.chainermodel' % (args.checkpoint_dir, float(i)/jump))
        pickle.dump(copy.deepcopy(model).to_cpu(), open(fn, 'wb'))

    if (i + 1) % jump == 0:
        epoch += 1
Ejemplo n.º 36
0
l1 = L.Linear(4, 3)
l2 = L.Linear(3, 2)


# a network with unit sizes 4 -> 3 -> 2
def my_forward(x):
    h = l1(x)
    return l2(h)


x = Variable(np.array([[1, 2, 3, 4], [4, 5, 6, 7]], dtype=np.float32))
y = my_forward(x)

y.grad = np.ones((2, 2), dtype=np.float32)
y.backward()
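# after backward(), gradients are available in x.grad and in the link parameters' .grad attributes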

print("x.data = ")
print(x.data)
print()

print("l1.W = ")
print(l1.W.data)
print("l1.b = ")
print(l1.b.data)
print()
print("l2.W = ")
print(l2.W.data)
print("l2.b = ")
print(l2.b.data)
print()