Example #1
    def setup_optimizers(self):
        params = self.params

        self.causal_conv_optimizers = []
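        # one NesterovAG optimizer per causal conv layer, each with weight decay and gradient clipping hooks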
        for layer in self.causal_conv_layers:
            opt = optimizers.NesterovAG(lr=params.learning_rate,
                                        momentum=params.gradient_momentum)
            opt.setup(layer)
            opt.add_hook(optimizer.WeightDecay(params.weight_decay))
            opt.add_hook(GradientClipping(params.gradient_clipping))
            self.causal_conv_optimizers.append(opt)

        self.residual_conv_optimizers = []
        for layer in self.residual_conv_layers:
            opt = optimizers.NesterovAG(lr=params.learning_rate,
                                        momentum=params.gradient_momentum)
            opt.setup(layer)
            opt.add_hook(optimizer.WeightDecay(params.weight_decay))
            opt.add_hook(GradientClipping(params.gradient_clipping))
            self.residual_conv_optimizers.append(opt)

        self.softmax_conv_optimizers = []
        for layer in self.softmax_conv_layers:
            opt = optimizers.NesterovAG(lr=params.learning_rate,
                                        momentum=params.gradient_momentum)
            opt.setup(layer)
            opt.add_hook(optimizer.WeightDecay(params.weight_decay))
            opt.add_hook(GradientClipping(params.gradient_clipping))
            self.softmax_conv_optimizers.append(opt)
Example #2
	def setup_optimizers(self):
		params = self.params
		
		self.causal_conv_optimizers = []
		for layer in self.causal_conv_layers:
			opt = optimizers.Adam(alpha=params.learning_rate, beta1=params.gradient_momentum)
			opt.setup(layer)
			opt.add_hook(optimizer.WeightDecay(params.weight_decay))
			opt.add_hook(GradientClipping(params.gradient_clipping))
			self.causal_conv_optimizers.append(opt)
		
		self.residual_conv_optimizers = []
		for block in self.residual_blocks:
			for layer in block:
				opt = optimizers.Adam(alpha=params.learning_rate, beta1=params.gradient_momentum)
				opt.setup(layer)
				opt.add_hook(optimizer.WeightDecay(params.weight_decay))
				opt.add_hook(GradientClipping(params.gradient_clipping))
				self.residual_conv_optimizers.append(opt)
		
		self.softmax_conv_optimizers = []
		for layer in self.softmax_conv_layers:
			opt = optimizers.Adam(alpha=params.learning_rate, beta1=params.gradient_momentum)
			opt.setup(layer)
			opt.add_hook(optimizer.WeightDecay(params.weight_decay))
			opt.add_hook(GradientClipping(params.gradient_clipping))
			self.softmax_conv_optimizers.append(opt)
Example #3
    def load(cls, name):
        params = Trainer.load_params(name)
        print('nz: {}'.format(params['nz']))
        print('epoch: {} / {}'.format(params['current_epoch'],
                                      params['epoch']))
        print('train: {}'.format(params['train']))
        print('batchsize: {}'.format(params['batchsize']))

        dcgan = DCGAN(params['nz'])
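        # fresh Adam optimizers for the generator and discriminator; serialized model and optimizer state is restored below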
        opt_gen = optimizers.Adam(alpha=0.0002, beta1=0.5)
        opt_dis = optimizers.Adam(alpha=0.0002, beta1=0.5)
        opt_gen.setup(dcgan.gen)
        opt_dis.setup(dcgan.dis)
        opt_gen.add_hook(optimizer.WeightDecay(0.00001))
        opt_dis.add_hook(optimizer.WeightDecay(0.00001))

        filenames = Trainer.get_model_filenames(name, params['current_epoch'])
        model_dir = os.path.join(Trainer.MODEL_DIR, name)
        serializers.load_hdf5(os.path.join(model_dir, filenames['model_gen']),
                              dcgan.gen)
        serializers.load_hdf5(os.path.join(model_dir, filenames['model_dis']),
                              dcgan.dis)
        serializers.load_hdf5(os.path.join(model_dir, filenames['opt_gen']),
                              opt_gen)
        serializers.load_hdf5(os.path.join(model_dir, filenames['opt_dis']),
                              opt_dis)

        return cls(name, params, dcgan, opt_gen, opt_dis)
Example #4
    def setLR(self, lr=0.002):
        self.gen_opt = optimizers.Adam(alpha=lr)
        self.gen_opt.setup(self.generator)
        self.gen_opt.add_hook(optimizer.WeightDecay(0.0001))
        self.dis_opt = optimizers.Adam(alpha=lr)
        self.dis_opt.setup(self.discriminator)
        self.dis_opt.add_hook(optimizer.WeightDecay(0.0001))
Example #5
    def test_call_hooks_uninitialized_param(self):
        target = UninitializedChain()
        opt = optimizers.MomentumSGD()
        opt.setup(target)
        opt.add_hook(optimizer.WeightDecay(rate=0.0005))
        target(np.ones((4, 10), dtype=np.float32))
        opt.call_hooks()
Example #6
    def setup_optimizer(self,
                        optimizer_name,
                        gradient_clipping=3,
                        weight_decay=0.00001,
                        **kwargs):
        # set optimizer
        if optimizer_name == "Adam":
            self.opt = optimizers.Adam(**kwargs)
        elif optimizer_name == "AdaDelta":
            self.opt = optimizers.AdaDelta(**kwargs)
        elif optimizer_name == "AdaGrad":
            self.opt = optimizers.AdaGrad(**kwargs)
        elif optimizer_name == "RMSprop":
            self.opt = optimizers.RMSprop(**kwargs)
        elif optimizer_name == "RMSpropGraves":
            self.opt = optimizers.RMSpropGraves(**kwargs)
        elif optimizer_name == "SGD":
            self.opt = optimizers.SGD(**kwargs)
        elif optimizer_name == "MomentumSGD":
            self.opt = optimizers.MomentumSGD(**kwargs)

        # self.opt.use_cleargrads()
        self.opt.setup(self)
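        # hooks run on every update in the order they are added: gradient clipping, then weight decay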
        self.opt.add_hook(optimizer.GradientClipping(gradient_clipping))
        self.opt.add_hook(optimizer.WeightDecay(weight_decay))

        self.opt_params = {
            "optimizer_name": optimizer_name,
            "gradient_clipping": gradient_clipping,
            "weight_decay": weight_decay
        }
Example #7
def train(args):
    source_vocab = Vocab(args.source, args.vocab)
    target_vocab = Vocab(args.target, args.vocab)
    att_encdec = ABED(args.vocab, args.hidden_size, args.maxout_hidden_size,
                      args.embed_size)
    if args.use_gpu:
        att_encdec.to_gpu()
    if args.source_validation:
        if os.path.exists(PLOT_DIR) == False: os.mkdir(PLOT_DIR)
        fp_loss = open(PLOT_DIR + "loss", "w")
        fp_loss_val = open(PLOT_DIR + "loss_val", "w")

    opt = optimizers.AdaDelta(args.rho, args.eps)
    opt.setup(att_encdec)
    opt.add_hook(optimizer.WeightDecay(DECAY_COEFF))
    opt.add_hook(optimizer.GradientClipping(CLIP_THR))
    for epoch in xrange(args.epochs):
        print "--- epoch: %s/%s ---" % (epoch + 1, args.epochs)
        source_gen = word_list(args.source)
        target_gen = word_list(args.target)
        batch_gen = batch(sort(source_gen, target_gen, 100 * args.minibatch),
                          args.minibatch)
        n = 0
        total_loss = 0.0
        for source_batch, target_batch in batch_gen:
            n += len(source_batch)
            source_batch = fill_batch_end(source_batch)
            target_batch = fill_batch_end(target_batch)
            hyp_batch, loss = forward(source_batch, target_batch, source_vocab,
                                      target_vocab, att_encdec, True, 0)
            total_loss += loss.data * len(source_batch)
            closed_test(source_batch, target_batch, hyp_batch)

            loss.backward()
            opt.update()
            print "[n=%s]" % (n)
        print "[total=%s]" % (n)
        prefix = args.model_path + '%s' % (epoch + 1)
        serializers.save_hdf5(prefix + '.attencdec', att_encdec)
        if args.source_validation:
            total_loss_val, n_val = validation_test(args, att_encdec,
                                                    source_vocab, target_vocab)
            fp_loss.write("\t".join([str(epoch), str(total_loss / n) + "\n"]))
            fp_loss_val.write("\t".join(
                [str(epoch), str(total_loss_val / n_val) + "\n"]))
            fp_loss.flush()
            fp_loss_val.flush()
        hyp_params = att_encdec.get_hyper_params()
        Backup.dump(hyp_params, args.model_path + HPARAM_NAME)
        source_vocab.save(args.model_path + SRC_VOCAB_NAME)
        target_vocab.save(args.model_path + TAR_VOCAB_NAME)
    hyp_params = att_encdec.get_hyper_params()
    Backup.dump(hyp_params, args.model_path + HPARAM_NAME)
    source_vocab.save(args.model_path + SRC_VOCAB_NAME)
    target_vocab.save(args.model_path + TAR_VOCAB_NAME)
    if args.source_validation:
        fp_loss.close()
        fp_loss_val.close()
Example #8
    def create(cls, name, params):
        merged_params = {}
        merged_params.update(Trainer.DEFAULT_PARAMS)
        merged_params.update(params)
        assert merged_params['nz'] >= 0
        assert merged_params['epoch'] >= 0
        assert merged_params['train'] >= 0
        assert merged_params['batchsize'] >= 0

        dcgan = DCGAN(merged_params['nz'])
        opt_gen = optimizers.Adam(alpha=0.0002, beta1=0.5)
        opt_dis = optimizers.Adam(alpha=0.0002, beta1=0.5)
        opt_gen.setup(dcgan.gen)
        opt_dis.setup(dcgan.dis)
        opt_gen.add_hook(optimizer.WeightDecay(0.00001))
        opt_dis.add_hook(optimizer.WeightDecay(0.00001))

        return cls(name, merged_params, dcgan, opt_gen, opt_dis)
Example #9
    def check_weight_decay(self):
        w = self.target.param.data
        g = self.target.param.grad

        decay = 0.2
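        # WeightDecay adds decay * w to the gradient, so one SGD step with lr=1 gives w - g - decay * w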
        expect = w - g - decay * w

        opt = optimizers.SGD(lr=1)
        opt.setup(self.target)
        opt.add_hook(optimizer.WeightDecay(decay))
        opt.update()

        testing.assert_allclose(expect, w)
Example #10
def train(X, t, hidden_n, weight_decay):
    print(hidden_n, weight_decay)

    model = AnimeChain(X.shape[1], hidden_n)
    optimizer = OS.AdaGrad()
    optimizer.setup(model)
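    # weight decay is optional: the hook is added only when a nonzero rate is given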
    if weight_decay:
        optimizer.add_hook(O.WeightDecay(weight_decay))

    for e in range(1500):
        V_X = Variable(X)
        V_t = Variable(np.array(t, dtype='int32'))

        V_y = model(V_X)
        model.zerograds()
        loss = F.softmax_cross_entropy(V_y, V_t)
        loss.backward()
        optimizer.update()
        #print(e, loss.data)
        if loss.data < 0.001:
            break
    return model
Example #11
def train(args):
    if args.gpu > -1:
        cuda.get_device(args.gpu).use()
        xp = cuda.cupy
    else:
        xp = np

    if args.log:
        log_dir = args.log
    else:
        log_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)), '{}_{}'.format(DIR_NAME, datetime.now().strftime('%Y%m%d_%H:%M')))

    if not os.path.exists(log_dir):
        os.mkdir(log_dir)

    # setting for logging
    logger = logging.getLogger()
    logging.basicConfig(level=logging.INFO)
    log_path = os.path.join(log_dir, 'log')
    file_handler = logging.FileHandler(log_path)
    fmt = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    file_handler.setFormatter(fmt)
    logger.addHandler(file_handler)

    logger.info('Arguments...')
    for arg, val in vars(args).items():
        logger.info('{} : {}'.format(arg, val))

    logger.info('Loading Vocab...')
    vocab = Vocab()
    vocab.load(args.vocab, args.lowercase)
    vocab.add_special_token()

    sufvocab = Vocab()
    sufvocab.load(args.sufvocab, args.lowercase)
    sufvocab.add_special_token(['s>', '<UNK>'])

    pos2id = Vocab()
    pos2id.load(args.poslist)

    logger.info('preparation for training data...')
    out_path = making_data(args.train_data, args.window)

    model = WordCSnnTagger(args.wembed, args.fembed, args.hidden, len(vocab), len(sufvocab), len(pos2id), args.window, args.objct, args.alpha)
    model.save_model_config(log_dir)

    if args.gpu > -1:
        model.to_gpu()

    opt = getattr(optimizers, args.opt)()
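    # args.opt names the optimizer class to instantiate (e.g. 'Adam' or 'SGD')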
    opt.setup(model)
    opt.add_hook(optimizer.GradientClipping(args.gclip))
    opt.add_hook(optimizer.WeightDecay(args.wdecay))

    for epoch in range(args.epoch):
        logger.info('START epoch {}/{}'.format(epoch + 1, args.epoch))
        start = time.time()
        sum_loss = xp.zeros((), dtype=xp.float32)
        n_data = 0
        n_correct = 0
        for i, [tags, contexts] in enumerate(line_iter(out_path, args.minibatch)):
            batch_ts = xp.array([pos2id[tag] for tag in tags], dtype=xp.int32)
            batch_caps = xp.array([[get_capf(word) for word in context] for context in contexts], dtype=xp.int32)
            if args.lowercase:
                contexts = [[word.lower() for word in context] for context in contexts]
            batch_xs = xp.array([[vocab[word] for word in context] for context in contexts], dtype=xp.int32)
            batch_sufs = xp.array([[sufvocab[word[-2:]] for word in context] for context in contexts], dtype=xp.int32)
            batch_features = [batch_xs, batch_sufs, batch_caps]
            cur_batch_size = batch_ts.shape[0]
            ys, loss = model(batch_features, batch_ts)
            sum_loss += loss.data * cur_batch_size
            model.zerograds()
            loss.backward()
            opt.update()
            pred_labels = ys.data.argmax(1)
            n_correct += sum(1 for j in range(cur_batch_size) if pred_labels[j] == batch_ts[j])
            n_data += cur_batch_size
            logger.info('done {} batches'.format(i + 1))
        logger.info('{} epoch train loss = {}'.format(epoch + 1, sum_loss))
        logger.info('{} epoch train accuracy = {}'.format(epoch + 1, float(n_correct / n_data)))
        logger.info('{} sec for training per epoch'.format(time.time() - start))

        if args.valid_data:
            start = time.time()
            valid_loss, valid_accuracy = evaluation(model, args.valid_data, pos2id, vocab, sufvocab, args)
            logger.info('{} epoch valid loss = {}'.format(epoch + 1, valid_loss))
            logger.info('{} epoch valid accuracy = {}'.format(epoch + 1, valid_accuracy))
            logger.info('{} sec for validation per epoch'.format(time.time() - start))

        if args.test_data:
            start = time.time()
            test_loss, test_accuracy = evaluation(model, args.test_data, pos2id, vocab, sufvocab, args)
            logger.info('{} epoch test loss = {}'.format(epoch + 1, test_loss))
            logger.info('{} epoch test accuracy = {}'.format(epoch + 1, test_accuracy))
            logger.info('{} sec for testing per epoch'.format(time.time() - start))

        logger.info('serializing...')
        prefix = '{}_{}ep_{}wembed_{}fembed_{}hidden_{}window_{}minibatch_{}opt'.format(DIR_NAME, epoch + 1, args.wembed, args.fembed, args.hidden, args.window, args.minibatch, args.opt)
        model_path = os.path.join(log_dir, prefix + '.model')
        model.save(model_path)

    logger.info('done training')
Example #12
if __name__ == "__main__":
    set_seed()
    log_tracer = LogTracer(nn_type, sep_mode)

    log_tracer("get train data")
    train, test, n_vocab = get_train_data(pad, sep_mode)
    log_tracer.trace_label("train", train)
    log_tracer.trace_label("test", test)

    if nn_type == "lstm":
        mlp = LSTM(n_vocab, n_units, N_OUT)
    elif nn_type == "cnn":
        mlp = CNN(n_vocab, n_units, N_OUT)
    opt = optimizers.Adam()
    opt.setup(mlp)
    opt.add_hook(optimizer.WeightDecay(w_decay))
    opt.add_hook(optimizer.GradientClipping(g_clip))

    log_tracer("start train")
    for epoch in range(n_epoch):
        for x, t in generate_bath(train, n_batch):
            mlp.cleargrads()
            loss, acc = mlp(x, t, train=True)
            loss.backward()
            opt.update()
            log_tracer.trace_train(epoch, loss.data, acc.data)
        x_v, t_v = parse_batch(test)
        loss_v, acc_v = mlp(x_v, t_v)
        log_tracer.trace_test(epoch, loss_v.data, acc_v.data, True)
    mlp.save(sep_mode)
Example #13
validate_iter = iterators.SerialIterator(validate_dataset,
                                         args.batchsize,
                                         repeat=False,
                                         shuffle=False)

# model
model = model.make_model(V, args.embed_dim, args.channel_num, args.rnn_dim,
                         args.fc_dim, C)
classifier = L.Classifier(model)
if args.gpu >= 0:
    classifier.to_gpu()

# optimizer
optimizer = optimizers.Adam()
optimizer.setup(classifier)
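# the local name 'optimizer' shadows the chainer.optimizer module, presumably imported here as 'optimizer_'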
optimizer.add_hook(optimizer_.WeightDecay(1e-3))

# trainer
updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

# extensions
log_report = E.LogReport(trigger=(10, 'iteration'))
print_report = E.PrintReport([
    'epoch', 'iteration', 'main/loss', 'main/accuracy', 'validation/main/loss',
    'validation/main/accuracy', 'elapsed_time'
])

evaluator = evaluator_.Evaluator(validate_iter, classifier, device=args.gpu)

trainer.extend(log_report)
Example #14
# get the correct total length
n_train = train_data._length

# the discriminator is tasked with classifying examples as real or fake
Disc = CustomClassifier(predictor=d.Discriminator(latent_dim),
                        lossfun=f.sigmoid_cross_entropy)
Disc.compute_accuracy = False
Gen = g.Generator()
Enc = e.Encoder(latent_dim)

# Use Adam optimizer
# learning rate, beta1, beta2
disc_optimizer = optimizers.Adam(initial_alpha, beta1=beta_1, beta2=beta_2)
disc_optimizer.setup(Disc)
# using those parameters for all optimizers: 0.36 --> 0.61
disc_optimizer.add_hook(optimizer.WeightDecay(rate=weight_decay))

gen_optimizer = optimizers.Adam(initial_alpha, beta1=beta_1, beta2=beta_2)
gen_optimizer.setup(Gen)
gen_optimizer.add_hook(optimizer.WeightDecay(rate=weight_decay))

enc_optimizer = optimizers.Adam(initial_alpha, beta1=beta_1, beta2=beta_2)
enc_optimizer.setup(Enc)
enc_optimizer.add_hook(optimizer.WeightDecay(rate=weight_decay))

#Define iterator
train_iter_X = i.SerialIterator(train_data,
                                batch_size=batchsize,
                                repeat=True,
                                shuffle=True)
Example #15
def main():
  args = parse_args()
  XP.set_library(args)
  date=time.localtime()[:6]
  D=[]
  for i in date:
    D.append(str(i))
  D="_".join(D)

  save_path=args.save_path
  if os.path.exists(save_path)==False:
    os.mkdir(save_path)

  if args.model_path!=None:
    print("continue existed model!! load recipe of {}".format(args.model_path))
    with open(args.model_path+'/recipe.json','r') as f:
      recipe=json.load(f)
    vae_enc=recipe["network"]["IM"]["vae_enc"]
    vae_z=recipe["network"]["IM"]["vae_z"]
    vae_dec=recipe["network"]["IM"]["vae_dec"]
    times=recipe["network"]["IM"]["times"]
    alpha=recipe["network"]["IM"]["KLcoefficient"]
    
    batchsize=recipe["setting"]["batchsize"]
    maxepoch=args.maxepoch
    weightdecay=recipe["setting"]["weightdecay"]
    grad_clip=recipe["setting"]["grad_clip"]
    cur_epoch=recipe["setting"]["cur_epoch"]+1
    ini_lr=recipe["setting"]["initial_learningrate"]
    cur_lr=recipe["setting"]["cur_lr"]            

    with open(args.model_path+"/../trainloss.json",'r') as f:
      trainloss_dic=json.load(f)
    with open(args.model_path+"/../valloss.json",'r') as f:
      valloss_dic=json.load(f)

  else:
    vae_enc=args.vae_enc
    vae_z=args.vae_z
    vae_dec=args.vae_dec
    times=args.times
    alpha=args.alpha
    batchsize=args.batchsize
    maxepoch=args.maxepoch
    weightdecay=args.weightdecay
    grad_clip=5
    cur_epoch=0
    ini_lr=args.lr
    cur_lr=ini_lr
    trainloss_dic={}
    valloss_dic={}

  print('this experiment started at :{}'.format(D))
  print('***Experiment settings***')
  print('[IM]vae encoder hidden size :{}'.format(vae_enc))
  print('[IM]vae hidden layer size :{}'.format(vae_z))
  print('[IM]vae decoder hidden layer size :{}'.format(vae_dec)) 
  print('[IM]sequence length:{}'.format(times)) 
  print('max epoch :{}'.format(maxepoch))
  print('mini batch size :{}'.format(batchsize))
  print('initial learning rate :{}'.format(cur_lr))
  print('weight decay :{}'.format(weightdecay))
  print("optimization by :{}".format("Adam"))
  print("VAE KL coefficient:",alpha)
  print('*************************') 
  
  vae = VAE_bernoulli_noattention(vae_enc,vae_z,vae_dec,28,28,1)
  opt = optimizers.Adam(alpha = cur_lr)
  opt.setup(vae)
  if args.model_path!=None:
    print('loading model ...')
    serializers.load_npz(args.model_path + '/VAEweights', vae)
    serializers.load_npz(args.model_path + '/optimizer', opt)
  else:
    print('making [[new]] model ...')
    for param in vae.params():
      data = param.data
      data[:] = np.random.uniform(-0.1, 0.1, data.shape)
  opt.add_hook(optimizer.GradientClipping(grad_clip))
  opt.add_hook(optimizer.WeightDecay(weightdecay))  

  if args.gpu >= 0 :
    vae.to_gpu()

  mnist=MNIST(binarize=True)
  train_size = mnist.train_size
  test_size = mnist.test_size
 
  eps = 1e-8
  for epoch in range(cur_epoch+1, maxepoch+1):
    print('\nepoch {}'.format(epoch))
    LX = 0.0
    LZ = 0.0
    counter = 0
    for iter,(img_array,label_array) in enumerate(mnist.gen_train(batchsize,Random=True)):
        B = img_array.shape[0]
        Lz = XP.fzeros(())
        vae.reset(img_array)
        
        #first to T-1 step
        for j in range(times-1):
            y,kl = vae.free_energy_onestep()
            Lz_i = alpha*kl
            Lz += Lz_i
        #last step
        j+=1
        y,kl = vae.free_energy_onestep()
        Lz_i = alpha*kl
        Lz += Lz_i
        Lx = Bernoulli_nll_wesp(vae.x,y,eps)
        
        LZ += Lz.data
        LX += Lx.data
 
        loss = (Lx+Lz)/batchsize
        loss.backward()
        opt.update()

        counter += B
        sys.stdout.write('\rnow training ...  epoch {}, {}/{}  '.format(epoch,counter,mnist.train_size))
        sys.stdout.flush()
        if (iter+1) % 100 == 0:
          print("({}-th batch mean loss) Lx:%03.3f Lz:%03.3f".format(counter) % (Lx.data/B,Lz.data/B))

    img_array = cuda.to_cpu(y.data)
    im_array = img_array.reshape(batchsize*28,28)
    img = im_array[:28*5]
    plt.clf()
    plt.imshow(img,cmap=cm.gray)
    plt.colorbar(orientation='horizontal')
    plt.savefig(save_path+"/"+"img{}.png".format(epoch))

    trace(save_path+"/trainloss.txt","epoch {} Lx:{} Lz:{} Lx+Lz:{}".format(epoch,LX/train_size,LZ/train_size,(LX+LZ)/train_size))            	
    trainloss_dic[str(epoch).zfill(3)]={
                    "Lx":float(LX/train_size),
                    "Lz":float(LZ/train_size),
                    "Lx+Lz":float((LX+LZ)/train_size)}
    with open(save_path+"/trainloss.json",'w') as f:
        json.dump(trainloss_dic,f,indent=4)   

    print('save model ...')
    prefix = save_path+"/"+str(epoch).zfill(3)
    if os.path.exists(prefix)==False:
        os.mkdir(prefix)        
    serializers.save_npz(prefix + '/VAEweights', vae) 
    serializers.save_npz(prefix + '/optimizer', opt)
    print('save recipe...')
    recipe_dic = {
    "date":D,
    "setting":{
        "maxepoch":maxepoch,
        "batchsize":batchsize,
        "weightdecay":weightdecay,
        "grad_clip":grad_clip,
        "opt":"Adam",
        "initial_learningrate":ini_lr,
        "cur_epoch":epoch,
        "cur_lr":cur_lr},
    "network":{
        "IM":{
            "x_size":784,
            "vae_enc":vae_enc,
            "vae_z":vae_z,
            "vae_dec":vae_dec,
            "times":times,
            "KLcoefficient":alpha},
            },
            }
    with open(prefix+'/recipe.json','w') as f:
      json.dump(recipe_dic,f,indent=4)
           
    if epoch % 1 == 0:
        print("\nvalidation step")
        LX = 0.0
        LZ = 0.0        
        counter = 0
        for iter,(img_array,label_array) in enumerate(mnist.gen_test(batchsize)):
            B = img_array.shape[0]
            Lz = XP.fzeros(())
            vae.reset(img_array)
            
            #first to T-1 step
            for j in range(times-1):
                y,kl = vae.free_energy_onestep()
                Lz_i = alpha*kl
                Lz += Lz_i           
            #last step
            j+=1
            y,kl = vae.free_energy_onestep()
            Lz_i = alpha*kl
            Lz += Lz_i  
            Lx = Bernoulli_nll_wesp(vae.x,y,eps)

            LZ += Lz.data.reshape(())
            LX += Lx.data.reshape(())

            counter += B
            sys.stdout.write('\rnow testing ...  epoch {}, {}/{}  '.format(epoch,counter,test_size))
            sys.stdout.flush()
        print("")
        trace(save_path+"/valloss.txt","epoch {} Lx:{} Lz:{} Lx+Lz:{}".format(epoch,LX/test_size,LZ/test_size,(LX+LZ)/test_size))                  		
        valloss_dic[str(epoch).zfill(3)]={
                        "Lx":float(LX/test_size),
                        "Lz":float(LZ/test_size),
                        "Lx+Lz":float((LX+LZ)/test_size)}
        with open(save_path+"/valloss.json",'w') as f:
            json.dump(valloss_dic,f,indent=4)

        img_array = cuda.to_cpu(y.data)
        im_array = img_array.reshape(batchsize*28,28)
        img = im_array[:28*5]
        plt.clf()
        plt.imshow(img,cmap=cm.gray)
        plt.colorbar(orientation='horizontal')
        plt.savefig(save_path+"/"+"img_test{}.png".format(epoch))
  print('finished.') 
Example #16
    def for_one_batch_training(self):
        loss_list = []
        text_count = 0
        model_list = glob.glob(
            "_".join(self.OUTPUT_PATH.format("model", self.FEATURE_TYPE, self.USE_DROPOUT, self.num_of_middle_layer, "*", 0).split("_")[:-1]))
        model = Att_Seq2TF(emb_size=self.EMBED_SIZE,
                           fnn_size=self.FNN_SIZE,
                           hidden_size=self.HIDDEN_SIZE,
                           num_of_middle_layer=self.num_of_middle_layer,
                           use_dropout=self.USE_DROPOUT,
                           flag_gpu=self.FLAG_GPU)
        if len(model_list) != 0:
            for model_cand in sorted(model_list, key=lambda x: int(x.split("_")[-2][9:])):
                loss_list.append(
                    float(model_cand[model_cand.find("loss") + 4:model_cand.rfind(".")]))
            serializers.load_hdf5(model_cand, model)
            text_count = int(model_cand.split("_")[-2][9:])
            print(model_cand)
            print(text_count)
            print(loss_list)
        if self.FLAG_GPU:
            model.to_gpu(0)
        model.reset()
        # print("d")
        opt = optimizers.Adam()
        # optimizer.use_cleargrads()
        opt.setup(model)
        opt.add_hook(optimizer.WeightDecay(0.0005))
        opt.add_hook(optimizer.GradientClipping(5))
        opt_list = glob.glob(
            "_".join(self.OUTPUT_PATH.format("opt", self.FEATURE_TYPE, self.USE_DROPOUT, self.num_of_middle_layer, "*", 0).split("_")[:-1]))
        if len(opt_list) != 0:
            opt_list = sorted(
                opt_list, key=lambda x: int(x.split("_")[-2][9:]))
            serializers.load_hdf5(opt_list[-1], opt)
            print(opt_list[-1])

        # training loop
        # train_losses = []
        # test_losses = []
        print("start...")
        start_time = time.time()
        # start training
        q = Queue(100)
        q_valid = Queue(500)
        q_valid1 = Queue(500)
        minibatch_maker = MinibatchMaker(
            self.FEATURE_TYPE, self.FLAG_GPU, "train", text_count, 0)
        p = Process(target=minibatch_maker.epoch_pickle, args=(q, ))
        p.start()
        # minibatch_maker1 = MinibatchMaker(
        #     self.FEATURE_TYPE, self.FLAG_GPU, "train", text_count, 1)
        # p1 = Process(target=minibatch_maker1.epoch_factory, args=(q, ))
        # p1.start()
        # minibatch_maker2 = MinibatchMaker(
        #     self.FEATURE_TYPE, self.FLAG_GPU, "train", text_count, 2)
        # p2 = Process(target=minibatch_maker2.epoch_factory, args=(q, ))
        # p2.start()
        # minibatch_maker3 = MinibatchMaker(
        #     self.FEATURE_TYPE, self.FLAG_GPU, "train", text_count, 3)
        # p3 = Process(target=minibatch_maker3.epoch_factory, args=(q, ))
        # p3.start()
        # minibatch_maker4 = MinibatchMaker(
        #     self.FEATURE_TYPE, self.FLAG_GPU, "train", text_count, 4)
        # p4 = Process(target=minibatch_maker4.epoch_factory, args=(q, ))
        # p4.start()
        # minibatch_maker5 = MinibatchMaker(
        #     self.FEATURE_TYPE, self.FLAG_GPU, "train", text_count, 5)
        # p5 = Process(target=minibatch_maker5.epoch_factory, args=(q, ))
        # p5.start()
        # minibatch_maker6 = MinibatchMaker(
        #     self.FEATURE_TYPE, self.FLAG_GPU, "train", text_count, 6)
        # p6 = Process(target=minibatch_maker6.epoch_factory, args=(q, ))
        # p6.start()
        # minibatch_maker7 = MinibatchMaker(
        #     self.FEATURE_TYPE, self.FLAG_GPU, "train", text_count, 7)
        # p7 = Process(target=minibatch_maker7.epoch_factory, args=(q, ))
        # p7.start()
        # minibatch_maker8 = MinibatchMaker(
        #     self.FEATURE_TYPE, self.FLAG_GPU, "train", text_count, 8)
        # p8 = Process(target=minibatch_maker8.epoch_factory, args=(q, ))
        # p8.start()
        # minibatch_maker9 = MinibatchMaker(
        #     self.FEATURE_TYPE, self.FLAG_GPU, "train", text_count, 9)
        # p9 = Process(target=minibatch_maker9.epoch_factory, args=(q, ))
        # p9.start()
        #train_len = q.get()
        minibatch_maker_valid = MinibatchMaker(
            self.FEATURE_TYPE, self.FLAG_GPU, "valid", text_div=0)
        p_valid = Process(
            target=minibatch_maker_valid.epoch_pickle, args=(q_valid, ))
        p_valid.start()
        minibatch_maker_valid1 = MinibatchMaker(
            self.FEATURE_TYPE, self.FLAG_GPU, "valid", text_div=1)
        p_valid1 = Process(
            target=minibatch_maker_valid1.epoch_pickle, args=(q_valid1, ))
        p_valid1.start()
        valid_len = q_valid.get()
        valid_len1 = q_valid1.get()
        #valid_len1 = 0
        #print("altsvm" + str(train_len))
        print("altsvm" + str(valid_len))
        print("altsvm" + str(valid_len1))
        # p.terminate()
        # p_valid.terminate()
        # exit()
        waited_count = 0
        verb_data_count = 0
        pseudo_epoch_count = 0
        train_dict_keep = None
        while waited_count < 100 and (len(loss_list) <= 10 or min(loss_list[-10:]) != loss_list[-10]):
            if not q.empty():
                # print("something")
                text_count += self.EPOCH_TEXT
                try_count = 0
                # while try_count < 5:
                #     try:
                #         try_count += 1
                #         print(str(q.full()))
                enc_words, fnn_inputs, dec_scores = q.get()
                # if text_sentence_vec_dict != None:
                #train_dict_keep = text_sentence_vec_dict
                # except Exception as e:
                #     print("cant_get")
                #     print(e)
                # if len(x_train) > 0:
                #     print("can_get")
                #     break
                # sys.exit()
                N = len(dec_scores)
                verb_data_count += N
                if N != 0:
                    # training
                    start_time_train = time.time()
                    perm = np.random.permutation(N)
                    sum_loss = 0
                    # print("first_verb")
                    for i in range(0, N, self.BATCH_SIZE):
                        # print(i)
                        if self.FLAG_GPU:
                            enc_words_batch = []
                            for x in perm[i:i + self.BATCH_SIZE]:
                                enc_words_batch.append(enc_words[x])
                                # enc_words_batch.append(
                                #     train_dict_keep[enc_words[x][0]][enc_words[x][1]])
                            # enc_words_batch = cuda.to_gpu(
                            #    np.array(enc_words_batch), device=0)
                            fnn_inputs_batch = cuda.to_gpu(fnn_inputs[
                                perm[i:i + self.BATCH_SIZE]], device=0)
                            dec_scores_batch = cuda.to_gpu(dec_scores[
                                perm[i:i + self.BATCH_SIZE]], device=0)
                        else:
                            enc_words_batch = []
                            for x in perm[i:i + self.BATCH_SIZE]:
                                enc_words_batch.append(enc_words[x])
                                # enc_words_batch.append(
                                #     train_dict_keep[enc_words[x][0]][enc_words[x][1]])
                            fnn_inputs_batch = fnn_inputs[
                                perm[i:i + self.BATCH_SIZE]]
                            dec_scores_batch = dec_scores[
                                perm[i:i + self.BATCH_SIZE]]

                        # reset the model
                        model.reset()
                        # forward pass
                        model.encode(enc_words_batch)
                        # decoder computation
                        loss = model.decode(fnn_inputs_batch, dec_scores_batch)
                        # print(loss)
                        sum_loss += loss.data * len(dec_scores_batch)
                        loss.backward()
                        opt.update()
                    # print("first_verb_finished")
                    average_loss = sum_loss / N
                    # train_losses.append(average_loss)
                    interval = int(time.time() - start_time_train)
                    #print("training time: {}sec, N: {}".format(interval,N))

                # test
                # loss = model(x_test, y_test)
                # test_losses.append(loss.data)

                # output learning process
                if text_count % 100 == 0:
                    print("text_count: {} train loss: {} verb_data_count: {} time: {}".format(
                        text_count, average_loss, verb_data_count, time.ctime()))
                if verb_data_count // self.EPOCH_LIMIT > pseudo_epoch_count:
                    pseudo_epoch_count += 1
                    # print(verb_data_count)
                    # print(pseudo_epoch_count)

                    total_loss = 0
                    total_count = 0
                    valid_dict_keep = None
                    model.mode_change("test")
                    #chainer.config.train = False
                    valid_count = 0
                    valid1_count = 0
                    while (valid_count + valid1_count) < (valid_len + valid_len1):
                        if valid_count < valid_len and not q_valid.empty():
                            enc_words, fnn_inputs, dec_scores = q_valid.get()
                            valid_count += 1
                        elif valid1_count < valid_len1 and not q_valid1.empty():
                            enc_words, fnn_inputs, dec_scores = q_valid1.get()
                            valid1_count += 1
                        else:
                            print("waiting valid " + str(valid_count) +
                                  " " + str(valid1_count))
                            time.sleep(10)
                            continue
                        # if text_sentence_vec_dict != None:
                        #valid_dict_keep = text_sentence_vec_dict
                        if len(dec_scores) == 0:
                            continue
                        N = len(dec_scores)
                        for i in range(0, N, self.BATCH_SIZE):
                            if self.FLAG_GPU:
                                enc_words_batch = []
                                for x in enc_words[i:i + self.BATCH_SIZE]:
                                    enc_words_batch.append(x)
                                    # enc_words_batch.append(
                                    #     valid_dict_keep[x[0]][x[1]])
                                # enc_words_batch = cuda.to_gpu(
                                #    enc_words_batch, device=0)
                                fnn_inputs_batch = cuda.to_gpu(
                                    fnn_inputs[i:i + self.BATCH_SIZE], device=0)
                                dec_scores_batch = cuda.to_gpu(
                                    dec_scores[i:i + self.BATCH_SIZE], device=0)
                            else:
                                enc_words_batch = []
                                for x in enc_words[i:i + self.BATCH_SIZE]:
                                    enc_words_batch.append(x)
                                    # enc_words_batch.append(
                                    #     valid_dict_keep[x[0]][x[1]])
                                # enc_words_batch = cuda.to_gpu(
                                #    enc_words_batch, device=0)
                                fnn_inputs_batch = fnn_inputs[
                                    i:i + self.BATCH_SIZE]
                                dec_scores_batch = dec_scores[
                                    i:i + self.BATCH_SIZE]
                            # reset the model
                            model.reset()
                            if len(enc_words_batch) == 0:
                                print(len(enc_words))
                                print(len(dec_scores_batch))
                                print(i)
                                exit()

                            with chainer.no_backprop_mode():
                                # forward pass
                                model.encode(enc_words_batch)
                                # decoder computation
                                loss_data = model.decode(
                                    fnn_inputs_batch, dec_scores_batch).data
                                if not self.ARR.isnan(loss_data):
                                    total_loss += loss_data * \
                                        len(dec_scores_batch)
                                    total_count += len(dec_scores_batch)
                                else:
                                    print(loss_data)

                    if total_count == 0:
                        print("skipped")
                        continue
                    valid_loss = float(total_loss / total_count)
                    model.mode_change("train")
                    #chainer.config.train = True
                    # print(valid_loss)
                    # print(total_loss)
                    # print(total_count)
                    print("valid_count: {} valid loss: {} time: {}".format(
                        verb_data_count // self.EPOCH_LIMIT, valid_loss, time.ctime()))
                    try:
                        # with open("test", mode="wb") as f:
                        #    pickle.dump("hui",f)
                        # with open(self.OUTPUT_PATH.format("opt", self.FEATURE_TYPE, str(self.USE_DROPOUT), str(self.num_of_middle_layer), str(verb_count // self.EPOCH_LIMIT), valid_loss), mode="wb") as f:
                        #    pickle.dump(opt,f)
                        # print("will_save")
                        # model_saved=model.copy()
                        # model_saved.to_cpu()
                        # fui=float(70)
                        serializers.save_hdf5(  # "/gs/hs0/tga-cl/yamashiro-s-aa/workspace/nn/fnn/model/model",model)
                            self.OUTPUT_PATH.format("model", self.FEATURE_TYPE, self.USE_DROPOUT, self.num_of_middle_layer, text_count, float(valid_loss)), model)
                        # print("model_saved")
                        serializers.save_hdf5(
                            self.OUTPUT_PATH.format("opt", self.FEATURE_TYPE, self.USE_DROPOUT, self.num_of_middle_layer, text_count, float(valid_loss)), opt)
                    except Exception as e:
                        raise e
                    # print("saved")
                    loss_list.append(valid_loss)

                    # q_valid.put((x_valid, y_valid))
                waited_count = 0
            else:
                print("waiting")
                time.sleep(10)
                print(str(text_count) + " " + str(q.qsize()))
                waited_count += 1

        print("end")
        p.terminate()
        # p1.terminate()
        # p2.terminate()
        # p3.terminate()
        # p4.terminate()
        # p5.terminate()
        # p6.terminate()
        # p7.terminate()
        # p8.terminate()
        # p9.terminate()
        p_valid.terminate()
        p_valid1.terminate()
        interval = int(time.time() - start_time)
        print("elapsed time: {}sec, last pseudo_epoch: {}".format(
            interval, str(verb_data_count // self.EPOCH_LIMIT)))
Example #17
train_iter = iterators.SerialIterator(dataset_train,
                                      batch_size=args.batchsize,
                                      shuffle=trs)
if args.numval > 0:
    val_iter = iterators.SerialIterator(dataset_val,
                                        batch_size=len(dataset_val),
                                        repeat=False,
                                        shuffle=False)

# -- Set optimizers
optimizer1 = use_optimizer(lr=args.learning_rate)
optimizer1.setup(loss.phi)
optimizer2 = use_optimizer(lr=args.learning_rate)
optimizer2.setup(loss.net)
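# L1 (Lasso) regularization for the embedder phi, L2 weight decay for the network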
optimizer1.add_hook(optimizer_module.Lasso(args.beta))
optimizer2.add_hook(optimizer_module.WeightDecay(args.gamma))

# -- Set a trigger
if args.log_in_iteration:
    trigger = (1, 'iteration')
else:
    trigger = (1, 'epoch')

# -- Set a trainer
if args.fixed_embedder:
    optimizer_dict = {'net': optimizer2}
else:
    optimizer_dict = {'phi': optimizer1, 'net': optimizer2}
updater = lkis.Updater(train_iter,
                       optimizer_dict,
                       device=args.gpu,
Example #18
                                          shuffle=True)
    validation_iter = iterators.SerialIterator(validation,
                                               batch_size=bold_val.shape[0],
                                               repeat=False,
                                               shuffle=False)

    linearmodel = RegressorZ(LinearRegression(bold_vox_dim, args.ndim_z),
                             pretrained_gan=dcgan,
                             featnet=alexnet)

    # Set up optimizer
    optim = optimizers.Adam()
    optim.setup(linearmodel)

    if args.do_weightdecay:
        optim.add_hook(optimizer.WeightDecay(args.l2_lambda))

    updater = training.StandardUpdater(train_iter,
                                       optim,
                                       device=args.gpu_device)

    # Set up trainer and extensions
    trainer = training.Trainer(updater, (args.nepochs, 'epoch'),
                               out=args.outdir)
    trainer.extend(
        extensions.Evaluator(validation_iter,
                             linearmodel,
                             device=args.gpu_device))
    trainer.extend(extensions.LogReport(log_name='linearmodel_train.log'))
    trainer.extend(
        extensions.PrintReport(
Example #19
    def __init__(self, model):
        self.model = model
        self.optimizer = optimizers.Adam(lr)
        self.optimizer.setup(model)
        self.optimizer.add_hook(optimizer.WeightDecay(weight_decay))
Example #20
def train(args):
    vocab = Vocabulary.from_conll(args.train, args.vocab)
    train_dataset = [conll_to_train(x, vocab) for x in read_conll(args.train)]
    dev_dataset = [conll_to_train(x, vocab) for x in read_conll(args.dev)]

    parser = Parser(args.vocab, args.embed, args.hidden)
    if args.gpu >= 0:
        parser.to_gpu()

    opt = optimizers.AdaGrad(lr=0.01)
    opt.setup(parser)
    opt.add_hook(optimizer.GradientClipping(10))
    opt.add_hook(optimizer.WeightDecay(0.0001))

    for epoch in range(args.epoch):
        random.shuffle(train_dataset)

        parser.zerograds()
        loss = XP.fzeros(())

        for i, data in enumerate(train_dataset):
            trace('epoch %3d: train sample %6d:' % (epoch + 1, i + 1))
            parent_scores, root_scores = parser.forward(data)
            if len(data) > 1:
                parent_scores = functions.split_axis(parent_scores, len(data),
                                                     0)
            else:
                parent_scores = (parent_scores, )

            root = -1
            for j, (p_scores, (wid,
                               parent)) in enumerate(zip(parent_scores, data)):
                if parent == -1:
                    trace('  %3d: root' % j)
                    root = j
                else:
                    parent_est = p_scores.data.argmax()
                    trace('%c %3d -> %3d (%3d)' %
                          ('*' if parent == parent_est else ' ', j, parent_est,
                           parent))
                    loss += functions.softmax_cross_entropy(
                        p_scores, XP.iarray([parent]))

            root_est = root_scores.data.argmax()
            trace('ROOT: %3d (%3d)' % (root_est, root))
            loss += functions.softmax_cross_entropy(root_scores,
                                                    XP.iarray([root]))

            if (i + 1) % 200 == 0:
                loss.backward()
                opt.update()
                parser.zerograds()
                loss = XP.fzeros(())

        loss.backward()
        opt.update()
        trace('epoch %3d: trained.                        ' % (epoch + 1))

        parent_num = 0
        parent_match = 0
        root_num = 0
        root_match = 0
        for i, data in enumerate(dev_dataset):
            trace('epoch %3d: dev sample %6d:' % (epoch + 1, i + 1),
                  rollback=True)
            parent_scores, root_scores = parser.forward(data)
            if len(data) > 1:
                parent_scores = functions.split_axis(parent_scores, len(data),
                                                     0)
            else:
                parent_scores = (parent_scores, )

            root = -1
            for j, (p_scores, (wid,
                               parent)) in enumerate(zip(parent_scores, data)):
                if parent == -1:
                    root = j
                else:
                    parent_est = p_scores.data.argmax()
                    parent_num += 1
                    parent_match += 1 if parent_est == parent else 0

            root_est = root_scores.data.argmax()
            root_num += 1
            root_match += 1 if root_est == root else 0

        result_str = \
          'epoch %3d: dev: parent-acc = %.4f (%5d/%5d), root-acc = %.4f (%4d/%4d)' % \
          ( \
            epoch + 1, \
            parent_match / parent_num, parent_match, parent_num, \
            root_match / root_num, root_match, root_num)
        trace(result_str)

        with open(args.model + '.log', 'a') as fp:
            print(result_str, file=fp)

        trace('epoch %3d: saving models ...' % (epoch + 1))
        prefix = args.model + '.%03d' % (epoch + 1)
        vocab.save(prefix + '.vocab')
        parser.save_spec(prefix + '.parent_spec')
        serializers.save_hdf5(prefix + '.parent_weights', parser)

    trace('finished.')
Example #21
        print('mkdir ' + out_dir)
    assert os.path.isdir(out_dir)

    # setup network model, optimizer, and constant values to control training
    z_vec_dim = config.Z_VECTOR_DIM
    batch_size = config.BATCH_SIZE
    update_max = config.UPDATE_MAX
    update_save_params = config.UPDATE_SAVE_PARAMS
    kernel_dim = getattr(config, 'KERNEL_DIM', 1)
    kernel_eps = getattr(config, 'KERNEL_EPS', 1)

    model_dis = config.Discriminator()
    optimizer_dis = config.OPTIMIZER_DIS
    optimizer_dis.setup(model_dis)
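    # weight-decay rates default to 1e-7 when the config does not define them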
    decay_d = getattr(config, 'DECAY_RATE_DIS', 1e-7)
    optimizer_dis.add_hook(optimizer.WeightDecay(decay_d))
    model_opt_set_dis = ModelOptimizerSet(model_dis, optimizer_dis)

    model_gen = config.Generator()
    optimizer_gen = config.OPTIMIZER_GEN
    optimizer_gen.setup(model_gen)
    decay_g = getattr(config, 'DECAY_RATE_GEN', 1e-7)
    optimizer_gen.add_hook(optimizer.WeightDecay(decay_g))
    model_opt_set_gen = ModelOptimizerSet(model_gen, optimizer_gen)

    # setup batch generator
    with open(args.dataset, 'r') as f:
        input_files = [line.strip() for line in f.readlines()]
    batch_generator = ImageBatchGenerator(input_files, batch_size,
                                          config.HEIGHT, config.WIDTH,
                                          channel=config.CHANNEL,
Example #22
def train(args):
    trace('loading corpus ...')
    with open(args.source) as fp:
        trees = [make_tree(l) for l in fp]

    trace('extracting leaf nodes ...')
    word_lists = [extract_words(t) for t in trees]
    lower_lists = [[w.lower() for w in words] for words in word_lists]

    trace('extracting gold operations ...')
    op_lists = [make_operations(t) for t in trees]

    trace('making vocabulary ...')
    word_vocab = Vocabulary.new(lower_lists, args.vocab)
    phrase_set = set()
    semiterminal_set = set()
    for tree in trees:
        phrase_set |= set(extract_phrase_labels(tree))
        semiterminal_set |= set(extract_semiterminals(tree))
    phrase_vocab = Vocabulary.new([list(phrase_set)],
                                  len(phrase_set),
                                  add_special_tokens=False)
    semiterminal_vocab = Vocabulary.new([list(semiterminal_set)],
                                        len(semiterminal_set),
                                        add_special_tokens=False)

    trace('converting data ...')
    word_lists = [convert_word_list(x, word_vocab) for x in word_lists]
    op_lists = [
        convert_op_list(x, phrase_vocab, semiterminal_vocab) for x in op_lists
    ]

    trace('start training ...')
    parser = Parser(
        args.vocab,
        args.embed,
        args.char_embed,
        args.queue,
        args.stack,
        args.srstate,
        len(phrase_set),
        len(semiterminal_set),
    )
    if args.use_gpu:
        parser.to_gpu()
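    # plain SGD with gradient clipping and weight decay; opt.lr is multiplied by 0.92 after each epoch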
    opt = optimizers.SGD(lr=0.1)
    opt.setup(parser)
    opt.add_hook(optimizer.GradientClipping(10))
    opt.add_hook(optimizer.WeightDecay(0.0001))

    batch_set = list(zip(word_lists, op_lists))

    for epoch in range(args.epoch):
        n = 0
        random.shuffle(batch_set)

        for samples in batch(batch_set, args.minibatch):
            parser.zerograds()
            loss = XP.fzeros(())

            for word_list, op_list in zip(*samples):
                trace('epoch %3d, sample %6d:' % (epoch + 1, n + 1))
                loss += parser.forward_train(word_list, op_list)
                n += 1

            loss.backward()
            opt.update()

        trace('saving model ...')
        prefix = args.model + '.%03d' % (epoch + 1)
        word_vocab.save(prefix + '.words')
        phrase_vocab.save(prefix + '.phrases')
        semiterminal_vocab.save(prefix + '.semiterminals')
        parser.save_spec(prefix + '.spec')
        serializers.save_hdf5(prefix + '.weights', parser)

        opt.lr *= 0.92

    trace('finished.')
Example #23
        optimizer_generator.setup(G.Vanilla())
    else:  # Use residual blocks
        print('Generator: Residual (N={})'.format(g_res))
        optimizer_generator.setup(G.Residual(n=g_res,
                                             out_shape=train[0].shape))

    if d_res < 0:  # No residual blocks
        print('Discriminator: Standard (Minibatch Discrimination={})'.format(
            mbd))
        optimizer_discriminator.setup(D.Vanilla(use_mbd=mbd))
    else:  # Use residual blocks
        print('Discriminator: Residual (N={}, Minibatch Discrimination={})'.
              format(d_res, mbd))
        optimizer_discriminator.setup(D.Residual(n=d_res, use_mbd=mbd))

    optimizer_generator.add_hook(optimizer.WeightDecay(g_weight_decay))
    optimizer_discriminator.add_hook(optimizer.WeightDecay(d_weight_decay))

    updater = GenerativeAdversarialUpdater(
        iterator=train_iter,
        noise_iterator=z_iter,
        optimizer_generator=optimizer_generator,
        optimizer_discriminator=optimizer_discriminator,
        device=gpu)

    trainer = training.Trainer(updater,
                               stop_trigger=(epochs, 'epoch'),
                               out=out)

    # Logging losses to result/logs/loss
    trainer.extend(