Example #1
def optimizer(opt_str):
    """
    Infer the optimizer from the input string.
    """

    if (opt_str.lower() == 'adam'):
        opt = O.Adam(amsgrad=True)
    elif (opt_str.lower() == 'ada_d'):
        opt = O.AdaDelta()
    elif (opt_str.lower() == 'ada_g'):
        opt = O.AdaGrad()
    elif (opt_str.lower() == 'm_sgd'):
        opt = O.MomentumSGD()
    elif (opt_str.lower() == 'n_ag'):
        opt = O.NesterovAG()
    elif (opt_str.lower() == 'rmsp'):
        opt = O.RMSprop()
    elif (opt_str.lower() == 'rmsp_g'):
        opt = O.RMSpropGraves()
    elif (opt_str.lower() == 'sgd'):
        opt = O.SGD()
    elif (opt_str.lower() == 'smorms'):
        opt = O.SMORMS3()
    else:
        opt = O.Adam(amsgrad=True)
        logger.warning('{}->{}'.format(opt_str, opt.__doc__.split('.')[0]))

    logger.debug('Optimizer: {}'.format(opt.__doc__.split('.')[0]))
    return opt
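The helper above only constructs the optimizer object; in the later examples the returned optimizer is bound to a model with setup() and given optional gradient hooks. Below is a minimal usage sketch under assumptions not in the snippet itself: O is chainer.optimizers, a module-level logger exists as the function expects, the Linear link is only a stand-in model, and the hook values are illustrative.

import logging
import chainer
import chainer.links as L
from chainer import optimizers as O

logger = logging.getLogger(__name__)       # the function above logs through this name

model = L.Linear(10, 2)                                  # stand-in model (any chainer.Link/Chain)
opt = optimizer('adam')                                  # resolves to O.Adam(amsgrad=True)
opt.setup(model)                                         # bind optimizer state to the model's parameters
opt.add_hook(chainer.optimizer.GradientClipping(5.0))    # optional hooks, as used in Examples #8, #11 and #12
opt.add_hook(chainer.optimizer.WeightDecay(1e-4))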
Example #2
def optimizer(opt_str):
    """
    Infer the optimizer from the input string.
    """

    if(opt_str.lower() == 'adam'):
        opt = O.Adam(amsgrad=True)
    elif(opt_str.lower() == 'ada_d'):
        opt = O.AdaDelta()
    elif(opt_str.lower() == 'ada_g'):
        opt = O.AdaGrad()
    elif(opt_str.lower() == 'm_sgd'):
        opt = O.MomentumSGD()
    elif(opt_str.lower() == 'n_ag'):
        opt = O.NesterovAG()
    elif(opt_str.lower() == 'rmsp'):
        opt = O.RMSprop()
    elif(opt_str.lower() == 'rmsp_g'):
        opt = O.RMSpropGraves()
    elif(opt_str.lower() == 'sgd'):
        opt = O.SGD()
    elif(opt_str.lower() == 'smorms'):
        opt = O.SMORMS3()
    else:
        opt = O.Adam(amsgrad=True)
        print('\n[Warning] {0}\n\t{1}->{2}\n'.format(
            fileFuncLine(), opt_str, opt.__doc__.split('.')[0])
        )

    print('Optimizer:', opt.__doc__.split('.')[0])
    return opt
Example #3
def get_opt(args):
    if args.opt_model == "SGD":
        alpha0 = 0.01 if args.alpha0 == 0 else args.alpha0
        return optimizers.SGD(lr=alpha0)
    if args.opt_model == "AdaGrad":
        alpha0 = 0.01 if args.alpha0 == 0 else args.alpha0
        return optimizers.AdaGrad(lr=alpha0)
    if args.opt_model == "AdaDelta":
        alpha0 = 0.95 if args.alpha0 == 0 else args.alpha0
        alpha1 = 1e-06 if args.alpha1 == 0 else args.alpha1
        return optimizers.AdaDelta(rho=alpha0, eps=alpha1)
    if args.opt_model == "Momentum":
        alpha0 = 0.01 if args.alpha0 == 0 else args.alpha0
        alpha1 = 0.9 if args.alpha1 == 0 else args.alpha1
        return optimizers.MomentumSGD(lr=alpha0, momentum=alpha1)
    if args.opt_model == "NAG":
        alpha0 = 0.01 if args.alpha0 == 0 else args.alpha0
        alpha1 = 0.9 if args.alpha1 == 0 else args.alpha1
        return optimizers.NesterovAG(lr=alpha0, momentum=alpha1)
    if args.opt_model == "RMS":
        return optimizers.RMSpropGraves()
    if args.opt_model == "SM":
        return optimizers.SMORMS3()
    if args.opt_model == "Adam":  # default case
        alpha0 = 0.001 if args.alpha0 == 0 else args.alpha0
        alpha1 = 0.9 if args.alpha1 == 0 else args.alpha1
        alpha2 = 0.999 if args.alpha2 == 0 else args.alpha2
        alpha3 = 1e-08 if args.alpha3 == 0 else args.alpha3
        return optimizers.Adam(alpha=alpha0,
                               beta1=alpha1,
                               beta2=alpha2,
                               eps=alpha3)
    print('no such optimization method', args.opt_model)
    sys.exit(1)
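get_opt reads the optimizer name and up to four scalar hyperparameters from an argparse-style namespace, treating 0 as "use the built-in default". A small sketch of a compatible namespace follows; the flag names are assumptions for illustration, not taken from the original script.

import argparse

# Hypothetical parser exposing the attributes get_opt reads.
parser = argparse.ArgumentParser()
parser.add_argument("--opt_model", default="Adam")
parser.add_argument("--alpha0", type=float, default=0)   # 0 means: fall back to the optimizer's default
parser.add_argument("--alpha1", type=float, default=0)
parser.add_argument("--alpha2", type=float, default=0)
parser.add_argument("--alpha3", type=float, default=0)
args = parser.parse_args(["--opt_model", "Momentum", "--alpha0", "0.05"])

opt = get_opt(args)   # -> optimizers.MomentumSGD(lr=0.05, momentum=0.9)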
Example #4
def get_optimizer(self, name, lr, momentum=0.9):
    if name.lower() == "adam":
        return optimizers.Adam(alpha=lr, beta1=momentum)
    if name.lower() == "smorms3":
        return optimizers.SMORMS3(lr=lr)
    if name.lower() == "adagrad":
        return optimizers.AdaGrad(lr=lr)
    if name.lower() == "adadelta":
        return optimizers.AdaDelta(rho=momentum)
    if name.lower() == "nesterov" or name.lower() == "nesterovag":
        return optimizers.NesterovAG(lr=lr, momentum=momentum)
    if name.lower() == "rmsprop":
        return optimizers.RMSprop(lr=lr, alpha=momentum)
    if name.lower() == "momentumsgd":
        return optimizers.MomentumSGD(lr=lr, momentum=momentum)
    if name.lower() == "sgd":
        return optimizers.SGD(lr=lr)
Example #5
def get_optimizer(name):
    """
    :type name: str
    :rtype: chainer.Optimizer
    """
    if name == "adadelta":
        opt = optimizers.AdaDelta()
    elif name == "adagrad":
        opt = optimizers.AdaGrad()
    elif name == "adam":
        opt = optimizers.Adam()
    elif name == "rmsprop":
        opt = optimizers.RMSprop()
    elif name == "smorms3":
        opt = optimizers.SMORMS3()
    else:
        raise ValueError("Unknown optimizer_name=%s" % name)
    return opt
Example #6
def setOptimizer(model, method, params):
    learningRate = params.get('learningRate', 0.001)
    alpha = params.get('alpha', 0.001)
    if (method == 'adam'):
        optimizer = optimizers.Adam(alpha=alpha)
    elif (method == 'smorms3'):
        optimizer = optimizers.SMORMS3(lr=learningRate)
    elif (method == 'rmsprop'):
        optimizer = optimizers.RMSprop(lr=learningRate)
    elif (method == 'sgd'):
        optimizer = optimizers.SGD(lr=learningRate)
    elif (method == 'momentum'):
        optimizer = optimizers.MomentumSGD(lr=learningRate)
    elif (method == 'adagrad'):
        optimizer = optimizers.AdaGrad(lr=learningRate)
    elif (method == 'adadelta'):
        optimizer = optimizers.AdaDelta()
    else:
        raise ValueError('unknown optimization method: %s' % method)
    optimizer.setup(model)
    return optimizer
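setOptimizer both builds the optimizer and calls setup() on the model, so the caller only passes a plain dict of hyperparameters; missing keys fall back to the defaults above. A short sketch follows; the Linear link is just a stand-in model.

import chainer.links as L

model = L.Linear(10, 2)   # stand-in model
opt = setOptimizer(model, 'momentum', {'learningRate': 0.05})   # MomentumSGD(lr=0.05), already set up on model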
Example #7
def create(self):
    return optimizers.SMORMS3(0.1)
Example #8
def main(args):
    gpu = args.gpu
    path_config = args.config
    mode = args.mode
    path_word2vec = args.word2vec
    curriculum = (args.curriculum != 0)
    
    # Hyper parameters (const)
    MAX_EPOCH = 10000000000
    MAX_PATIENCE = 20
    EVAL = 10000
    if curriculum:
        LENGTH_LIMITS = [10, 20, 30, 40, 50] # NOTE: experimental
    else:
        LENGTH_LIMITS = [50]

    config = utils.Config(path_config)
    
    # Preparaton
    path_corpus_train = config.getpath("prep_corpus") + ".train"
    path_corpus_val = config.getpath("prep_corpus") + ".val"
    basename = "won.%s.%s" % (
                    os.path.basename(path_corpus_train),
                    os.path.splitext(os.path.basename(path_config))[0])
    path_snapshot = os.path.join(config.getpath("snapshot"), basename + ".model")
    path_snapshot_vectors = os.path.join(config.getpath("snapshot"), basename + ".vectors.txt")
    if mode == "train":
        path_log = os.path.join(config.getpath("log"), basename + ".log")
        utils.set_logger(path_log)
    elif mode == "evaluation":
        path_evaluation = os.path.join(config.getpath("evaluation"), basename + ".txt")
        utils.set_logger(path_evaluation)
    elif mode == "analysis":
        path_analysis = os.path.join(config.getpath("analysis"), basename)

    utils.logger.debug("[info] TRAINING CORPUS: %s" % path_corpus_train)
    utils.logger.debug("[info] VALIDATION CORPUS: %s" % path_corpus_val)
    utils.logger.debug("[info] CONFIG: %s" % path_config)
    utils.logger.debug("[info] PRE-TRAINED WORD EMBEDDINGS: %s" % path_word2vec)
    utils.logger.debug("[info] SNAPSHOT (MODEL): %s " % path_snapshot)
    utils.logger.debug("[info] SNAPSHOT (WORD EMBEDDINGS): %s " % path_snapshot_vectors)
    if mode == "train":
        utils.logger.debug("[info] LOG: %s" % path_log)
    elif mode == "evaluation":
        utils.logger.debug("[info] EVALUATION: %s" % path_evaluation)
    elif mode == "analysis":
        utils.logger.debug("[info] ANALYSIS: %s" % path_analysis)

    # Hyper parameters
    word_dim = config.getint("word_dim")
    state_dim = config.getint("state_dim")
    aggregation = config.getstr("aggregation")
    attention = config.getstr("attention")
    retrofitting = config.getbool("retrofitting")
    alpha = config.getfloat("alpha")
    scale = config.getfloat("scale")
    identity_penalty = config.getbool("identity_penalty")
    lmd = config.getfloat("lambda")
    grad_clip = config.getfloat("grad_clip")
    weight_decay = config.getfloat("weight_decay")
    batch_size = config.getint("batch_size")

    utils.logger.debug("[info] WORD DIM: %d" % word_dim)
    utils.logger.debug("[info] STATE DIM: %d" % state_dim)
    utils.logger.debug("[info] AGGREGATION METHOD: %s" % aggregation)
    utils.logger.debug("[info] ATTENTION METHOD: %s" % attention)
    utils.logger.debug("[info] RETROFITTING: %s" % retrofitting)
    utils.logger.debug("[info] ALPHA = %f" % alpha) 
    utils.logger.debug("[info] SCALE: %f" % scale)
    utils.logger.debug("[info] IDENTITY PENALTY: %s" % identity_penalty)
    utils.logger.debug("[info] LAMBDA: %f" % lmd)
    utils.logger.debug("[info] GRADIENT CLIPPING: %f" % grad_clip)
    utils.logger.debug("[info] WEIGHT DECAY: %f" % weight_decay)
    utils.logger.debug("[info] BATCH SIZE: %d" % batch_size)

    if retrofitting:
        assert path_word2vec is not None

    # Data preparation
    corpus_train_list = [
        load_corpus(
                path_corpus_train,
                vocab=path_corpus_train + ".vocab",
                max_length=length_limit)
        for length_limit in LENGTH_LIMITS]
    corpus_val = load_corpus(
                path_corpus_val,
                vocab=corpus_train_list[0].vocab,
                max_length=LENGTH_LIMITS[-1])

    # Model preparation 
    if (mode == "train") and (path_word2vec is not None):
        initialW_data = utils.load_word2vec_weight_matrix(
                                    path_word2vec,
                                    word_dim,
                                    corpus_train_list[0].vocab,
                                    scale)
    else:
        initialW_data = None
    cuda.get_device(gpu).use()
    model = models.WON(
                vocab_size=len(corpus_train_list[0].vocab),
                word_dim=word_dim,
                state_dim=state_dim,
                aggregation=aggregation,
                attention=attention,
                initialW=initialW_data,
                EOS_ID=corpus_train_list[0].vocab["<EOS>"])
    if mode != "train":
        serializers.load_npz(path_snapshot, model)
    model.to_gpu(gpu)
    
    # Training/Evaluation/Analysis
    if mode == "train":
        length_index = 0
        utils.logger.debug("[info] Evaluating on the validation set ...")
        loss, acc = evaluate(model, corpus_val,
                                lmd, identity_penalty)
        utils.logger.debug("[validation] iter=0, epoch=0, max_length=%d, loss=%.03f, accuracy=%.2f%%" % \
                                (LENGTH_LIMITS[length_index], loss, acc*100))
        for _ in np.random.randint(0, len(corpus_val), 10):
            s = corpus_val.random_sample()
            batch_sents = [s]
            batch_labels = make_labels(batch_sents)
            _, order_pred = model.forward(batch_sents, train=False)
            order_pred = [a[0] for a in order_pred]
            order_gold = batch_labels[0]
            s = [corpus_val.ivocab[w] for w in s]
            s_pred = utils.reorder(s, order_pred)
            s_gold = utils.reorder(s, order_gold)
            s_pred = " ".join(s_pred).encode("utf-8")
            s_gold = " ".join(s_gold).encode("utf-8")
            utils.logger.debug("[check] <Gold> %s" % s_gold)
            utils.logger.debug("[check] <Pred> %s" % s_pred)
            utils.logger.debug("[check] <Gold:order> %s" % order_gold)
            utils.logger.debug("[check] <Pred:order> %s" % order_pred)
        # training & validation
        opt = optimizers.SMORMS3()
        opt.setup(model)
        opt.add_hook(chainer.optimizer.GradientClipping(grad_clip))
        opt.add_hook(chainer.optimizer.WeightDecay(weight_decay))
        # best_acc = -1.0
        best_acc = acc
        patience = 0
        it = 0
        n_train = len(corpus_train_list[0]) # TODO
        finish_training = False
        for epoch in xrange(1, MAX_EPOCH+1): 
            if finish_training:
                break
            for data_i in xrange(0, n_train, batch_size):
                if data_i + batch_size > n_train:
                    break
                # data preparation
                batch_sents = corpus_train_list[length_index].next_batch(size=batch_size)
                batch_labels = make_labels(batch_sents)
                # forward
                loss, acc = forward(model, batch_sents, batch_labels,
                                    lmd, identity_penalty,
                                    train=True)
                # TODO: BEGIN
                if retrofitting:
                    part_indices_data = np.asarray(list(
                        set([w for s_ in batch_sents for w in s_])
                        ))
                    part_initialW_data = initialW_data[part_indices_data]
                
                    part_indices = Variable(cuda.cupy.asarray(part_indices_data, dtype=np.int32),
                                            volatile=False)
                    part_initialW = Variable(cuda.cupy.asarray(part_initialW_data, dtype=np.float32),
                                            volatile=False)
                    loss_ret = frobenius_squared_error(model.embed(part_indices), part_initialW)
                else:
                    loss_ret = 0.0
                loss = loss + alpha * loss_ret
                # TODO: END
                # backward & update
                model.zerograds()
                loss.backward()
                loss.unchain_backward()
                opt.update()
                it += 1
                # log
                loss = float(cuda.to_cpu(loss.data))
                acc = float(cuda.to_cpu(acc.data))
                utils.logger.debug("[training] iter=%d, epoch=%d (%d/%d=%.03f%%), max_length=%d, loss=%.03f, accuracy=%.2f%%" % \
                                    (it, epoch, 
                                    data_i+batch_size,
                                    n_train,
                                    float(data_i+batch_size)/n_train * 100,
                                    LENGTH_LIMITS[length_index],
                                    loss,
                                    acc*100))
                if it % EVAL == 0: 
                    # validation
                    utils.logger.debug("[info] Evaluating on the validation set ...")
                    loss, acc = evaluate(model, corpus_val,
                                            lmd, identity_penalty)
                    utils.logger.debug("[validation] iter=%d, epoch=%d, max_length=%d, loss=%.03f, accuracy=%.2f%%" % \
                                            (it, epoch, LENGTH_LIMITS[length_index], loss, acc*100))
                    for _ in np.random.randint(0, len(corpus_val), 10):
                        s = corpus_val.random_sample()
                        batch_sents = [s]
                        batch_labels = make_labels(batch_sents)
                        _, order_pred = model.forward(batch_sents, train=False)
                        order_pred = [a[0] for a in order_pred]
                        order_gold = batch_labels[0]
                        s = [corpus_val.ivocab[w] for w in s]
                        s_pred = utils.reorder(s, order_pred)
                        s_gold = utils.reorder(s, order_gold)
                        s_pred = " ".join(s_pred).encode("utf-8")
                        s_gold = " ".join(s_gold).encode("utf-8")
                        utils.logger.debug("[check] <Gold> %s" % s_gold)
                        utils.logger.debug("[check] <Pred> %s" % s_pred)
                        utils.logger.debug("[check] <Gold:order> %s" % order_gold)
                        utils.logger.debug("[check] <Pred:order> %s" % order_pred)

                    if best_acc < acc:
                        # save
                        utils.logger.debug("[info] Best accuracy is updated: %.2f%% => %.2f%%" % (best_acc*100.0, acc*100.0))
                        best_acc = acc
                        patience = 0
                        serializers.save_npz(path_snapshot, model)
                        serializers.save_npz(path_snapshot + ".opt", opt)
                        save_word2vec(path_snapshot_vectors, extract_word2vec(model, corpus_train_list[length_index].vocab))
                        utils.logger.debug("[info] Saved.")
                    else:
                        patience += 1
                        utils.logger.debug("[info] Patience: %d (best accuracy: %.2f%%)" % (patience, best_acc*100.0))
                        if patience >= MAX_PATIENCE:
                            if curriculum and (length_index != len(LENGTH_LIMITS)-1):
                                length_index += 1
                                break
                            else:
                                utils.logger.debug("[info] Patience %d is over. Training finished." \
                                        % patience)
                                finish_training = True
                                break
    elif mode == "evaluation":
        pass
    elif mode == "analysis":
        utils.mkdir(path_analysis)
        f = open(os.path.join(path_analysis, "dump.txt"), "w")
        data_i = 0
        for s in pyprind.prog_bar(corpus_val):
            # NOTE: in analysis mode, decode every sentence regardless of its length
            batch_sents = [s]
            batch_labels = make_labels(batch_sents)
            _, order_pred = model.forward(batch_sents, train=False)
            order_pred = [a[0] for a in order_pred]
            order_gold = batch_labels[0]
            s = [corpus_val.ivocab[w] for w in s]
            s_pred = utils.reorder(s, order_pred)
            s_gold = utils.reorder(s, order_gold)
            s_pred = " ".join(s_pred).encode("utf-8")
            s_gold = " ".join(s_gold).encode("utf-8")
            f.write("[%d] <Gold> %s\n" % (data_i+1, s_gold))
            f.write("[%d] <Pred> %s\n" % (data_i+1, s_pred))
            f.write("[%d] <Gold:order> %s\n" % (data_i+1, order_gold))
            f.write("[%d] <Pred:order> %s\n" % (data_i+1, order_pred))
            data_i += 1
        f.flush()
        f.close()

    utils.logger.debug("[info] Done.")
Example #9
    t1[done, :] = 0
    tt = r1 + (gamma * cp.max(t1, axis=1))
    t[a0[:, None] == cp.arange(t.shape[1])] = tt
    #foo[ind[:,None] == range(foo.shape[1])] = bar
    train = [(s0[i], t[i]) for i in range(t.shape[0])]
    return train


env = gym.make('CartPole-v1')
state_size = env.observation_space.shape[0]
n_actions = env.action_space.n
q_func = MLP(32, n_actions)
q_func.to_gpu(0)

model = QClassifier(q_func)
optimizer = optimizers.SMORMS3(1e-2)
optimizer.setup(model)
#optimizer.add_hook(chainer.optimizer.WeightDecay(0.001))
#optimizer.add_hook(chainer.optimizer.GradientNoise(0.001))

replay_buffer = []
s0_stack = cp.array([], dtype=cp.float32).reshape(0, state_size)
a0_stack = cp.array([], dtype=cp.int32)
r1_stack = cp.array([], dtype=cp.float32)
s1_stack = cp.array([], dtype=cp.float32).reshape(0, state_size)
done_stack = cp.array([], dtype=cp.bool_)

for i in range(1, TRAINING_EPISODES + 1):
    s0 = env.reset()
    s0 = cp.array(s0, dtype=DTYPE)
    r0 = 0
Example #10
    elif args.adagrad:
        optimizer = optimizers.AdaGrad()
    elif args.amsgrad:
        optimizer = optimizers.AMSGrad()
    elif args.amsbound:
        optimizer = optimizers.AMSBound()
    elif args.correctedmomentsgd:
        optimizer = optimizers.CorrectedMomentumSGD()
    elif args.nesterovag:
        optimizer = optimizers.NesterovAG()
    elif args.msvag:
        optimizer = optimizers.MSVAG()
    elif args.rmspropgraves:
        optimizer = optimizers.RMSpropGraves()
    elif args.smorms3:
        optimizer = optimizers.SMORMS3()
    else:
        optimizer = optimizers.AdaDelta()

    optimizer.setup(net)

    if args.lasso:
        # Sparsify the weights with Lasso (L1) regularization
        from chainer.optimizer_hooks import Lasso
        for param in net.params():
            if param.name != 'b':
                param.update_rule.add_hook(Lasso(decay))
    else:
        # Suppress overfitting with Ridge (L2) regularization (weight decay)
        from chainer.optimizer_hooks import WeightDecay
        for param in net.params():
Example #11
def main(gpu, path_corpus, path_config, path_word2vec):
    MAX_EPOCH = 50
    EVAL = 200
    MAX_LENGTH = 70

    config = utils.Config(path_config)
    model_name = config.getstr("model")
    word_dim = config.getint("word_dim")
    state_dim = config.getint("state_dim")
    grad_clip = config.getfloat("grad_clip")
    weight_decay = config.getfloat("weight_decay")
    batch_size = config.getint("batch_size")

    print "[info] CORPUS: %s" % path_corpus
    print "[info] CONFIG: %s" % path_config
    print "[info] PRE-TRAINED WORD EMBEDDINGS: %s" % path_word2vec
    print "[info] MODEL: %s" % model_name
    print "[info] WORD DIM: %d" % word_dim
    print "[info] STATE DIM: %d" % state_dim
    print "[info] GRADIENT CLIPPING: %f" % grad_clip
    print "[info] WEIGHT DECAY: %f" % weight_decay
    print "[info] BATCH SIZE: %d" % batch_size

    path_save_head = os.path.join(
        config.getpath("snapshot"),
        "rnnlm.%s.%s" % (os.path.basename(path_corpus),
                         os.path.splitext(os.path.basename(path_config))[0]))
    print "[info] SNAPSHOT: %s" % path_save_head

    sents_train, sents_val, vocab, ivocab = \
            utils.load_corpus(path_corpus=path_corpus, max_length=MAX_LENGTH)

    if path_word2vec is not None:
        word2vec = utils.load_word2vec(path_word2vec, word_dim)
        initialW = utils.create_word_embeddings(vocab,
                                                word2vec,
                                                dim=word_dim,
                                                scale=0.001)
    else:
        initialW = None

    cuda.get_device(gpu).use()
    if model_name == "rnn":
        model = models.RNN(vocab_size=len(vocab),
                           word_dim=word_dim,
                           state_dim=state_dim,
                           initialW=initialW,
                           EOS_ID=vocab["<EOS>"])
    elif model_name == "lstm":
        model = models.LSTM(vocab_size=len(vocab),
                            word_dim=word_dim,
                            state_dim=state_dim,
                            initialW=initialW,
                            EOS_ID=vocab["<EOS>"])
    elif model_name == "gru":
        model = models.GRU(vocab_size=len(vocab),
                           word_dim=word_dim,
                           state_dim=state_dim,
                           initialW=initialW,
                           EOS_ID=vocab["<EOS>"])
    elif model_name == "bd_lstm":
        model = models.BD_LSTM(vocab_size=len(vocab),
                               word_dim=word_dim,
                               state_dim=state_dim,
                               initialW=initialW,
                               EOS_ID=vocab["<EOS>"])
    else:
        print "[error] Unknown model name: %s" % model_name
        sys.exit(-1)
    model.to_gpu(gpu)

    opt = optimizers.SMORMS3()
    opt.setup(model)
    opt.add_hook(chainer.optimizer.GradientClipping(grad_clip))
    opt.add_hook(chainer.optimizer.WeightDecay(weight_decay))

    print "[info] Evaluating on the validation sentences ..."
    loss_data, acc_data = evaluate(model, model_name, sents_val, ivocab)
    perp = math.exp(loss_data)
    print "[validation] iter=0, epoch=0, perplexity=%f, accuracy=%.2f%%" \
        % (perp, acc_data*100)

    it = 0
    n_train = len(sents_train)
    vocab_size = model.vocab_size
    for epoch in xrange(1, MAX_EPOCH + 1):
        perm = np.random.permutation(n_train)
        for data_i in xrange(0, n_train, batch_size):
            if data_i + batch_size > n_train:
                break
            words = sents_train[perm[data_i:data_i + batch_size]]

            if model_name == "bd_lstm":
                xs, ms = utils.make_batch(words,
                                          train=True,
                                          tail=False,
                                          mask=True)
                ys = model.forward(xs=xs, ms=ms, train=True)
            else:
                xs = utils.make_batch(words, train=True, tail=False)
                ys = model.forward(ts=xs, train=True)

            ys = F.concat(ys, axis=0)
            ts = F.concat(xs, axis=0)
            ys = F.reshape(ys, (-1, vocab_size))  # (TN, |V|)
            ts = F.reshape(ts, (-1, ))  # (TN,)

            loss = F.softmax_cross_entropy(ys, ts)
            acc = F.accuracy(ys, ts, ignore_label=-1)

            model.zerograds()
            loss.backward()
            loss.unchain_backward()
            opt.update()
            it += 1

            loss_data = float(cuda.to_cpu(loss.data))
            perp = math.exp(loss_data)
            acc_data = float(cuda.to_cpu(acc.data))
            print "[training] iter=%d, epoch=%d (%d/%d=%.03f%%), perplexity=%f, accuracy=%.2f%%" \
                    % (it, epoch, data_i+batch_size, n_train,
                        float(data_i+batch_size)/n_train*100,
                        perp, acc_data*100)

            if it % EVAL == 0:
                print "[info] Evaluating on the validation sentences ..."
                loss_data, acc_data = evaluate(model, model_name, sents_val,
                                               ivocab)
                perp = math.exp(loss_data)
                print "[validation] iter=%d, epoch=%d, perplexity=%f, accuracy=%.2f%%" \
                        % (it, epoch, perp, acc_data*100)

                serializers.save_npz(
                    path_save_head + ".iter_%d.epoch_%d.model" % (it, epoch),
                    model)
                utils.save_word2vec(
                    path_save_head + ".iter_%d.epoch_%d.vectors.txt" %
                    (it, epoch), utils.extract_word2vec(model, vocab))
                print "[info] Saved."

    print "[info] Done."
Example #12
def main(gpu, path_corpus, path_config, path_word2vec):
    MAX_EPOCH = 50
    EVAL = 200
    MAX_LENGTH = 70
    COUNTS_CACHE = "./cache/counts.pkl"
    
    config = utils.Config(path_config)
    word_dim = config.getint("word_dim") 
    state_dim = config.getint("state_dim")
    grad_clip = config.getfloat("grad_clip")
    weight_decay = config.getfloat("weight_decay")
    batch_size = config.getint("batch_size")
    sample_size = config.getint("sample_size")
    
    print "[info] CORPUS: %s" % path_corpus
    print "[info] CONFIG: %s" % path_config
    print "[info] PRE-TRAINED WORD EMBEDDINGS: %s" % path_word2vec
    print "[info] WORD DIM: %d" % word_dim
    print "[info] STATE DIM: %d" % state_dim
    print "[info] GRADIENT CLIPPING: %f" % grad_clip
    print "[info] WEIGHT DECAY: %f" % weight_decay
    print "[info] BATCH SIZE: %d" % batch_size

    path_save_head = os.path.join(config.getpath("snapshot"),
            "rnnlm.%s.%s" % (
                os.path.basename(path_corpus),
                os.path.splitext(os.path.basename(path_config))[0]))
    print "[info] SNAPSHOT: %s" % path_save_head
    
    sents_train, sents_val, vocab, ivocab = \
            utils.load_corpus(path_corpus=path_corpus, max_length=MAX_LENGTH)

    #counts = None

    #print("[info] Load word counter")
    #if os.path.exists(COUNTS_CACHE):
    #    print("[info] Found cache of counter")
    #    counts = pickle.load(open(COUNTS_CACHE, "rb"))

    #    if len(counts) != len(vocab):
    #        counts = None

    #if counts is None:
    #    counts = Counter()

    #    for sent in list(sents_train) + list(sents_val):
    #        counts += Counter(sent)

    #    pickle.dump(counts, open(COUNTS_CACHE, "wb"))

    #cs = [counts[w] for w in range(len(counts))]

    if path_word2vec is not None:
        word2vec = utils.load_word2vec(path_word2vec, word_dim)
        initialW = utils.create_word_embeddings(vocab, word2vec, dim=word_dim, scale=0.001)
    else:
        initialW = None

    cuda.get_device(gpu).use()

    model = models.CXT_BLSTM(
            vocab_size=len(vocab),
            word_dim=word_dim,
            state_dim=state_dim,
            initialW=initialW,
            EOS_ID=vocab["<EOS>"])

    model.to_gpu(gpu)

    opt = optimizers.SMORMS3()
    opt.setup(model)
    opt.add_hook(chainer.optimizer.GradientClipping(grad_clip))
    opt.add_hook(chainer.optimizer.WeightDecay(weight_decay))

    # sampler = utils.RandomSampler(cs, sample_size)

    #print "[info] Evaluating on the validation sentences ..."
    #loss_data = evaluate(model, sents_val, ivocab, word_dim, sampler)
    #print "[validation] iter=0, epoch=0, loss=%f" \
    #    % (loss_data)
    
    it = 0
    n_train = len(sents_train)
    vocab_size = model.vocab_size

    for epoch in xrange(1, MAX_EPOCH+1):
        perm = np.random.permutation(n_train)
        for data_i in xrange(0, n_train, batch_size):
            if data_i + batch_size > n_train:
                break
            words = sents_train[perm[data_i:data_i+batch_size]]
            xs, ms = utils.make_batch(words, train=True, tail=False, mask=True)

            ys = model.forward(xs=xs, ms=ms, train=True)
            
            words_without_edge = [w[1:-1] for w in words]
            xs_without_edge, ms_without_edge = utils.make_batch(words_without_edge, train=True, tail=False, mask=True)

            masked_ys = []
            for y, m in zip(ys, ms_without_edge):
                m_ext = F.broadcast_to(F.reshape(m, (batch_size, 1)), (batch_size, vocab_size))
                masked_ys.append(y*m_ext)

            #ts = model.embed_words(xs_without_edge, ms_without_edge, train=True) # excluding BOS and EOS

            #  T : maximum sentence length in the batch
            #  N : batch size
            # |D|: word_dim
            ys = F.concat(masked_ys, axis=0) # (TN, |V|)
            ts = F.concat(xs_without_edge, axis=0) # (TN, |D|)

            ys = F.reshape(ys, (-1, vocab_size)) # (TN, |V|)
            ts = F.reshape(ts, (-1,)) # (TN,)

            loss = F.softmax_cross_entropy(ys, ts)
            acc = F.accuracy(ys, ts, ignore_label=-1)
        
            model.zerograds()
            loss.backward()
            loss.unchain_backward()
            opt.update()
            it += 1

            loss_data = float(cuda.to_cpu(loss.data))
            perp = math.exp(loss_data)
            acc_data = float(cuda.to_cpu(acc.data))

            print "[training] iter=%d, epoch=%d (%d/%d=%.03f%%), perplexity=%f, accuracy=%.2f%%" \
                    % (it, epoch, data_i+batch_size, n_train,
                        float(data_i+batch_size)/n_train*100,
                        perp, acc_data*100)

            if it % EVAL == 0:
                print "[info] Evaluating on the validation sentences ..."
                loss_data, acc_data = evaluate(model, sents_val, ivocab, word_dim)
                perp = math.exp(loss_data)
                print "[validation] iter=%d, epoch=%d, perplexity=%f, accuracy=%.2f%%" \
                        % (it, epoch, perp, acc_data*100)

                serializers.save_npz(path_save_head + ".iter_%d.epoch_%d.model" % (it, epoch),
                        model)
                # utils.save_word2vec(path_save_head + ".iter_%d.epoch_%d.vectors.txt" % (it, epoch),
                #         utils.extract_word2vec(model, vocab))
                print "[info] Saved."

    print "[info] Done."
Example #13
                             jvi_order,
                             device=gpu_id)
    elbo = model.ELBOObjective(encoder, decoder, zcount)
elif vae_type == "is":
    vae = model.ISObjective(encoder, decoder, zcount)
else:
    sys.exit("Unsupported VAE type (%s)." % vae_type)

lr = float(args['--lr'])
print "Using initial learning rate %f" % lr
opt_type = args['--opt']
if opt_type == "adam":
    opt = optimizers.Adam(alpha=lr)
    opt_elbo = optimizers.Adam(alpha=lr)
elif opt_type == "smorms3":
    opt = optimizers.SMORMS3(lr=lr)
    opt_elbo = optimizers.SMORMS3(lr=lr)
elif opt_type == "sgd":
    opt = optimizers.SGD(lr=lr)
    opt_elbo = optimizers.SGD(lr=lr)
else:
    sys.exit("Unsupported optimizer type (%s)." % opt_type)

opt.setup(vae)
opt.add_hook(chainer.optimizer.GradientClipping(4.0))

if elbo:
    opt_elbo.setup(elbo)
    opt_elbo.add_hook(chainer.optimizer.GradientClipping(4.0))

# Move to GPU