def init_model(vocab_size, char_type_size):
    model = FunctionSet(
        embed=F.EmbedID(vocab_size, embed_units),
        char_type_embed=F.EmbedID(char_type_size, char_type_embed_units),
        #dict_embed = F.Linear(12, dict_embed_units),
        hidden1=F.Linear(
            window * (embed_units + char_type_embed_units) * 3 + hidden_units,
            hidden_units),
        i_gate=F.Linear(
            window * (embed_units + char_type_embed_units) * 3 + hidden_units,
            hidden_units),
        f_gate=F.Linear(
            window * (embed_units + char_type_embed_units) * 3 + hidden_units,
            hidden_units),
        o_gate=F.Linear(
            window * (embed_units + char_type_embed_units) * 3 + hidden_units,
            hidden_units),
        output=F.Linear(hidden_units + 12, label_num),
    )
    if opt_selection == 'Adagrad':
        opt = optimizers.AdaGrad(lr=learning_rate)
    elif opt_selection == 'SGD':
        opt = optimizers.SGD()
    elif opt_selection == 'Adam':
        opt = optimizers.Adam()
    else:
        opt = optimizers.AdaGrad(lr=learning_rate)
        print('AdaGrad is chosen as default')
    opt.setup(model)
    return model, opt
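For reference, here is a minimal NumPy sketch (not part of the original snippet) of the parameter update that a call like optimizers.AdaGrad(lr=learning_rate) selects: squared gradients are accumulated per parameter and scale the learning rate down over time. The eps default shown is an assumption matching Chainer's documented default.

import numpy as np

def adagrad_step(param, grad, state, lr=0.01, eps=1e-8):
    # accumulate squared gradients, then scale each element's step accordingly
    state += grad * grad
    param -= lr * grad / (np.sqrt(state) + eps)
    return param, state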
Example 2
    def setup_optimizer(self,
                        optimizer_name,
                        gradient_clipping=3,
                        weight_decay=0.00001,
                        **kwargs):
        # set optimizer
        if optimizer_name == "Adam":
            self.opt = optimizers.Adam(**kwargs)
        elif optimizer_name == "AdaDelta":
            self.opt = optimizers.AdaDelta(**kwargs)
        elif optimizer_name == "AdaGrad":
            self.opt = optimizers.AdaGrad(**kwargs)
        elif optimizer_name == "RMSprop":
            self.opt = optimizers.RMSprop(**kwargs)
        elif optimizer_name == "RMSpropGraves":
            self.opt = optimizers.RMSpropGraves(**kwargs)
        elif optimizer_name == "SGD":
            self.opt = optimizers.SGD(**kwargs)
        elif optimizer_name == "MomentumSGD":
            self.opt = optimizers.MomentumSGD(**kwargs)

        # self.opt.use_cleargrads()
        self.opt.setup(self)
        self.opt.add_hook(optimizer.GradientClipping(gradient_clipping))
        self.opt.add_hook(optimizer.WeightDecay(weight_decay))

        self.opt_params = {
            "optimizer_name": optimizer_name,
            "gradient_clipping": gradient_clipping,
            "weight_decay": weight_decay
        }
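A self-contained sketch of the same setup pattern on a toy link (the layer sizes, data, and hyperparameters here are assumptions, not taken from the original project): create the optimizer, attach it to the model with setup(), then register the GradientClipping and WeightDecay hooks, which run on every update() call.

import numpy as np
import chainer
import chainer.functions as F
import chainer.links as L
from chainer import optimizer, optimizers

model = L.Linear(4, 2)                       # toy model standing in for self
opt = optimizers.Adam(alpha=1e-3)
opt.setup(model)
opt.add_hook(optimizer.GradientClipping(3))
opt.add_hook(optimizer.WeightDecay(1e-5))

x = np.random.randn(8, 4).astype(np.float32)
t = np.random.randint(0, 2, size=8).astype(np.int32)
model.cleargrads()
loss = F.softmax_cross_entropy(model(x), t)
loss.backward()
opt.update()                                 # hooks are applied before the parameter update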
Example 3
def train(args):
  trace('loading corpus ...')
  with open(args.source) as fp:
    trees = [make_tree(l) for l in fp]

  trace('extracting leaf nodes ...')
  word_lists = [extract_words(t) for t in trees]

  trace('extracting gold operations ...')
  op_lists = [make_operations(t) for t in trees]

  trace('making vocabulary ...')
  word_vocab = Vocabulary.new(word_lists, args.vocab)
  phrase_set = set()
  semi_set = set()
  for tree in trees:
    phrase_set |= set(extract_phrase_labels(tree))
    semi_set |= set(extract_semi_labels(tree))
  phrase_vocab = Vocabulary.new([list(phrase_set)], len(phrase_set), add_special_tokens=False)
  semi_vocab = Vocabulary.new([list(semi_set)], len(semi_set), add_special_tokens=False)

  trace('converting data ...')
  word_lists = [convert_word_list(x, word_vocab) for x in word_lists]
  op_lists = [convert_op_list(x, phrase_vocab, semi_vocab) for x in op_lists]

  trace('start training ...')
  parser = Parser(
      args.vocab, args.embed, args.queue, args.stack,
      len(phrase_set), len(semi_set),
  )
  if USE_GPU:
    parser.to_gpu()
  opt = optimizers.AdaGrad(lr = 0.005)
  opt.setup(parser)
  opt.add_hook(optimizer.GradientClipping(5))

  for epoch in range(args.epoch):
    n = 0
    
    for samples in batch(zip(word_lists, op_lists), args.minibatch):
      parser.zerograds()
      loss = my_zeros((), np.float32)

      for word_list, op_list in zip(*samples):
        trace('epoch %3d, sample %6d:' % (epoch + 1, n + 1))
        loss += parser.forward(word_list, op_list, 0)
        n += 1
      
      loss.backward()
      opt.update()

    trace('saving model ...')
    prefix = args.model + '.%03d' % (epoch + 1)
    word_vocab.save(prefix + '.words')
    phrase_vocab.save(prefix + '.phrases')
    semi_vocab.save(prefix + '.semiterminals')
    parser.save_spec(prefix + '.spec')
    serializers.save_hdf5(prefix + '.weights', parser)

  trace('finished.')
Example 4
def get_model_optimizer(result_folder, cfg_mod):
    model_fn = path.basename(cfg_mod.SRC_MODEL)
    src_model = imp.load_source(
        model_fn.split('.')[0], path.join(result_folder,
                                          cfg_mod.SRC_MODEL)).src_model

    if cfg_mod.OPT_PARAM == 'AdaGrad':
        optimizer = optimizers.AdaGrad(lr=cfg_mod.TRAIN_RATE, eps=cfg_mod.EPS)
    elif cfg_mod.OPT_PARAM == 'MomentumSGD':
        optimizer = optimizers.MomentumSGD(lr=cfg_mod.TRAIN_RATE,
                                           momentum=cfg_mod.MOMENTUM)
    elif cfg_mod.OPT_PARAM == 'AdaDelta':
        optimizer = optimizers.AdaDelta(rho=cfg_mod.TRAIN_RATE,
                                        eps=cfg_mod.EPS)
    elif cfg_mod.OPT_PARAM == 'ADAM':
        optimizer = optimizers.Adam(alpha=cfg_mod.TRAIN_RATE,
                                    beta1=cfg_mod.BETA1,
                                    beta2=cfg_mod.BETA2,
                                    eps=cfg_mod.EPS)
    else:
        raise Exception('No optimizer is selected')
    optimizer.setup(src_model)

    if cfg_mod.WEIGHT_DECAY:
        optimizer.add_hook(chainer.optimizer.WeightDecay(cfg_mod.WEIGHT_DECAY))

    return src_model, optimizer
Example 5
def get_opt(args):
    if args.opt_model == "SGD":
        alpha0 = 0.01 if args.alpha0 == 0 else args.alpha0
        return optimizers.SGD(lr=alpha0)
    if args.opt_model == "AdaGrad":
        alpha0 = 0.01 if args.alpha0 == 0 else args.alpha0
        return optimizers.AdaGrad(lr=alpha0)
    if args.opt_model == "AdaDelta":
        alpha0 = 0.95 if args.alpha0 == 0 else args.alpha0
        alpha1 = 1e-06 if args.alpha1 == 0 else args.alpha1
        return optimizers.AdaDelta(rho=alpha0, eps=alpha1)
    if args.opt_model == "Momentum":
        alpha0 = 0.01 if args.alpha0 == 0 else args.alpha0
        alpha1 = 0.9 if args.alpha1 == 0 else args.alpha1
        return optimizers.MomentumSGD(lr=alpha0, momentum=alpha1)
    if args.opt_model == "NAG":
        alpha0 = 0.01 if args.alpha0 == 0 else args.alpha0
        alpha1 = 0.9 if args.alpha1 == 0 else args.alpha1
        return optimizers.NesterovAG(lr=alpha0, momentum=alpha1)
    if args.opt_model == "RMS":
        return optimizers.RMSpropGraves()
    if args.opt_model == "SM":
        return optimizers.SMORMS3()
    if args.opt_model == "Adam":  # default case
        alpha0 = 0.001 if args.alpha0 == 0 else args.alpha0
        alpha1 = 0.9 if args.alpha1 == 0 else args.alpha1
        alpha2 = 0.999 if args.alpha2 == 0 else args.alpha2
        alpha3 = 1e-08 if args.alpha3 == 0 else args.alpha3
        return optimizers.Adam(alpha=alpha0,
                               beta1=alpha1,
                               beta2=alpha2,
                               eps=alpha3)
    print('no such optimization method', args.opt_model)
    sys.exit(1)
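A usage sketch of the command-line interface get_opt appears to expect (the flag names and the convention that 0 means "use the optimizer's default" are read off the code above; the original project's argument parser may differ):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--opt_model', default='Adam')
parser.add_argument('--alpha0', type=float, default=0)
parser.add_argument('--alpha1', type=float, default=0)
parser.add_argument('--alpha2', type=float, default=0)
parser.add_argument('--alpha3', type=float, default=0)

args = parser.parse_args(['--opt_model', 'AdaGrad', '--alpha0', '0.05'])
opt = get_opt(args)    # -> optimizers.AdaGrad(lr=0.05)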
Example 6
    def train(self, epoch):
        trace('making vocabularies ...')
        self.trg_vocab = Vocabulary.new(gens.word_list(self.target),
                                        self.vocab)

        trace('making model ...')

        trace('epoch %d/%d: ' % (epoch + 1, self.epoch))
        opt = optimizers.AdaGrad(lr=0.01)
        opt.setup(self.encdec)
        opt.add_hook(optimizer.GradientClipping(5))
        gen1 = gens.word_list(self.target)
        gen = gens.batch(gen1, self.minibatch)

        for trg_batch in gen:
            self.batch_size = len(trg_batch)
            self.trg_batch = fill_batch(trg_batch)
            if len(trg_batch) != self.minibatch:
                break
            self.encdec.clear(self.batch_size)
            self.__forward_img()
            self.encdec.reset(self.batch_size)
            loss, hyp_batch = self.__forward_word(self.trg_batch, self.encdec,
                                                  True, 0)
            loss.backward()
            opt.update()
            K = len(self.trg_batch) - 2
            self.print_out(K, hyp_batch, epoch)
Example 7
def main():
    w2v_dict = TransVecotr(MODEL_PATH)
    dataset, height, width = w2v_dict(WAKATI_PATH)

    feat_data = dataset["vec"]
    label_data = xp.array([LAB_DIC[i] for i in dataset["lab"]], dtype=xp.int32)
    x_train, x_test, y_train, y_test = train_test_split(feat_data, label_data, test_size=0.15)

    input_channel = 1
    x_train = xp.array(x_train, dtype=xp.float32).reshape(len(x_train), input_channel, height, width) 
    x_test  = xp.array(x_test, dtype=xp.float32).reshape(len(x_test), input_channel, height, width)

    train = tuple_dataset.TupleDataset(x_train, y_train)
    test = tuple_dataset.TupleDataset(x_test, y_test)

    train_iter = iterators.SerialIterator(train, N_BATCH)
    test_iter = iterators.SerialIterator(test, N_BATCH, repeat=False, shuffle=False)

    model = L.Classifier(SimpleCNN(input_channel, N_OUTPUT, FILTER_H, width, MID_UNITS, N_UNITS, N_LABEL))
    if GPU >= 0:
        model.to_gpu()

    optimizer = optimizers.AdaGrad()
    optimizer.setup(model)
    updater = training.StandardUpdater(train_iter, optimizer, device=GPU)
    trainer = training.Trainer(updater, (N_EPOCH, 'epoch'), out="result")
    trainer.extend(extensions.Evaluator(test_iter, model, device=GPU))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot())
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss', 'main/accuracy', 'validation/main/accuracy']))
    trainer.extend(extensions.ProgressBar())

    trainer.run()
Example 8
def main():
    w2v_dict = TransVecotr(MODEL_PATH)
    dataset, height, width = w2v_dict(WAKATI_PATH)

    feat_data = dataset["vec"]
    label_data = xp.array([LAB_DIC[i] for i in dataset["lab"]], dtype=xp.int32)

    input_channel = 1
    x_train = xp.array(feat_data, dtype=xp.float32).reshape(len(feat_data), input_channel, height, width) 
    train = tuple_dataset.TupleDataset(x_train, label_data)
    train_iter = iterators.SerialIterator(train, N_BATCH)

    model = L.Classifier(SimpleCNN(input_channel, N_OUTPUT, FILTER_H, width, MID_UNITS, N_UNITS, N_LABEL))
    if GPU >= 0:
        model.to_gpu()
    optimizer = optimizers.AdaGrad()
    optimizer.setup(model)
    updater = training.StandardUpdater(train_iter, optimizer, device=GPU)
    trainer = training.Trainer(updater, (1, 'epoch'), out="result")
    serializers.load_npz(TRAINER_PATH, trainer)

    while True:
        input_text = raw_input('input text :')
        if input_text == "exit":
            break
        pred_vec = w2v_dict.gen_pred_vec(input_text, height)
        pred_vec = xp.array([pred_vec], dtype=xp.float32)
        pred_data = xp.array([pred_vec], dtype=xp.float32)
        hyp_data = model.predictor(pred_data)
        res_dict = {v:k for k, v in LAB_DIC.items()}
        if res_dict[hyp_data.data.argmax()] == "yakiu":
            print "彡(゚)(゚) やきう民"
        else:
            print "(´・ω・`) 原住民"
        print
Example 9
def max_ent_deep_irl(feature_matrix, trans_probs, trajs,
                     gamma=0.9, n_epoch=30):
    n_states, d_states = feature_matrix.shape
    _, n_actions, _ = trans_probs.shape
    reward_func = Reward(d_states, 64)
    optimizer = optimizers.AdaGrad(lr=0.01)
    optimizer.setup(reward_func)
    optimizer.add_hook(chainer.optimizer.WeightDecay(1e-4))
    optimizer.add_hook(chainer.optimizer.GradientClipping(100.0))

    feature_exp = np.zeros((d_states))
    for episode in trajs:
        for step in episode:
            feature_exp += feature_matrix[step[0], :]
    feature_exp = feature_exp / len(trajs)

    fmat = chainer.Variable(feature_matrix.astype(np.float32))
    for _ in range(n_epoch):
        reward_func.zerograds()
        r = reward_func(fmat)
        v = value_iteration(trans_probs, r.data.reshape((n_states,)), gamma)
        pi = best_policy(trans_probs, v)
        exp_svf = expected_svf(trans_probs, trajs, pi)
        grad_r = feature_exp - exp_svf
        r.grad = -grad_r.reshape((n_states, 1)).astype(np.float32)
        r.backward()
        optimizer.update()

    return reward_func(fmat).data.reshape((n_states,))
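The snippet above never builds a scalar loss; instead it writes the (negative) gradient of the maximum-entropy IRL objective directly into r.grad and backpropagates from there. A self-contained toy sketch of that pattern (the sizes and random data are assumptions):

import numpy as np
import chainer
import chainer.links as L
from chainer import optimizers

model = L.Linear(3, 1)                            # stands in for the Reward network
opt = optimizers.AdaGrad(lr=0.01)
opt.setup(model)

x = chainer.Variable(np.random.randn(5, 3).astype(np.float32))
model.cleargrads()
r = model(x)                                      # shape (5, 1)
grad_r = np.random.randn(5).astype(np.float32)    # placeholder for feature_exp - exp_svf
r.grad = -grad_r.reshape(5, 1)                    # inject the gradient by hand
r.backward()
opt.update()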
Example 10
def get_model_optimizer(args):
    model = get_model(args)

    if 'opt' in args:
        # prepare optimizer
        if args.opt == 'AdaGrad':
            optimizer = optimizers.AdaGrad(lr=args.lr)
        elif args.opt == 'MomentumSGD':
            optimizer = optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
        elif args.opt == 'Adam':
            optimizer = optimizers.Adam()
        else:
            raise Exception('No optimizer is selected')

        optimizer.setup(model)

        if args.resume_opt is not None:
            serializers.load_hdf5(args.resume_opt, optimizer)
            args.epoch_offset = int(
                re.search('epoch-([0-9]+)', args.resume_opt).groups()[0])

        return model, optimizer

    else:
        print('No optimizer generated.')
        return model
Example 11
def optimizer(opt_str):
    """
    Infer the optimizer from the input string.
    """

    if (opt_str.lower() == 'adam'):
        opt = O.Adam(amsgrad=True)
    elif (opt_str.lower() == 'ada_d'):
        opt = O.AdaDelta()
    elif (opt_str.lower() == 'ada_g'):
        opt = O.AdaGrad()
    elif (opt_str.lower() == 'm_sgd'):
        opt = O.MomentumSGD()
    elif (opt_str.lower() == 'n_ag'):
        opt = O.NesterovAG()
    elif (opt_str.lower() == 'rmsp'):
        opt = O.RMSprop()
    elif (opt_str.lower() == 'rmsp_g'):
        opt = O.RMSpropGraves()
    elif (opt_str.lower() == 'sgd'):
        opt = O.SGD()
    elif (opt_str.lower() == 'smorms'):
        opt = O.SMORMS3()
    else:
        opt = O.Adam(amsgrad=True)
        logger.warning('{}->{}'.format(opt_str, opt.__doc__.split('.')[0]))

    logger.debug('Optimizer: {}'.format(opt.__doc__.split('.')[0]))
    return opt
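Usage sketch for the helper above: strings are matched case-insensitively against short aliases, and anything unrecognized falls back to Adam with AMSGrad while logging a warning.

opt = optimizer('ada_g')     # -> chainer.optimizers.AdaGrad()
opt = optimizer('unknown')   # -> chainer.optimizers.Adam(amsgrad=True), logs a warning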
Example 12
def get_model_optimizer(result_dir, args):
    model_fn = os.path.basename(args.model)
    model_name = model_fn.split('.')[0]
    module = imp.load_source(model_fn.split('.')[0], args.model)
    Net = getattr(module, model_name)

    dst = '%s/%s' % (result_dir, model_fn)
    if not os.path.exists(dst):
        shutil.copy(args.model, dst)

    dst = '%s/%s' % (result_dir, os.path.basename(__file__))
    if not os.path.exists(dst):
        shutil.copy(__file__, dst)

    # prepare model
    model = Net()
    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
    if args.restart_from is not None:
        model = pickle.load(open(args.restart_from, 'rb'))
    if args.gpu >= 0:
        model.to_gpu()

    # prepare optimizer
    if args.opt == 'AdaGrad':
        optimizer = optimizers.AdaGrad(lr=0.0005)
    elif args.opt == 'MomentumSGD':
        optimizer = optimizers.MomentumSGD(lr=0.0005, momentum=0.9)
    elif args.opt == 'Adam':
        optimizer = optimizers.Adam()
    else:
        raise Exception('No optimizer is selected')
    optimizer.setup(model)

    return model, optimizer
Example 13
def get_optimizer(model,
                  opt,
                  lr=None,
                  adam_alpha=None,
                  adam_beta1=None,
                  adam_beta2=None,
                  adam_eps=None,
                  weight_decay=None):
    if opt == 'MomentumSGD':
        optimizer = optimizers.MomentumSGD(lr=lr, momentum=0.9)
    elif opt == 'Adam':
        optimizer = optimizers.Adam(alpha=adam_alpha,
                                    beta1=adam_beta1,
                                    beta2=adam_beta2,
                                    eps=adam_eps)
    elif opt == 'AdaGrad':
        optimizer = optimizers.AdaGrad(lr=lr)
    elif opt == 'RMSprop':
        optimizer = optimizers.RMSprop(lr=lr)
    else:
        raise Exception('No optimizer is selected')

    # The first model as the master model
    optimizer.setup(model)
    if opt == 'MomentumSGD':
        optimizer.add_hook(chainer.optimizer.WeightDecay(weight_decay))

    return optimizer
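A hypothetical call to the helper above (the link and hyperparameter values are assumptions); note that the WeightDecay hook is only added in the MomentumSGD branch.

import chainer.links as L

model = L.Linear(32, 10)
opt = get_optimizer(model, 'Adam',
                    adam_alpha=1e-3, adam_beta1=0.9,
                    adam_beta2=0.999, adam_eps=1e-8)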
Example 14
def optimizer(opt_str):
    """
    Infer the optimizer from the input string.
    """

    if(opt_str.lower() == 'adam'):
        opt = O.Adam(amsgrad=True)
    elif(opt_str.lower() == 'ada_d'):
        opt = O.AdaDelta()
    elif(opt_str.lower() == 'ada_g'):
        opt = O.AdaGrad()
    elif(opt_str.lower() == 'm_sgd'):
        opt = O.MomentumSGD()
    elif(opt_str.lower() == 'n_ag'):
        opt = O.NesterovAG()
    elif(opt_str.lower() == 'rmsp'):
        opt = O.RMSprop()
    elif(opt_str.lower() == 'rmsp_g'):
        opt = O.RMSpropGraves()
    elif(opt_str.lower() == 'sgd'):
        opt = O.SGD()
    elif(opt_str.lower() == 'smorms'):
        opt = O.SMORMS3()
    else:
        opt = O.Adam(amsgrad=True)
        print('\n[Warning] {0}\n\t{1}->{2}\n'.format(
            fileFuncLine(), opt_str, opt.__doc__.split('.')[0])
        )

    print('Optimizer:', opt.__doc__.split('.')[0])
    return opt
Example 15
def which_is_best_optimizer(k=10, model=CNN()):
    k_fold_validation(k,
                      copy.deepcopy(model),
                      optimizer=optimizers.Adam(),
                      tag='Adam')
    k_fold_validation(k,
                      copy.deepcopy(model),
                      optimizer=optimizers.SGD(),
                      tag='SGD')
    k_fold_validation(k,
                      copy.deepcopy(model),
                      optimizer=optimizers.RMSpropGraves(),
                      tag='RMSpropGraves')
    #    k_fold_validation(k, copy.deepcopy(model), optimizer=optimizers.RMSprop(), tag='RMSprop')
    k_fold_validation(k,
                      copy.deepcopy(model),
                      optimizer=optimizers.AdaDelta(),
                      tag='AdaDelta')
    k_fold_validation(k,
                      copy.deepcopy(model),
                      optimizer=optimizers.AdaGrad(),
                      tag='AdaGrad')
    k_fold_validation(k,
                      copy.deepcopy(model),
                      optimizer=optimizers.MomentumSGD(),
                      tag='MomentumSGD')
    k_fold_validation(k,
                      copy.deepcopy(model),
                      optimizer=optimizers.NesterovAG(),
                      tag='NesterovAG')
Example 16
def init_model():
    #Make models
    if use_pre2 == 'pre': pre_unit = 4
    else: pre_unit = 0
    if use_null == 'null': null_unit = 6
    else: null_unit = 0
    if args.phrase == 'phrase':
        phrase_unit = 4
        model = chainer.FunctionSet(
            trainable=chainer.FunctionSet(
                w0=F.Linear(n_units * 2 + null_unit * 2, n_label),
                ww0=F.Linear(
                    n_units * 2 + pre_unit + null_unit * 2 + phrase_unit,
                    n_units + null_unit),
                ww1=F.Linear(
                    n_units * 2 + pre_unit + null_unit * 2 + phrase_unit,
                    n_units + null_unit),
            ),
            w1_f=F.Linear(n_units * 2 + null_unit * 2,
                          n_units + null_unit),  #source input
            w2_f=F.Linear(n_units + null_unit,
                          n_units * 2 + null_unit * 2),  #source output
            w1_e=F.Linear(n_units * 2 + null_unit * 2,
                          n_units + null_unit),  #target input
            w2_e=F.Linear(n_units + null_unit,
                          n_units * 2 + null_unit * 2),  #target output
            embed_f=F.EmbedID(vocab_f['len_vocab'],
                              n_units),  #source word embedding
            embed_e=F.EmbedID(vocab_e['len_vocab'],
                              n_units),  #target word embedding
        )
    else:
        model = chainer.FunctionSet(
            trainable=chainer.FunctionSet(w0=F.Linear(
                n_units * 4 + null_unit * 4, n_label), ),
            w1_f=F.Linear(n_units * 2 + null_unit * 2,
                          n_units + null_unit),  #source input
            w2_f=F.Linear(n_units + null_unit,
                          n_units * 2 + null_unit * 2),  #source output
            w1_e=F.Linear(n_units * 2 + null_unit * 2,
                          n_units + null_unit),  #target input
            w2_e=F.Linear(n_units + null_unit,
                          n_units * 2 + null_unit * 2),  #target output
            embed_f=F.EmbedID(vocab_f['len_vocab'],
                              n_units),  #source word embedding
            embed_e=F.EmbedID(vocab_e['len_vocab'],
                              n_units),  #target word embedding 
        )
    if opt_name == 'SGD':
        optimizer = optimizers.SGD(lr=0.02)  # (lr=opt_score)  # lr=0.01
    elif opt_name == 'AdaGrad':
        optimizer = optimizers.AdaGrad(lr=0.001)  # (lr=opt_score)  # lr=0.001
    elif opt_name == 'AdaDelta':
        optimizer = optimizers.AdaDelta(rho=0.9)  # (rho=opt_score)  # rho=0.9
    elif opt_name == 'Adam':
        optimizer = optimizers.Adam(
            alpha=0.0001)  # (alpha=opt_score)  # alpha=0.0001
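    # note: if opt_name matches none of the branches above, `optimizer` is
    # never bound and the setup() call below raises a NameError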
    optimizer.setup(model)  # .collect_parameters()
    return model, optimizer
Example 17
    def setup_optimizer(self):
        if self.opt_type == 'sgd':
            self.optimizer = optimizers.SGD(lr=self.opt_lr)
        elif self.opt_type == 'adagrad':
            self.optimizer = optimizers.AdaGrad(lr=self.opt_lr)
        elif self.opt_type == 'adam':
            self.optimizer = optimizers.Adam(alpha=self.opt_lr)
        self.optimizer.setup(self.network.collect_parameters())
Example 18
def main():
    opts = {}

    optimizer = optimizers.AdaGrad()
    opts['optimizer'] = optimizer
    opts['model'] = CNN

    train(opts)
Example 19
def init_model(vocab_size):
    model = chainer.FunctionSet(
        embed=F.EmbedID(vocab_size, embed_units),
        hidden1=F.Linear(window * embed_units, hidden_units),
        output=F.Linear(hidden_units, label_num),
    )
    opt = optimizers.AdaGrad(lr=learning_rate)
    opt.setup(model)
    return model, opt
Example 20
def get_model_optimizer(result_dir, args):
    model = pickle.load(open('models/%s' % NETS[args.net][1]))
    model.to_gpu()

    # prepare optimizer
    optimizer = optimizers.AdaGrad(lr=0.0005)
    optimizer.setup(model)

    return model, optimizer
Example 21
def train(args):
    trace('making vocabularies ...')
    src_vocab = Vocabulary.new(gens.word_list(args.source), args.vocab)
    trg_vocab = Vocabulary.new(gens.word_list(args.target), args.vocab)

    trace('making model ...')
    attmt = AttentionMT(args.vocab, args.embed, args.hidden)
    if args.use_gpu:
        attmt.to_gpu()

    for epoch in range(args.epoch):
        trace('epoch %d/%d: ' % (epoch + 1, args.epoch))
        trained = 0
        gen1 = gens.word_list(args.source)
        gen2 = gens.word_list(args.target)
        gen3 = gens.batch(
            gens.sorted_parallel(gen1, gen2, 100 * args.minibatch),
            args.minibatch)
        opt = optimizers.AdaGrad(lr=0.01)
        opt.setup(attmt)
        opt.add_hook(optimizer.GradientClipping(5))

        for src_batch, trg_batch in gen3:
            src_batch = fill_batch(src_batch)
            trg_batch = fill_batch(trg_batch)
            K = len(src_batch)
            hyp_batch, loss = forward(src_batch, trg_batch, src_vocab,
                                      trg_vocab, attmt, True, 0)
            loss.backward()
            opt.update()

            for k in range(K):
                trace('epoch %3d/%3d, sample %8d' %
                      (epoch + 1, args.epoch, trained + k + 1))
                trace(
                    '  src = ' +
                    ' '.join([x if x != '</s>' else '*'
                              for x in src_batch[k]]))
                trace(
                    '  trg = ' +
                    ' '.join([x if x != '</s>' else '*'
                              for x in trg_batch[k]]))
                trace(
                    '  hyp = ' +
                    ' '.join([x if x != '</s>' else '*'
                              for x in hyp_batch[k]]))

            trained += K

        trace('saving model ...')
        prefix = args.model + '.%03d' % (epoch + 1)
        src_vocab.save(prefix + '.srcvocab')
        trg_vocab.save(prefix + '.trgvocab')
        attmt.save_spec(prefix + '.spec')
        serializers.save_hdf5(prefix + '.weights', attmt)

    trace('finished.')
Example 22
    def train(self):
        """
        Train method
        If a word2vec model is provided, its weights can be copied into this model.
        The optimizer used is AdaGrad.
        """
        trace('making vocabularies ...')
        src_vocab = Vocabulary.new(gens.word_list(self.source), self.vocab)
        trg_vocab = Vocabulary.new(gens.word_list(self.target), self.vocab)

        trace('making model ...')
        self.attention_dialogue = AttentionDialogue(self.vocab, self.embed,
                                                    self.hidden, self.XP)
        if self.word2vecFlag:
            self.copy_model(self.word2vec, self.attention_dialogue.emb)
            self.copy_model(self.word2vec,
                            self.attention_dialogue.dec,
                            dec_flag=True)

        for epoch in range(self.epoch):
            trace('epoch %d/%d: ' % (epoch + 1, self.epoch))
            trained = 0
            gen1 = gens.word_list(self.source)
            gen2 = gens.word_list(self.target)
            gen3 = gens.batch(
                gens.sorted_parallel(gen1, gen2, 100 * self.minibatch),
                self.minibatch)
            opt = optimizers.AdaGrad(lr=0.01)
            opt.setup(self.attention_dialogue)
            opt.add_hook(optimizer.GradientClipping(5))

            random_number = random.randint(0, self.minibatch - 1)
            for src_batch, trg_batch in gen3:
                src_batch = fill_batch(src_batch)
                trg_batch = fill_batch(trg_batch)
                K = len(src_batch)
                hyp_batch, loss = self.forward_implement(
                    src_batch, trg_batch, src_vocab, trg_vocab,
                    self.attention_dialogue, True, 0)
                loss.backward()
                opt.update()

                self.print_out(random_number, epoch, trained, src_batch,
                               trg_batch, hyp_batch)

                trained += K

        trace('saving model ...')
        prefix = self.model
        model_path = APP_ROOT + "/model/" + prefix
        src_vocab.save(model_path + '.srcvocab')
        trg_vocab.save(model_path + '.trgvocab')
        self.attention_dialogue.save_spec(model_path + '.spec')
        serializers.save_hdf5(model_path + '.weights', self.attention_dialogue)

        trace('finished.')
Example 23
    def initializeOptimizer(self, optimizerAlgorithm):
        if optimizerAlgorithm == "Adam":
            self.optimizer = optimizers.Adam()
        elif optimizerAlgorithm == "AdaGrad":
            self.optimizer = optimizers.AdaGrad()
        elif optimizerAlgorithm == "SGD":
            self.optimizer = optimizers.MomentumSGD()
        else:
            raise ValueError('could not find %s in optimizers {"Adam", "AdaGrad", "SGD"}' % (optimizerAlgorithm))
        self.optimizer.setup(self.model)
Example 24
    def __init__(self, outputdim, minval, optimizer=None):
        if optimizer is None:
            self.optimizer = chainer.optimizers.Adam()
        else:
            self.optimizer = optimizer
        self.model = GoogLeNetBN(outputdim)
        self.optimizer.setup(self.model)
        self.myOptimizers = [optimizers.Adam(), optimizers.AdaGrad(), optimizers.AdaDelta()]
        self.mindata = -minval[0]
        print(self.mindata)
Example 25
    def initialize_optimizer(self, lr=0.5):
        if self.algorithm == 'SGD':
            self.optimizer = optimizers.SGD(lr=lr)
        elif self.algorithm == 'Adam':
            self.optimizer = optimizers.Adam()
        elif self.algorithm == 'Adagrad':
            self.optimizer = optimizers.AdaGrad()
        elif self.algorithm == 'Adadelta':
            self.optimizer = optimizers.AdaDelta()
        else:
            raise AssertionError('this algorithm is not available')
        self.optimizer.setup(self.model)
Example 26
    def init_optimizer(self):
        if self.optimizer == 'SGD':
            self.optimizer = optimizers.MomentumSGD(lr=self.learning_rate,
                                                    momentum=self.momentum)
        elif self.optimizer == 'AdaDelta':
            self.optimizer = optimizers.AdaDelta()
        elif self.optimizer == 'AdaGrad':
            self.optimizer = optimizers.AdaGrad()
        elif self.optimizer == 'Adam':
            self.optimizer = optimizers.Adam()
        elif self.optimizer == 'RMSprop':
            self.optimizer = optimizers.RMSprop()
Example 27
def get_optimizer(opt):
    # prepare optimizer 
    if opt == 'MomentumSGD':
        optimizer = optimizers.MomentumSGD(lr=args.lr, momentum=0.7)
    elif opt == 'Adam':
        optimizer = optimizers.Adam(alpha=args.alpha)
    elif opt == 'AdaGrad':
        optimizer = optimizers.AdaGrad(lr=args.lr)
    else:
        raise Exception('No optimizer is selected')

    return optimizer
Example 28
def init(args):
    def parse(line):
        attr, pos_id = line.split()
        attr = tuple(attr.split(','))
        return (attr, int(pos_id))

    model = md.Analyzer(
        md.BidirectionalRecognizer(md.Recognizer(256, 100, 100, 100),
                                   md.Recognizer(256, 100, 100, 100)),
        md.Tagger(md.BiClassifier(100), chainer.ChainList()))
    optimizer = optimizers.AdaGrad(lr=0.01)
    optimizer.setup(model)
    return Storage(model, optimizer)
Example 29
def cross_optimizers(opt):
    if opt == 'SGD':
        optimizer = optimizers.SGD()
    elif opt == 'MomentumSGD':
        optimizer = optimizers.MomentumSGD()
    elif opt == 'AdaGrad':
        optimizer = optimizers.AdaGrad()
    elif opt == 'RMSprop':
        optimizer = optimizers.RMSprop()
    elif opt == 'AdaDelta':
        optimizer = optimizers.AdaDelta()
    elif opt == 'Adam':
        optimizer = optimizers.Adam()
    return copy.deepcopy(optimizer)
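Usage sketch: the deepcopy means every call returns an optimizer with independent internal state, which is convenient for cross-validation; each copy still needs setup() on its own model before use.

fold_opts = [cross_optimizers('AdaGrad') for _ in range(5)]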
Example 30
    def train(self):
        trace('making vocabularies ...')
        src_vocab = Vocabulary.new(gens.word_list(self.source), self.vocab)
        trg_vocab = Vocabulary.new(gens.word_list(self.target), self.vocab)

        trace('making model ...')
        encdec = EncoderDecoder(self.vocab, self.embed, self.hidden)
        if self.word2vecFlag:
            self.copy_model(self.word2vec, encdec.enc)
            self.copy_model(self.word2vec, encdec.dec, dec_flag=True)

        for epoch in range(self.epoch):
            trace('epoch %d/%d: ' % (epoch + 1, self.epoch))
            trained = 0
            gen1 = gens.word_list(self.source)
            gen2 = gens.word_list(self.target)
            gen3 = gens.batch(
                gens.sorted_parallel(gen1, gen2, 100 * self.minibatch),
                self.minibatch)
            opt = optimizers.AdaGrad(lr=0.01)
            opt.setup(encdec)
            opt.add_hook(optimizer.GradientClipping(5))

            random_number = random.randint(0, self.minibatch - 1)
            for src_batch, trg_batch in gen3:
                src_batch = fill_batch(src_batch)
                trg_batch = fill_batch(trg_batch)
                K = len(src_batch)
                # If you use the IPython notebook you have to use the forward function
                # hyp_batch, loss = self.forward(src_batch, trg_batch, src_vocab, trg_vocab, encdec, True, 0)
                hyp_batch, loss = self.forward_implement(
                    src_batch, trg_batch, src_vocab, trg_vocab, encdec, True,
                    0)
                loss.backward()
                opt.update()

                self.print_out(random_number, epoch, trained, src_batch,
                               trg_batch, hyp_batch)

                trained += K

        trace('saving model ...')
        prefix = self.model
        src_vocab.save(prefix + '.srcvocab')
        trg_vocab.save(prefix + '.trgvocab')
        encdec.save_spec(prefix + '.spec')
        serializers.save_hdf5(prefix + '.weights', encdec)

        trace('finished.')