Example #1
	def __init__(self):
		Model.__init__(self)

		self.fc = self.build_network(output_dim=len(config.actions))

		self.optimizer_fc = optimizers.Adam(alpha=config.rl_learning_rate, beta1=config.rl_gradient_momentum)
		self.optimizer_fc.setup(self.fc)
		self.optimizer_fc.add_hook(optimizer.GradientClipping(10.0))

		self.load()
		self.update_target()
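Example #1 only shows the setup step. For context, here is a minimal, self-contained sketch (an addition, not part of the scraped example; it assumes the standard Chainer API) of the pattern shared by all the examples below: register a GradientClipping hook on an optimizer, then call backward() and update() so the hook rescales the gradients right before each parameter update.

import numpy as np
import chainer.functions as F
import chainer.links as L
from chainer import optimizers, optimizer

model = L.Linear(4, 2)                          # toy model
opt = optimizers.Adam(alpha=1e-3)
opt.setup(model)
opt.add_hook(optimizer.GradientClipping(10.0))  # rescale gradients so their global L2 norm stays <= 10

x = np.random.randn(8, 4).astype(np.float32)    # toy input batch
t = np.random.randint(0, 2, size=8).astype(np.int32)

model.cleargrads()                              # clear any accumulated gradients
loss = F.softmax_cross_entropy(model(x), t)
loss.backward()                                 # compute gradients
opt.update()                                    # the clipping hook runs here, then the Adam update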
Example #2
def create_value_based_learner(cfg_name):
    """
    Creates a learner that can be used with value based algorithms from chainerrl.
    :param cfg_name: type str, the name of the config
    :return: chainerrl agent specified in config
    """
    vb_config = Config(cfg_name)
    network = getattr(models, vb_config.get_str('BASIC', 'network'))(
        **vb_config.get_section('NETWORK'))
    q_func = q_functions.SingleModelStateQFunctionWithDiscreteAction(model=network)
    opt = getattr(optimizers, vb_config.get_str('BASIC', 'optimizer'))(
        **vb_config.get_section('OPTIMIZER'))

    opt.setup(q_func)
    opt.add_hook(
        optimizer.GradientClipping(threshold=vb_config.get_float('BASIC', 'grad_clip')))
    rep_buf = replay_buffer.PrioritizedEpisodicReplayBuffer(
        capacity=vb_config.get_int('MEMORY_BUFFER', 'episodic_buffer_size'),
        wait_priority_after_sampling=vb_config.get_bool('MEMORY_BUFFER',
                                                        'wait_priority_after_sampling'))

    explorer = explorers.LinearDecayEpsilonGreedy(
        random_action_func=lambda: np.random.randint(
            0, vb_config.get_int('NETWORK', 'output_dim')),
        **vb_config.get_section('EXPLORER'))

    try:
        learner = getattr(agents, vb_config.get_str('BASIC', 'learner'))(q_function=q_func,
                                                                         optimizer=opt,
                                                                         replay_buffer=rep_buf,
                                                                         phi=lambda x: x,
                                                                         explorer=explorer,
                                                                         **vb_config.get_section(
                                                                             'ALGORITHM'))
        if vb_config.get_str('BASIC', 'load_path'):
            learner.load(os.path.join(get_results_path(), vb_config.get_str('BASIC', 'load_path')))

    except AttributeError as e:
        logger.log(msg='Cannot find model {} in chainerrl.agents'.format(
            vb_config.get_str('BASIC', 'learner')),
            level=logging.ERROR)
        raise e

    logger.log(msg='Created learner {}'.format(learner.__class__.__name__),
               level=logging.INFO)
    logger.log(msg='Model parameters {}'.format(
        ' '.join([name + ':' + str(value) for name, value in
                  vb_config.get_section('EXPERIMENT').items()])), level=logging.INFO)
    logger.log(msg='Explorer parameters {}'.format(
        ' '.join([name + ':' + str(value) for name, value in
                  vb_config.get_section('EXPLORER').items()])), level=logging.INFO)

    return learner
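A hedged sketch of how the agent returned by create_value_based_learner could then be driven (this is an addition: make_env and the config name 'dqn.cfg' are hypothetical placeholders; the loop uses chainerrl's standard online-training calls act_and_train and stop_episode_and_train):

env = make_env()                                     # hypothetical Gym-style environment
agent = create_value_based_learner('dqn.cfg')        # hypothetical config name

for episode in range(100):
    obs = env.reset()
    reward, done = 0.0, False
    while not done:
        action = agent.act_and_train(obs, reward)    # choose an action and learn online
        obs, reward, done, _ = env.step(action)
    agent.stop_episode_and_train(obs, reward, done)  # close out the episode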
Example #3
    def train(self):
        trace('making vocabularies ...')
        src_vocab = Vocabulary.new(gens.word_list(self.source), self.vocab)
        trg_vocab = Vocabulary.new(gens.word_list(self.target), self.vocab)

        trace('making model ...')
        encdec = EncoderDecoder(self.vocab, self.embed, self.hidden)
        if self.word2vecFlag:
            self.copy_model(self.word2vec, encdec.enc)
            self.copy_model(self.word2vec, encdec.dec, dec_flag=True)

        for epoch in range(self.epoch):
            trace('epoch %d/%d: ' % (epoch + 1, self.epoch))
            trained = 0
            gen1 = gens.word_list(self.source)
            gen2 = gens.word_list(self.target)
            gen3 = gens.batch(
                gens.sorted_parallel(gen1, gen2, 100 * self.minibatch),
                self.minibatch)
            opt = optimizers.AdaGrad(lr=0.01)
            opt.setup(encdec)
            opt.add_hook(optimizer.GradientClipping(5))

            random_number = random.randint(0, self.minibatch - 1)
            for src_batch, trg_batch in gen3:
                src_batch = fill_batch(src_batch)
                trg_batch = fill_batch(trg_batch)
                K = len(src_batch)
                # If you use the IPython notebook you have to use the forward function
                # hyp_batch, loss = self.forward(src_batch, trg_batch, src_vocab, trg_vocab, encdec, True, 0)
                hyp_batch, loss = self.forward_implement(
                    src_batch, trg_batch, src_vocab, trg_vocab, encdec, True,
                    0)
                loss.backward()
                opt.update()

                self.print_out(random_number, epoch, trained, src_batch,
                               trg_batch, hyp_batch)

                trained += K

        trace('saving model ...')
        prefix = self.model
        src_vocab.save(prefix + '.srcvocab')
        trg_vocab.save(prefix + '.trgvocab')
        encdec.save_spec(prefix + '.spec')
        serializers.save_hdf5(prefix + '.weights', encdec)

        trace('finished.')
Example #4
def create_async_learner(cfg_name):
    """
    Creates a learner that can be used with asynchronous algorithms from chainerrl.
    :param cfg_name: type str, the name of the config
    :return: chainerrl agent specified in config
    """
    config = Config(cfg_name)
    network = getattr(models, config.get_str('BASIC', 'network'))(**config.get_section('NETWORK'))
    opt = rmsprop_async.RMSpropAsync(**config.get_section('OPTIMIZER'))
    opt.setup(network)
    opt.add_hook(optimizer.GradientClipping(threshold=config.get_float('BASIC', 'grad_clip')))
    learner = getattr(agents, config.get_str('BASIC', 'learner'))(network, opt,
                                                                  **config.get_section(
                                                                      'ALGORITHM'))
    return learner
Example #5
def train(args):
  trace('making vocabularies ...')
  src_vocab = Vocabulary.new(gens.word_list(args.source), args.vocab)
  trg_vocab = Vocabulary.new(gens.word_list(args.target), args.vocab)

  trace('making model ...')
  attmt = AttentionMT(args.vocab, args.embed, args.hidden)
  if args.use_gpu:
    attmt.to_gpu()

  for epoch in range(args.epoch):
    trace('epoch %d/%d: ' % (epoch + 1, args.epoch))
    trained = 0
    gen1 = gens.word_list(args.source)
    gen2 = gens.word_list(args.target)
    gen3 = gens.batch(gens.sorted_parallel(gen1, gen2, 100 * args.minibatch), args.minibatch)
    opt = optimizers.AdaGrad(lr = 0.01)
    opt.setup(attmt)
    opt.add_hook(optimizer.GradientClipping(5))

    for src_batch, trg_batch in gen3:
      src_batch = fill_batch(src_batch)
      trg_batch = fill_batch(trg_batch)
      K = len(src_batch)
      hyp_batch, loss = forward(src_batch, trg_batch, src_vocab, trg_vocab, attmt, True, 0)
      loss.backward()
      opt.update()

      for k in range(K):
        trace('epoch %3d/%3d, sample %8d' % (epoch + 1, args.epoch, trained + k + 1))
        trace('  src = ' + ' '.join([x if x != '</s>' else '*' for x in src_batch[k]]))
        trace('  trg = ' + ' '.join([x if x != '</s>' else '*' for x in trg_batch[k]]))
        trace('  hyp = ' + ' '.join([x if x != '</s>' else '*' for x in hyp_batch[k]]))

      trained += K

    trace('saving model ...')
    prefix = args.model + '.%03d' % (epoch + 1)
    src_vocab.save(prefix + '.srcvocab')
    trg_vocab.save(prefix + '.trgvocab')
    attmt.save_spec(prefix + '.spec')
    serializers.save_hdf5(prefix + '.weights', attmt)

  trace('finished.')
Example #6
    def train(self):
        trace('making vocabularies ...')
        trg_vocab = Vocabulary.new(gens.word_list(self.target), self.vocab)

        trace('making model ...')

        for epoch in range(self.epoch):
            trace('epoch %d/%d: ' % (epoch + 1, self.epoch))
            trained = 0
            opt = optimizers.AdaGrad(lr=0.01)
            opt.setup(self.encdec)
            opt.add_hook(optimizer.GradientClipping(5))
            gen1 = gens.word_list(self.target)
            gen = gens.batch(gen1, self.minibatch)

            random_number = random.randint(0, self.minibatch - 1)
            for trg_batch in gen:
                self.trg_batch = fill_batch(trg_batch)
                if len(self.trg_batch) != self.minibatch:
                    break
                hyp_batch, loss = self.forward(trg_vocab, self.use_gpu,
                                               self.gpu_id)
                loss.backward()
                opt.update()
                K = len(self.trg_batch)

                if trained == 0:
                    self.print_out(random_number, epoch, trained, hyp_batch)

                trained += K

        trace('saving model ...')
        prefix = self.model
        trg_vocab.save(prefix + '.trgvocab')
        self.encdec.save_spec(prefix + '.spec')
        serializers.save_hdf5(prefix + '.weights', self.encdec)

        trace('finished.')
Example #7
def train():
    dictf = open(dictpath, 'rb')
    w_id_dict = pickle.load(dictf)

    vocab_size = len(w_id_dict)

    model = Seq2Seq(vocab_size=vocab_size,
                    embed_size=EMBED_SIZE,
                    hidden_size=HIDDEN_SIZE,
                    batch_size=BATCH_SIZE)

    model.reset()
    inf = open(inputpath, 'rb')
    data = pickle.load(inf)
    data = vocab_to_id(data, w_id_dict)

    for epoch in range(EPOCH_NUM):
        opt = optimizers.Adam()
        opt.setup(model)
        opt.add_hook(optimizer.GradientClipping(5))

        for num in range(len(data) // BATCH_SIZE):

            minibatch = data[num * BATCH_SIZE:(num + 1) * BATCH_SIZE]
            enc_words, dec_words = make_minibatch(minibatch)

            total_loss = model.feedforward(enc_words=enc_words,
                                           dec_words=dec_words)

            total_loss.backward()
            opt.update()

        print('Epoch %s finished' % (epoch + 1))
        outputfile = outputpath % (EMBED_SIZE, HIDDEN_SIZE, BATCH_SIZE,
                                   epoch + 1)
        serializers.save_npz(outputfile, model)
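As a small follow-up sketch (an addition, not part of the example), a checkpoint written above with serializers.save_npz can be restored into an identically constructed model with serializers.load_npz before inference:

from chainer import serializers

# rebuild the same architecture, then load the trained weights into it
model = Seq2Seq(vocab_size=vocab_size,
                embed_size=EMBED_SIZE,
                hidden_size=HIDDEN_SIZE,
                batch_size=BATCH_SIZE)
serializers.load_npz(outputfile, model)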
Example #8
def train(args):
    trace('loading corpus ...')
    with open(args.source) as fp:
        trees = [make_tree(l) for l in fp]

    trace('extracting leaf nodes ...')
    word_lists = [extract_words(t) for t in trees]
    lower_lists = [[w.lower() for w in words] for words in word_lists]

    trace('extracting gold operations ...')
    op_lists = [make_operations(t) for t in trees]

    trace('making vocabulary ...')
    word_vocab = Vocabulary.new(lower_lists, args.vocab)
    phrase_set = set()
    semiterminal_set = set()
    for tree in trees:
        phrase_set |= set(extract_phrase_labels(tree))
        semiterminal_set |= set(extract_semiterminals(tree))
    phrase_vocab = Vocabulary.new([list(phrase_set)],
                                  len(phrase_set),
                                  add_special_tokens=False)
    semiterminal_vocab = Vocabulary.new([list(semiterminal_set)],
                                        len(semiterminal_set),
                                        add_special_tokens=False)

    trace('converting data ...')
    word_lists = [convert_word_list(x, word_vocab) for x in word_lists]
    op_lists = [
        convert_op_list(x, phrase_vocab, semiterminal_vocab) for x in op_lists
    ]

    trace('start training ...')
    parser = Parser(
        args.vocab,
        args.embed,
        args.char_embed,
        args.queue,
        args.stack,
        args.srstate,
        len(phrase_set),
        len(semiterminal_set),
    )
    if args.use_gpu:
        parser.to_gpu()
    opt = optimizers.SGD(lr=0.1)
    opt.setup(parser)
    opt.add_hook(optimizer.GradientClipping(10))
    opt.add_hook(optimizer.WeightDecay(0.0001))

    batch_set = list(zip(word_lists, op_lists))

    for epoch in range(args.epoch):
        n = 0
        random.shuffle(batch_set)

        for samples in batch(batch_set, args.minibatch):
            parser.zerograds()
            loss = XP.fzeros(())

            for word_list, op_list in zip(*samples):
                trace('epoch %3d, sample %6d:' % (epoch + 1, n + 1))
                loss += parser.forward_train(word_list, op_list)
                n += 1

            loss.backward()
            opt.update()

        trace('saving model ...')
        prefix = args.model + '.%03d' % (epoch + 1)
        word_vocab.save(prefix + '.words')
        phrase_vocab.save(prefix + '.phrases')
        semiterminal_vocab.save(prefix + '.semiterminals')
        parser.save_spec(prefix + '.spec')
        serializers.save_hdf5(prefix + '.weights', parser)

        opt.lr *= 0.92

    trace('finished.')
Example #9
    def fit(self,
            queries,
            responses,
            train_path,
            epoch_num=30,
            batch_size=40,
            tag=None):
        train_queries = self.xp.vstack(self.xp.array(queries))
        train_responses = self.xp.vstack(self.xp.array(responses))
        teacher_num = min(len(train_queries), len(train_responses))

        opt = optimizers.Adam()
        opt.setup(self.model)
        opt.add_hook(optimizer.GradientClipping(5))
        if self.flag_gpu:
            self.model.to_gpu(0)
        self.model.reset()

        # start training
        st = datetime.datetime.now()
        for epoch in range(self.npz_num, epoch_num):
            # minibatch training
            perm = np.random.permutation(teacher_num)  # random permutation of unique indices
            total_loss = 0
            total_accuracy = 0
            for i in range(0, teacher_num, batch_size):
                # reset the model's gradients and internal state
                self.model.reset()
                # gather the words for each index in this minibatch
                enc_words = train_queries[perm[i:i + batch_size]].T
                dec_words = train_responses[perm[i:i + batch_size]].T
                # batch size at encoding time
                encode_batch_size = len(enc_words[0])
                # run the encoder
                self.model.encode(enc_words, encode_batch_size)
                # feed <eos> into the decoder
                t = self.xp.array([0] * encode_batch_size, dtype='int32')
                # initialize the loss
                loss = self.xp.zeros((), dtype='float32')
                # initialize the accuracy
                accuracy = self.xp.zeros((), dtype='float32')
                # decode one word at a time
                for w in dec_words:
                    y = self.model.decode(t)
                    t = self.xp.array(w, dtype='int32')  # convert the gold words to an array
                    loss += F.softmax_cross_entropy(
                        y, t)  # loss between predicted and gold words
                    accuracy += F.accuracy(y, t)  # accuracy for this step
                loss.backward()
                loss.unchain_backward()
                opt.update()
                total_loss += loss.data
                total_accuracy += accuracy.data
            if (epoch + 1) % 10 == 0:
                # save the model
                if self.flag_gpu:  # move to CPU so the model can also be used without a GPU
                    self.model.to_cpu()
                serializers.save_npz(train_path + str(epoch + 1) + ".npz",
                                     self.model)
                if self.flag_gpu:
                    self.model.to_gpu(0)
            ed = datetime.datetime.now()
            epoch_data = "epoch: {}\ttag: {}\n".format(epoch + 1, str(tag))
            loss_data = "\tloss: {}\n".format(round(float(total_loss), 2))
            accuracy_data = "\taccuracy: {}\n".format(
                round(float(total_accuracy), 2))
            time_data = "\ttime: {}".format(ed - st)
            text = epoch_data + loss_data + accuracy_data + time_data
            print(text)
            st = datetime.datetime.now()
Example #10
def main():
    args = parse_args()

    trace('making vocabulary ...')
    vocab, num_lines, num_words = make_vocab(args.corpus, args.vocab)

    trace('initializing CUDA ...')
    cuda.init()

    trace('start training ...')
    if args.model == 0:
        model = BasicRnnLM(args.embed, args.hidden, args.vocab)
        model.reset()
    elif args.model == 1:
        model = LSTMRnn(args.embed, args.hidden, args.vocab)
        model.reset()
    elif args.model == 2:
        model = AttentionLM(args.embed, args.hidden, args.vocab)
        model.reset()
    model.to_gpu()

    for epoch in range(args.epoch):
        trace('epoch %d/%d: ' % (epoch + 1, args.epoch))
        log_ppl = 0.0
        trained = 0

        opt = optimizers.AdaGrad(lr=0.01)
        opt.setup(model)
        opt.add_hook(optimizer.GradientClipping(5))

        for batch in generate_batch(args.corpus, args.minibatch):
            K = len(batch)
            loss, perplexity = forward(batch, model)
            loss.backward()
            log_ppl += perplexity
            opt.update()
            trained += K
            model.reset()

        trace('  %d/%d' % (trained, num_lines))
        log_ppl /= float(num_words)
        trace('Train  log(PPL) = %.10f' % log_ppl)
        trace('Train  PPL      = %.10f' % math.exp(log_ppl))

        log_ppl = 0.0

        for batch in generate_batch(args.valid, args.minibatch):
            K = len(batch)
            loss, perplexity = forward(batch, model)
            log_ppl += perplexity
            model.reset()

        trace('Valid  log(PPL) = %.10f' % log_ppl)
        trace('Valid  PPL      = %.10f' % math.exp(log_ppl))

        trace('  writing model ...')
        trace('saving model ...')
        prefix = 'RNNLM-' + str(args.model) + '.%03d' % (epoch + 1)
        save_vocab(prefix + '.srcvocab', vocab)  #Fix this # Fixed
        model.save_spec(prefix + '.spec')
        serializers.save_hdf5(prefix + '.weights', model)

    trace('training finished.')
Example #11
def train(args):
    vocab = Vocabulary.from_conll(args.train, args.vocab)
    train_dataset = [conll_to_train(x, vocab) for x in read_conll(args.train)]
    dev_dataset = [conll_to_train(x, vocab) for x in read_conll(args.dev)]

    parser = Parser(args.vocab, args.embed, args.hidden)
    if args.gpu >= 0:
        parser.to_gpu()

    opt = optimizers.AdaGrad(lr=0.01)
    opt.setup(parser)
    opt.add_hook(optimizer.GradientClipping(10))
    opt.add_hook(optimizer.WeightDecay(0.0001))

    for epoch in range(args.epoch):
        random.shuffle(train_dataset)

        parser.zerograds()
        loss = XP.fzeros(())

        for i, data in enumerate(train_dataset):
            trace('epoch %3d: train sample %6d:' % (epoch + 1, i + 1))
            parent_scores, root_scores = parser.forward(data)
            if len(data) > 1:
                parent_scores = functions.split_axis(parent_scores, len(data),
                                                     0)
            else:
                parent_scores = (parent_scores, )

            root = -1
            for j, (p_scores, (wid,
                               parent)) in enumerate(zip(parent_scores, data)):
                if parent == -1:
                    trace('  %3d: root' % j)
                    root = j
                else:
                    parent_est = p_scores.data.argmax()
                    trace('%c %3d -> %3d (%3d)' %
                          ('*' if parent == parent_est else ' ', j, parent_est,
                           parent))
                    loss += functions.softmax_cross_entropy(
                        p_scores, XP.iarray([parent]))

            root_est = root_scores.data.argmax()
            trace('ROOT: %3d (%3d)' % (root_est, root))
            loss += functions.softmax_cross_entropy(root_scores,
                                                    XP.iarray([root]))

            if (i + 1) % 200 == 0:
                loss.backward()
                opt.update()
                parser.zerograds()
                loss = XP.fzeros(())

        loss.backward()
        opt.update()
        trace('epoch %3d: trained.                        ' % (epoch + 1))

        parent_num = 0
        parent_match = 0
        root_num = 0
        root_match = 0
        for i, data in enumerate(dev_dataset):
            trace('epoch %3d: dev sample %6d:' % (epoch + 1, i + 1),
                  rollback=True)
            parent_scores, root_scores = parser.forward(data)
            if len(data) > 1:
                parent_scores = functions.split_axis(parent_scores, len(data),
                                                     0)
            else:
                parent_scores = (parent_scores, )

            root = -1
            for j, (p_scores, (wid,
                               parent)) in enumerate(zip(parent_scores, data)):
                if parent == -1:
                    root = j
                else:
                    parent_est = p_scores.data.argmax()
                    parent_num += 1
                    parent_match += 1 if parent_est == parent else 0

            root_est = root_scores.data.argmax()
            root_num += 1
            root_match += 1 if root_est == root else 0

        result_str = \
          'epoch %3d: dev: parent-acc = %.4f (%5d/%5d), root-acc = %.4f (%4d/%4d)' % \
          ( \
            epoch + 1, \
            parent_match / parent_num, parent_match, parent_num, \
            root_match / root_num, root_match, root_num)
        trace(result_str)

        with open(args.model + '.log', 'a') as fp:
            print(result_str, file=fp)

        trace('epoch %3d: saving models ...' % (epoch + 1))
        prefix = args.model + '.%03d' % (epoch + 1)
        vocab.save(prefix + '.vocab')
        parser.save_spec(prefix + '.parent_spec')
        serializers.save_hdf5(prefix + '.parent_weights', parser)

    trace('finished.')
Example #12
def train(args):
    trace('loading corpus ...')
    with open(args.source) as fp:
        trees = [make_tree(l) for l in fp]

    trace('extracting leaf nodes ...')
    word_lists = [extract_words(t) for t in trees]

    trace('extracting gold operations ...')
    op_lists = [make_operations(t) for t in trees]

    trace('making vocabulary ...')
    word_vocab = Vocabulary.new(word_lists, args.vocab)
    phrase_set = set()
    semi_set = set()
    for tree in trees:
        phrase_set |= set(extract_phrase_labels(tree))
        semi_set |= set(extract_semi_labels(tree))
    phrase_vocab = Vocabulary.new([list(phrase_set)],
                                  len(phrase_set),
                                  add_special_tokens=False)
    semi_vocab = Vocabulary.new([list(semi_set)],
                                len(semi_set),
                                add_special_tokens=False)

    trace('converting data ...')
    word_lists = [convert_word_list(x, word_vocab) for x in word_lists]
    op_lists = [convert_op_list(x, phrase_vocab, semi_vocab) for x in op_lists]

    trace('start training ...')
    parser = Parser(
        args.vocab,
        args.embed,
        args.queue,
        args.stack,
        len(phrase_set),
        len(semi_set),
    )
    if USE_GPU:
        parser.to_gpu()
    opt = optimizers.AdaGrad(lr=0.005)
    opt.setup(parser)
    opt.add_hook(optimizer.GradientClipping(5))

    for epoch in range(args.epoch):
        n = 0

        for samples in batch(zip(word_lists, op_lists), args.minibatch):
            parser.zerograds()
            loss = my_zeros((), np.float32)

            for word_list, op_list in zip(*samples):
                trace('epoch %3d, sample %6d:' % (epoch + 1, n + 1))
                loss += parser.forward(word_list, op_list, 0)
                n += 1

            loss.backward()
            opt.update()

        trace('saving model ...')
        prefix = args.model + '.%03d' % (epoch + 1)
        word_vocab.save(prefix + '.words')
        phrase_vocab.save(prefix + '.phrases')
        semi_vocab.save(prefix + '.semiterminals')
        parser.save_spec(prefix + '.spec')
        serializers.save_hdf5(prefix + '.weights', parser)

    trace('finished.')
Example #13
def main():
  args = parse_args()
  XP.set_library(args)
  date=time.localtime()[:6]
  D=[]
  for i in date:
    D.append(str(i))
  D="_".join(D)

  save_path=args.save_path
  if os.path.exists(save_path)==False:
    os.mkdir(save_path)

  if args.model_path!=None:
    print("continue existed model!! load recipe of {}".format(args.model_path))
    with open(args.model_path+'/recipe.json','r') as f:
      recipe=json.load(f)
    vae_enc=recipe["network"]["IM"]["vae_enc"]
    vae_z=recipe["network"]["IM"]["vae_z"]
    vae_dec=recipe["network"]["IM"]["vae_dec"]
    times=recipe["network"]["IM"]["times"]
    alpha=recipe["network"]["IM"]["KLcoefficient"]
    
    batchsize=recipe["setting"]["batchsize"]
    maxepoch=args.maxepoch
    weightdecay=recipe["setting"]["weightdecay"]
    grad_clip=recipe["setting"]["grad_clip"]
    cur_epoch=recipe["setting"]["cur_epoch"]+1
    ini_lr=recipe["setting"]["initial_learningrate"]
    cur_lr=recipe["setting"]["cur_lr"]            

    with open(args.model_path+"/../trainloss.json",'r') as f:
      trainloss_dic=json.load(f)
    with open(args.model_path+"/../valloss.json",'r') as f:
      valloss_dic=json.load(f)

  else:
    vae_enc=args.vae_enc
    vae_z=args.vae_z
    vae_dec=args.vae_dec
    times=args.times
    alpha=args.alpha
    batchsize=args.batchsize
    maxepoch=args.maxepoch
    weightdecay=args.weightdecay
    grad_clip=5
    cur_epoch=0
    ini_lr=args.lr
    cur_lr=ini_lr
    trainloss_dic={}
    valloss_dic={}

  print('this experiment started at :{}'.format(D))
  print('***Experiment settings***')
  print('[IM]vae encoder hidden size :{}'.format(vae_enc))
  print('[IM]vae hidden layer size :{}'.format(vae_z))
  print('[IM]vae decoder hidden layer size :{}'.format(vae_dec)) 
  print('[IM]sequence length:{}'.format(times)) 
  print('max epoch :{}'.format(maxepoch))
  print('mini batch size :{}'.format(batchsize))
  print('initial learning rate :{}'.format(cur_lr))
  print('weight decay :{}'.format(weightdecay))
  print("optimization by :{}".format("Adam"))
  print("VAE KL coefficient:",alpha)
  print('*************************') 
  
  vae = VAE_bernoulli_noattention(vae_enc,vae_z,vae_dec,28,28,1)
  opt = optimizers.Adam(alpha = cur_lr)
  opt.setup(vae)
  if args.model_path!=None:
    print('loading model ...')
    serializers.load_npz(args.model_path + '/VAEweights', vae)
    serializers.load_npz(args.model_path + '/optimizer', opt)
  else:
    print('making [[new]] model ...')
    for param in vae.params():
      data = param.data
      data[:] = np.random.uniform(-0.1, 0.1, data.shape)
  opt.add_hook(optimizer.GradientClipping(grad_clip))
  opt.add_hook(optimizer.WeightDecay(weightdecay))  

  if args.gpu >= 0 :
    vae.to_gpu()

  mnist=MNIST(binarize=True)
  train_size = mnist.train_size
  test_size = mnist.test_size
 
  eps = 1e-8
  for epoch in range(cur_epoch+1, maxepoch+1):
    print('\nepoch {}'.format(epoch))
    LX = 0.0
    LZ = 0.0
    counter = 0
    for iter,(img_array,label_array) in enumerate(mnist.gen_train(batchsize,Random=True)):
        B = img_array.shape[0]
        Lz = XP.fzeros(())
        vae.reset(img_array)
        
        #first to T-1 step
        for j in range(times-1):
            y,kl = vae.free_energy_onestep()
            Lz_i = alpha*kl
            Lz += Lz_i
        #last step
        j+=1
        y,kl = vae.free_energy_onestep()
        Lz_i = alpha*kl
        Lz += Lz_i
        Lx = Bernoulli_nll_wesp(vae.x,y,eps)
        
        LZ += Lz.data
        LX += Lx.data
 
        loss = (Lx+Lz)/batchsize
        loss.backward()
        opt.update()

        counter += B
        sys.stdout.write('\rnow training ...  epoch {}, {}/{}  '.format(epoch,counter,mnist.train_size))
        sys.stdout.flush()
        if (iter+1) % 100 == 0:
          print("({}-th batch mean loss) Lx:%03.3f Lz:%03.3f".format(counter) % (Lx.data/B,Lz.data/B))

    img_array = cuda.to_cpu(y.data)
    im_array = img_array.reshape(batchsize*28,28)
    img = im_array[:28*5]
    plt.clf()
    plt.imshow(img,cmap=cm.gray)
    plt.colorbar(orientation='horizontal')
    plt.savefig(save_path+"/"+"img{}.png".format(epoch))

    trace(save_path+"/trainloss.txt","epoch {} Lx:{} Lz:{} Lx+Lz:{}".format(epoch,LX/train_size,LZ/train_size,(LX+LZ)/train_size))            	
    trainloss_dic[str(epoch).zfill(3)]={
                    "Lx":float(LX/train_size),
                    "Lz":float(LZ/train_size),
                    "Lx+Lz":float((LX+LZ)/train_size)}
    with open(save_path+"/trainloss.json",'w') as f:
        json.dump(trainloss_dic,f,indent=4)   

    print('save model ...')
    prefix = save_path+"/"+str(epoch).zfill(3)
    if os.path.exists(prefix)==False:
        os.mkdir(prefix)        
    serializers.save_npz(prefix + '/VAEweights', vae) 
    serializers.save_npz(prefix + '/optimizer', opt)
    print('save recipe...')
    recipe_dic = {
    "date":D,
    "setting":{
        "maxepoch":maxepoch,
        "batchsize":batchsize,
        "weightdecay":weightdecay,
        "grad_clip":grad_clip,
        "opt":"Adam",
        "initial_learningrate":ini_lr,
        "cur_epoch":epoch,
        "cur_lr":cur_lr},
    "network":{
        "IM":{
            "x_size":784,
            "vae_enc":vae_enc,
            "vae_z":vae_z,
            "vae_dec":vae_dec,
            "times":times,
            "KLcoefficient":alpha},
            },
            }
    with open(prefix+'/recipe.json','w') as f:
      json.dump(recipe_dic,f,indent=4)
           
    if epoch % 1 == 0:
        print("\nvalidation step")
        LX = 0.0
        LZ = 0.0        
        counter = 0
        for iter,(img_array,label_array) in enumerate(mnist.gen_test(batchsize)):
            B = img_array.shape[0]
            Lz = XP.fzeros(())
            vae.reset(img_array)
            
            #first to T-1 step
            for j in range(times-1):
                y,kl = vae.free_energy_onestep()
                Lz_i = alpha*kl
                Lz += Lz_i           
            #last step
            j+=1
            y,kl = vae.free_energy_onestep()
            Lz_i = alpha*kl
            Lz += Lz_i  
            Lx = Bernoulli_nll_wesp(vae.x,y,eps)

            LZ += Lz.data.reshape(())
            LX += Lx.data.reshape(())

            counter += B
            sys.stdout.write('\rnow testing ...  epoch {}, {}/{}  '.format(epoch,counter,test_size))
            sys.stdout.flush()
        print("")
        trace(save_path+"/valloss.txt","epoch {} Lx:{} Lz:{} Lx+Lz:{}".format(epoch,LX/test_size,LZ/test_size,(LX+LZ)/test_size))                  		
        valloss_dic[str(epoch).zfill(3)]={
                        "Lx":float(LX/test_size),
                        "Lz":float(LZ/test_size),
                        "Lx+Lz":float((LX+LZ)/test_size)}
        with open(save_path+"/valloss.json",'w') as f:
            json.dump(valloss_dic,f,indent=4)

        img_array = cuda.to_cpu(y.data)
        im_array = img_array.reshape(batchsize*28,28)
        img = im_array[:28*5]
        plt.clf()
        plt.imshow(img,cmap=cm.gray)
        plt.colorbar(orientation='horizontal')
        plt.savefig(save_path+"/"+"img_test{}.png".format(epoch))
  print('finished.') 
Example #14
def train(args):
    trace('making vocabularies ...')
    src_vocab = Vocabulary.new(gens.input_word_list(), args.vocab)
    trg_vocab = Vocabulary.new(gens.output_word_list(), args.vocab)
    trace('making model ...')
    encdec = EncoderDecoder(args.vocab, args.embed, args.hidden)

    if args.load_model != "":
        print("model load  %s ... " % (args.load_model))
        src_vocab = Vocabulary.load(args.load_model + '.srcvocab')
        trg_vocab = Vocabulary.load(args.load_model + '.trgvocab')
        encdec = EncoderDecoder.load_spec(args.load_model + '.spec')
        serializers.load_hdf5(args.load_model + '.weights', encdec)

    if args.use_gpu:
        encdec.to_gpu()

    for epoch in range(args.epoch):
        trace('epoch %d/%d: ' % (epoch + 1, args.epoch))
        trained = 0
        gen1 = gens.input_word_list()
        gen2 = gens.output_word_list()
        gen3 = gens.batch(
            gens.sorted_parallel(gen1, gen2, 100 * args.minibatch),
            args.minibatch)
        opt = optimizers.AdaGrad(lr=0.01)
        opt.setup(encdec)
        opt.add_hook(optimizer.GradientClipping(5))

        for src_batch, trg_batch in gen3:
            src_batch = fill_batch(src_batch)
            trg_batch = fill_batch(trg_batch)
            K = len(src_batch)
            hyp_batch, loss = forward(src_batch, trg_batch, src_vocab,
                                      trg_vocab, encdec, True, 0)
            loss.backward()
            opt.update()

            for k in range(K):
                trace('epoch %3d/%3d, sample %8d' %
                      (epoch + 1, args.epoch, trained + k + 1))
                trace(
                    '  src = ' +
                    ' '.join([x if x != '</s>' else '*'
                              for x in src_batch[k]]))
                trace(
                    '  trg = ' +
                    ' '.join([x if x != '</s>' else '*'
                              for x in trg_batch[k]]))
                trace(
                    '  hyp = ' +
                    ' '.join([x if x != '</s>' else '*'
                              for x in hyp_batch[k]]))

            trained += K

        if epoch % args.model_save_timing == 0:

            trace('saving model ...')
            prefix = args.model + '.%03d' % (epoch + 1)
            src_vocab.save(prefix + '.srcvocab')
            trg_vocab.save(prefix + '.trgvocab')
            encdec.save_spec(prefix + '.spec')
            serializers.save_hdf5(prefix + '.weights', encdec)

    trace('finished.')
Example #15
def train(args):
    if args.gpu > -1:
        cuda.get_device(args.gpu).use()
        xp = cuda.cupy
    else:
        xp = np

    if args.log:
        log_dir = args.log
    else:
        log_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)), '{}_{}'.format(DIR_NAME, datetime.now().strftime('%Y%m%d_%H:%M')))

    if not os.path.exists(log_dir):
        os.mkdir(log_dir)

    # setting for logging
    logger = logging.getLogger()
    logging.basicConfig(level=logging.INFO)
    log_path = os.path.join(log_dir, 'log')
    file_handler = logging.FileHandler(log_path)
    fmt = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    file_handler.setFormatter(fmt)
    logger.addHandler(file_handler)

    logger.info('Arguments...')
    for arg, val in vars(args).items():
        logger.info('{} : {}'.format(arg, val))

    logger.info('Loading Vocab...')
    vocab = Vocab()
    vocab.load(args.vocab, args.lowercase)
    vocab.add_special_token()

    sufvocab = Vocab()
    sufvocab.load(args.sufvocab, args.lowercase)
    sufvocab.add_special_token(['s>', '<UNK>'])

    pos2id = Vocab()
    pos2id.load(args.poslist)

    logger.info('preparation for training data...')
    out_path = making_data(args.train_data, args.window)

    model = WordCSnnTagger(args.wembed, args.fembed, args.hidden, len(vocab), len(sufvocab), len(pos2id), args.window, args.objct, args.alpha)
    model.save_model_config(log_dir)

    if args.gpu > -1:
        model.to_gpu()

    opt = getattr(optimizers, args.opt)()
    opt.setup(model)
    opt.add_hook(optimizer.GradientClipping(args.gclip))
    opt.add_hook(optimizer.WeightDecay(args.wdecay))

    for epoch in range(args.epoch):
        logger.info('START epoch {}/{}'.format(epoch + 1, args.epoch))
        start = time.time()
        sum_loss = xp.zeros((), dtype=xp.float32)
        n_data = 0
        n_correct = 0
        for i, [tags, contexts] in enumerate(line_iter(out_path, args.minibatch)):
            batch_ts = xp.array([pos2id[tag] for tag in tags], dtype=xp.int32)
            batch_caps = xp.array([[get_capf(word) for word in context] for context in contexts], dtype=xp.int32)
            if args.lowercase:
                contexts = [[word.lower() for word in context] for context in contexts]
            batch_xs = xp.array([[vocab[word] for word in context] for context in contexts], dtype=xp.int32)
            batch_sufs = xp.array([[sufvocab[word[-2:]] for word in context] for context in contexts], dtype=xp.int32)
            batch_features = [batch_xs, batch_sufs, batch_caps]
            cur_batch_size = batch_ts.shape[0]
            ys, loss = model(batch_features, batch_ts)
            sum_loss += loss.data * cur_batch_size
            model.zerograds()
            loss.backward()
            opt.update()
            pred_labels = ys.data.argmax(1)
            n_correct += sum(1 for j in range(cur_batch_size) if pred_labels[j] == batch_ts[j])
            n_data += cur_batch_size
            logger.info('done {} batches'.format(i + 1))
        logger.info('{} epoch train loss = {}'.format(epoch + 1, sum_loss))
        logger.info('{} epoch train accuracy = {}'.format(epoch + 1, float(n_correct / n_data)))
        logger.info('{} sec for training per epoch'.format(time.time() - start))

        if args.valid_data:
            start = time.time()
            valid_loss, valid_accuracy = evaluation(model, args.valid_data, pos2id, vocab, sufvocab, args)
            logger.info('{} epoch valid loss = {}'.format(epoch + 1, valid_loss))
            logger.info('{} epoch valid accuracy = {}'.format(epoch + 1, valid_accuracy))
            logger.info('{} sec for validation per epoch'.format(time.time() - start))

        if args.test_data:
            start = time.time()
            test_loss, test_accuracy = evaluation(model, args.test_data, pos2id, vocab, sufvocab, args)
            logger.info('{} epoch test loss = {}'.format(epoch + 1, test_loss))
            logger.info('{} epoch test accuracy = {}'.format(epoch + 1, test_accuracy))
            logger.info('{} sec for testing per epoch'.format(time.time() - start))

        logger.info('serializing...')
        prefix = '{}_{}ep_{}wembed_{}fembed_{}hidden_{}window_{}minibatch_{}opt'.format(DIR_NAME, epoch + 1, args.wembed, args.fembed, args.hidden, args.window, args.minibatch, args.opt)
        model_path = os.path.join(log_dir, prefix + '.model')
        model.save(model_path)

    logger.info('done training')
Example #16
if __name__ == "__main__":
    set_seed()
    log_tracer = LogTracer(nn_type, sep_mode)

    log_tracer("get train data")
    train, test, n_vocab = get_train_data(pad, sep_mode)
    log_tracer.trace_label("train", train)
    log_tracer.trace_label("test", test)

    if nn_type == "lstm":
        mlp = LSTM(n_vocab, n_units, N_OUT)
    elif nn_type == "cnn":
        mlp = CNN(n_vocab, n_units, N_OUT)
    opt = optimizers.Adam()
    opt.setup(mlp)
    opt.add_hook(optimizer.WeightDecay(w_decay))
    opt.add_hook(optimizer.GradientClipping(g_clip))

    log_tracer("start train")
    for epoch in range(n_epoch):
        for x, t in generate_bath(train, n_batch):
            mlp.cleargrads()
            loss, acc = mlp(x, t, train=True)
            loss.backward()
            opt.update()
            log_tracer.trace_train(epoch, loss.data, acc.data)
        x_v, t_v = parse_batch(test)
        loss_v, acc_v = mlp(x_v, t_v)
        log_tracer.trace_test(epoch, loss_v.data, acc_v.data, True)
    mlp.save(sep_mode)
Example #17
def training():
    parser = argparse.ArgumentParser()
    parser.add_argument('--hidden_size', type=int, default=200)
    parser.add_argument('--dropout', '-d', type=float, default=0.2)
    parser.add_argument('--batch_size', '-b', type=int, default=15)
    parser.add_argument('--batch_col_size', type=int, default=20)
    parser.add_argument('--epoch', '-e', type=int, default=50)
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Directory to output the result')
    parser.add_argument('--model', '-m', default='', type=str)
    args = parser.parse_args()

    print(json.dumps(args.__dict__, indent=2))

    # GPU setup
    if args.gpu > -1:  # use cuda.cupy on GPU, numpy otherwise
        xp = cuda.cupy
        cuda.get_device(args.gpu).use()
    else:
        xp = np
    # training data
    data = load_data()
    N = len(data)  # number of training examples

    # load the training data
    print('initialize DataConverter')
    data_converter = DataConverter(
        batch_col_size=args.batch_col_size)  # data converter
    data_converter.load(data)  # read in the training data

    model = AttSeq2Seq(input_size=200,
                       hidden_size=args.hidden_size,
                       batch_col_size=args.batch_col_size,
                       dropout=args.dropout,
                       device=args.gpu)

    if args.gpu >= 0:
        model.to_gpu(0)
    if args.model != '':
        serializers.load_npz(args.model, model)

    opt = optimizers.Adam()
    opt.setup(model)
    opt.add_hook(optimizer.GradientClipping(5))

    model.reset()

    # start training
    print("Train start")
    st = datetime.datetime.now()
    model_file_name = str(st)[:-7]
    for epoch in range(args.epoch):
        # minibatch training
        perm = np.random.permutation(N)  # random permutation of indices
        total_loss = 0
        for i in range(0, N, args.batch_size):
            enc_words = data_converter.train_queries[perm[i:i +
                                                          args.batch_size]]
            dec_words = data_converter.train_responses[perm[i:i +
                                                            args.batch_size]]
            model.reset()
            loss = model(enc_words=enc_words, dec_words=dec_words, train=True)
            loss.backward()
            loss.unchain_backward()
            total_loss += loss.data
            opt.update()
            print('{0}/{1}:'.format(i, N), end='\t', flush=True)
        #output_path = "./att_seq2seq_network/{}_{}.network".format(epoch+1, total_loss)
        #serializers.save_npz(output_path, model)
        ed = datetime.datetime.now()
        print("\nepoch:\t{0}\ttotal loss:\t{1}\ttime:\t{2}".format(
            epoch + 1, total_loss, ed - st))
        st = datetime.datetime.now()
        model.to_cpu()
        serializers.save_npz("model/{0}_epoch-{1}.npz".format(
            model_file_name, epoch + 1), model)  # write out in npz format
        model.to_gpu()
Example #18
    data_converter = DataConverter(batch_col_size=BATCH_COL_SIZE)  # data converter
    data_converter.load(data)  # read in the training data
    vocab_size = len(data_converter.vocab)  # vocabulary size
    print("Vocabulary size:", vocab_size)
    PrintTime("word-to-ID conversion")
    # pprint.pprint(sorted(data_converter.vocab.items(), key=lambda x:x[1]))
    # declare the model
    model = AttSeq2Seq(vocab_size=vocab_size,
                       embed_size=EMBED_SIZE,
                       hidden_size=HIDDEN_SIZE,
                       batch_col_size=BATCH_COL_SIZE)
    # load the network file
    network = ".\\mine\\data\\network\\{file_name}\\sample1.network".format(
        file_name=file_name)
    serializers.load_npz(network, model)
    opt = optimizers.Adam()
    opt.setup(model)
    opt.add_hook(optimizer.GradientClipping(5))
    if FLAG_GPU:
        model.to_gpu(0)
    model.reset()
    PrintTime("読み込み")
    # epoch = []
    # StudyStart(".\\mine\\data\\network\\{file_name}\\sample1.network".format(file_name=file_name))
    ConsoleInputText()  #コンソールからの入力
    # SpeechAnalysis() #コサイン類似度判定
    # SpeechStart() # 学習データ入力からの推測
    # SpeechAnswer(data) # 学習データ計測、欠如データ計測
    # print(SpeechOneText())
    PrintTime("---END---")
Example #19
    def for_one_batch_training(self):
        loss_list = []
        text_count = 0
        model_list = glob.glob(
            "_".join(self.OUTPUT_PATH.format("model", self.FEATURE_TYPE, self.USE_DROPOUT, self.num_of_middle_layer, "*", 0).split("_")[:-1]))
        model = Att_Seq2TF(emb_size=self.EMBED_SIZE,
                           fnn_size=self.FNN_SIZE,
                           hidden_size=self.HIDDEN_SIZE,
                           num_of_middle_layer=self.num_of_middle_layer,
                           use_dropout=self.USE_DROPOUT,
                           flag_gpu=self.FLAG_GPU)
        if len(model_list) != 0:
            for model_cand in sorted(model_list, key=lambda x: int(x.split("_")[-2][9:])):
                loss_list.append(
                    float(model_cand[model_cand.find("loss") + 4:model_cand.rfind(".")]))
            serializers.load_hdf5(model_cand, model)
            text_count = int(model_cand.split("_")[-2][9:])
            print(model_cand)
            print(text_count)
            print(loss_list)
        if self.FLAG_GPU:
            model.to_gpu(0)
        model.reset()
        # print("d")
        opt = optimizers.Adam()
        # optimizer.use_cleargrads()
        opt.setup(model)
        opt.add_hook(optimizer.WeightDecay(0.0005))
        opt.add_hook(optimizer.GradientClipping(5))
        opt_list = glob.glob(
            "_".join(self.OUTPUT_PATH.format("opt", self.FEATURE_TYPE, self.USE_DROPOUT, self.num_of_middle_layer, "*", 0).split("_")[:-1]))
        if len(opt_list) != 0:
            opt_list = sorted(
                opt_list, key=lambda x: int(x.split("_")[-2][9:]))
            serializers.load_hdf5(opt_list[-1], opt)
            print(opt_list[-1])

        # training loop
        # train_losses = []
        # test_losses = []
        print("start...")
        start_time = time.time()
        # start training
        q = Queue(100)
        q_valid = Queue(500)
        q_valid1 = Queue(500)
        minibatch_maker = MinibatchMaker(
            self.FEATURE_TYPE, self.FLAG_GPU, "train", text_count, 0)
        p = Process(target=minibatch_maker.epoch_pickle, args=(q, ))
        p.start()
        # minibatch_maker1 = MinibatchMaker(
        #     self.FEATURE_TYPE, self.FLAG_GPU, "train", text_count, 1)
        # p1 = Process(target=minibatch_maker1.epoch_factory, args=(q, ))
        # p1.start()
        # minibatch_maker2 = MinibatchMaker(
        #     self.FEATURE_TYPE, self.FLAG_GPU, "train", text_count, 2)
        # p2 = Process(target=minibatch_maker2.epoch_factory, args=(q, ))
        # p2.start()
        # minibatch_maker3 = MinibatchMaker(
        #     self.FEATURE_TYPE, self.FLAG_GPU, "train", text_count, 3)
        # p3 = Process(target=minibatch_maker3.epoch_factory, args=(q, ))
        # p3.start()
        # minibatch_maker4 = MinibatchMaker(
        #     self.FEATURE_TYPE, self.FLAG_GPU, "train", text_count, 4)
        # p4 = Process(target=minibatch_maker4.epoch_factory, args=(q, ))
        # p4.start()
        # minibatch_maker5 = MinibatchMaker(
        #     self.FEATURE_TYPE, self.FLAG_GPU, "train", text_count, 5)
        # p5 = Process(target=minibatch_maker5.epoch_factory, args=(q, ))
        # p5.start()
        # minibatch_maker6 = MinibatchMaker(
        #     self.FEATURE_TYPE, self.FLAG_GPU, "train", text_count, 6)
        # p6 = Process(target=minibatch_maker6.epoch_factory, args=(q, ))
        # p6.start()
        # minibatch_maker7 = MinibatchMaker(
        #     self.FEATURE_TYPE, self.FLAG_GPU, "train", text_count, 7)
        # p7 = Process(target=minibatch_maker7.epoch_factory, args=(q, ))
        # p7.start()
        # minibatch_maker8 = MinibatchMaker(
        #     self.FEATURE_TYPE, self.FLAG_GPU, "train", text_count, 8)
        # p8 = Process(target=minibatch_maker8.epoch_factory, args=(q, ))
        # p8.start()
        # minibatch_maker9 = MinibatchMaker(
        #     self.FEATURE_TYPE, self.FLAG_GPU, "train", text_count, 9)
        # p9 = Process(target=minibatch_maker9.epoch_factory, args=(q, ))
        # p9.start()
        #train_len = q.get()
        minibatch_maker_valid = MinibatchMaker(
            self.FEATURE_TYPE, self.FLAG_GPU, "valid", text_div=0)
        p_valid = Process(
            target=minibatch_maker_valid.epoch_pickle, args=(q_valid, ))
        p_valid.start()
        minibatch_maker_valid1 = MinibatchMaker(
            self.FEATURE_TYPE, self.FLAG_GPU, "valid", text_div=1)
        p_valid1 = Process(
            target=minibatch_maker_valid1.epoch_pickle, args=(q_valid1, ))
        p_valid1.start()
        valid_len = q_valid.get()
        valid_len1 = q_valid1.get()
        #valid_len1 = 0
        #print("altsvm" + str(train_len))
        print("altsvm" + str(valid_len))
        print("altsvm" + str(valid_len1))
        # p.terminate()
        # p_valid.terminate()
        # exit()
        waited_count = 0
        verb_data_count = 0
        pseudo_epoch_count = 0
        train_dict_keep = None
        while waited_count < 100 and (len(loss_list) <= 10 or min(loss_list[-10:]) != loss_list[-10]):
            if not q.empty():
                # print("something")
                text_count += self.EPOCH_TEXT
                try_count = 0
                # while try_count < 5:
                #     try:
                #         try_count += 1
                #         print(str(q.full()))
                enc_words, fnn_inputs, dec_scores = q.get()
                # if text_sentence_vec_dict != None:
                #train_dict_keep = text_sentence_vec_dict
                # except Exception as e:
                #     print("cant_get")
                #     print(e)
                # if len(x_train) > 0:
                #     print("can_get")
                #     break
                # sys.exit()
                N = len(dec_scores)
                verb_data_count += N
                if N != 0:
                    # training
                    start_time_train = time.time()
                    perm = np.random.permutation(N)
                    sum_loss = 0
                    # print("first_verb")
                    for i in range(0, N, self.BATCH_SIZE):
                        # print(i)
                        if self.FLAG_GPU:
                            enc_words_batch = []
                            for x in perm[i:i + self.BATCH_SIZE]:
                                enc_words_batch.append(enc_words[x])
                                # enc_words_batch.append(
                                #     train_dict_keep[enc_words[x][0]][enc_words[x][1]])
                            # enc_words_batch = cuda.to_gpu(
                            #    np.array(enc_words_batch), device=0)
                            fnn_inputs_batch = cuda.to_gpu(fnn_inputs[
                                perm[i:i + self.BATCH_SIZE]], device=0)
                            dec_scores_batch = cuda.to_gpu(dec_scores[
                                perm[i:i + self.BATCH_SIZE]], device=0)
                        else:
                            enc_words_batch = []
                            for x in perm[i:i + self.BATCH_SIZE]:
                                enc_words_batch.append(enc_words[x])
                                # enc_words_batch.append(
                                #     train_dict_keep[enc_words[x][0]][enc_words[x][1]])
                            fnn_inputs_batch = fnn_inputs[
                                perm[i:i + self.BATCH_SIZE]]
                            dec_scores_batch = dec_scores[
                                perm[i:i + self.BATCH_SIZE]]

                        # reset the model
                        model.reset()
                        # forward pass
                        model.encode(enc_words_batch)
                        # decoder computation
                        loss = model.decode(fnn_inputs_batch, dec_scores_batch)
                        # print(loss)
                        sum_loss += loss.data * len(dec_scores_batch)
                        loss.backward()
                        opt.update()
                    # print("first_verb_finished")
                    average_loss = sum_loss / N
                    # train_losses.append(average_loss)
                    interval = int(time.time() - start_time_train)
                    #print("train実行時間: {}sec, N: {}".format(interval,N))

                # test
                # loss = model(x_test, y_test)
                # test_losses.append(loss.data)

                # output learning process
                if text_count % 100 == 0:
                    print("text_count: {} train loss: {} verb_data_count: {} time: {}".format(
                        text_count, average_loss, verb_data_count, time.ctime()))
                if verb_data_count // self.EPOCH_LIMIT > pseudo_epoch_count:
                    pseudo_epoch_count += 1
                    # print(verb_data_count)
                    # print(pseudo_epoch_count)

                    total_loss = 0
                    total_count = 0
                    valid_dict_keep = None
                    model.mode_change("test")
                    #chainer.config.train = False
                    valid_count = 0
                    valid1_count = 0
                    while (valid_count + valid1_count) < (valid_len + valid_len1):
                        if valid_count < valid_len and not q_valid.empty():
                            enc_words, fnn_inputs, dec_scores = q_valid.get()
                            valid_count += 1
                        elif valid1_count < valid_len1 and not q_valid1.empty():
                            enc_words, fnn_inputs, dec_scores = q_valid1.get()
                            valid1_count += 1
                        else:
                            print("waiting valid " + str(valid_count) +
                                  " " + str(valid1_count))
                            time.sleep(10)
                            continue
                        # if text_sentence_vec_dict != None:
                        #valid_dict_keep = text_sentence_vec_dict
                        if len(dec_scores) == 0:
                            continue
                        N = len(dec_scores)
                        for i in range(0, N, self.BATCH_SIZE):
                            if self.FLAG_GPU:
                                enc_words_batch = []
                                for x in enc_words[i:i + self.BATCH_SIZE]:
                                    enc_words_batch.append(x)
                                    # enc_words_batch.append(
                                    #     valid_dict_keep[x[0]][x[1]])
                                # enc_words_batch = cuda.to_gpu(
                                #    enc_words_batch, device=0)
                                fnn_inputs_batch = cuda.to_gpu(
                                    fnn_inputs[i:i + self.BATCH_SIZE], device=0)
                                dec_scores_batch = cuda.to_gpu(
                                    dec_scores[i:i + self.BATCH_SIZE], device=0)
                            else:
                                enc_words_batch = []
                                for x in enc_words[i:i + self.BATCH_SIZE]:
                                    enc_words_batch.append(x)
                                    # enc_words_batch.append(
                                    #     valid_dict_keep[x[0]][x[1]])
                                # enc_words_batch = cuda.to_gpu(
                                #    enc_words_batch, device=0)
                                fnn_inputs_batch = fnn_inputs[
                                    i:i + self.BATCH_SIZE]
                                dec_scores_batch = dec_scores[
                                    i:i + self.BATCH_SIZE]
                            # reset the model
                            model.reset()
                            if len(enc_words_batch) == 0:
                                print(len(enc_words))
                                print(len(dec_scores_batch))
                                print(i)
                                exit()

                            with chainer.no_backprop_mode():
                                # forward pass
                                model.encode(enc_words_batch)
                                # decoder computation
                                loss_data = model.decode(
                                    fnn_inputs_batch, dec_scores_batch).data
                                if not self.ARR.isnan(loss_data):
                                    total_loss += loss_data * \
                                        len(dec_scores_batch)
                                    total_count += len(dec_scores_batch)
                                else:
                                    print(loss_data)

                    if total_count == 0:
                        print("skipped")
                        continue
                    valid_loss = float(total_loss / total_count)
                    model.mode_change("train")
                    #chainer.config.train = True
                    # print(valid_loss)
                    # print(total_loss)
                    # print(total_count)
                    print("valid_count: {} valid loss: {} time: {}".format(
                        verb_data_count // self.EPOCH_LIMIT, valid_loss, time.ctime()))
                    try:
                        # with open("test", mode="wb") as f:
                        #    pickle.dump("hui",f)
                        # with open(self.OUTPUT_PATH.format("opt", self.FEATURE_TYPE, str(self.USE_DROPOUT), str(self.num_of_middle_layer), str(verb_count // self.EPOCH_LIMIT), valid_loss), mode="wb") as f:
                        #    pickle.dump(opt,f)
                        # print("will_save")
                        # model_saved=model.copy()
                        # model_saved.to_cpu()
                        # fui=float(70)
                        serializers.save_hdf5(  # "/gs/hs0/tga-cl/yamashiro-s-aa/workspace/nn/fnn/model/model",model)
                            self.OUTPUT_PATH.format("model", self.FEATURE_TYPE, self.USE_DROPOUT, self.num_of_middle_layer, text_count, float(valid_loss)), model)
                        # print("model_saved")
                        serializers.save_hdf5(
                            self.OUTPUT_PATH.format("opt", self.FEATURE_TYPE, self.USE_DROPOUT, self.num_of_middle_layer, text_count, float(valid_loss)), opt)
                    except Exception as e:
                        raise e
                    # print("saved")
                    loss_list.append(valid_loss)

                    # q_valid.put((x_valid, y_valid))
                waited_count = 0
            else:
                print("waiting")
                time.sleep(10)
                print(str(text_count) + " " + str(q.qsize()))
                waited_count += 1

        print("end")
        p.terminate()
        # p1.terminate()
        # p2.terminate()
        # p3.terminate()
        # p4.terminate()
        # p5.terminate()
        # p6.terminate()
        # p7.terminate()
        # p8.terminate()
        # p9.terminate()
        p_valid.terminate()
        p_valid1.terminate()
        interval = int(time.time() - start_time)
        print("実行時間: {}sec, last pseudo_epoch: {}".format(
            interval, str(verb_data_count // self.EPOCH_LIMIT)))