Example #1
def train_loop(model, output_dir, xp, optimizer, res_q, data_q):
    graph_generated = False
    while True:
        while data_q.empty():
            time.sleep(0.1)
        inp = data_q.get()
        if inp == 'end':
            res_q.put('end')
            break
        elif inp == 'train':
            res_q.put('train')
            model.train = True
            continue
        elif inp == 'val':
            res_q.put('val')
            model.train = False
            continue
        volatile = 'off' if model.train else 'on'
        x = chainer.Variable(xp.asarray(inp[0]), volatile=volatile)
        t = chainer.Variable(xp.asarray(inp[1]), volatile=volatile)
        if model.train:
            optimizer.update(model, x, t)
            if not graph_generated:
                with open('graph.dot', 'w') as o:
                    o.write(computational_graph.build_computational_graph((model.loss,)).dump())
                print('generated graph')
                graph_generated = True
        else:
            model(x, t)
            
        serializers.save_hdf5(output_dir + os.sep + 'model%04d'%inp[2], model)
        #serializers.save_hdf5(output_dir + os.sep + 'optimizer%04d'%inp[2], optimizer)
        res_q.put((float(model.loss.data), float(model.accuracy.data), inp[2]))
        del x, t
Example #2
 def save_model(self, epoch):
     dpath = "./model"
     if not os.path.exists(dpath):
         os.makedirs(dpath)

     fpath = os.path.join(dpath, "generator_{:05d}.h5py".format(epoch))
     serializers.save_hdf5(fpath, self.generator)
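
Weights written this way can be read back with the matching load_hdf5 call. The helper below is a minimal sketch of that counterpart (not part of the original example); it assumes a generator chain with the same architecture has already been constructed and that the file layout from Example #2 is reused.

def load_generator(generator, epoch):
    # Restore parameters saved by save_model() into an existing chain
    # whose architecture matches the serialized one (hypothetical helper).
    from chainer import serializers
    fpath = "./model/generator_{:05d}.h5py".format(epoch)
    serializers.load_hdf5(fpath, generator)
    return generator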
Example #3
def train(args):
  trace('loading corpus ...')
  with open(args.source) as fp:
    trees = [make_tree(l) for l in fp]

  trace('extracting leaf nodes ...')
  word_lists = [extract_words(t) for t in trees]

  trace('extracting gold operations ...')
  op_lists = [make_operations(t) for t in trees]

  trace('making vocabulary ...')
  word_vocab = Vocabulary.new(word_lists, args.vocab)
  phrase_set = set()
  semi_set = set()
  for tree in trees:
    phrase_set |= set(extract_phrase_labels(tree))
    semi_set |= set(extract_semi_labels(tree))
  phrase_vocab = Vocabulary.new([list(phrase_set)], len(phrase_set), add_special_tokens=False)
  semi_vocab = Vocabulary.new([list(semi_set)], len(semi_set), add_special_tokens=False)

  trace('converting data ...')
  word_lists = [convert_word_list(x, word_vocab) for x in word_lists]
  op_lists = [convert_op_list(x, phrase_vocab, semi_vocab) for x in op_lists]

  trace('start training ...')
  parser = Parser(
      args.vocab, args.embed, args.queue, args.stack,
      len(phrase_set), len(semi_set),
  )
  if USE_GPU:
    parser.to_gpu()
  opt = optimizers.AdaGrad(lr = 0.005)
  opt.setup(parser)
  opt.add_hook(optimizer.GradientClipping(5))

  for epoch in range(args.epoch):
    n = 0
    
    for samples in batch(zip(word_lists, op_lists), args.minibatch):
      parser.zerograds()
      loss = my_zeros((), np.float32)

      for word_list, op_list in zip(*samples):
        trace('epoch %3d, sample %6d:' % (epoch + 1, n + 1))
        loss += parser.forward(word_list, op_list, 0)
        n += 1
      
      loss.backward()
      opt.update()

    trace('saving model ...')
    prefix = args.model + '.%03d' % (epoch + 1)
    word_vocab.save(prefix + '.words')
    phrase_vocab.save(prefix + '.phrases')
    semi_vocab.save(prefix + '.semiterminals')
    parser.save_spec(prefix + '.spec')
    serializers.save_hdf5(prefix + '.weights', parser)

  trace('finished.')
Example #4
 def snapshot(self):
     S.save_hdf5(osp.join(self.save_dir,
                          'vgg16_{0}.chainermodel'.format(self.i_iter)),
                 self.model)
     S.save_hdf5(osp.join(self.save_dir,
                          'vgg16_optimizer_{0}.h5'.format(self.i_iter)),
                 self.optimizer)
Example #5
    def train(self):
        """Iterate with train data."""
        log_templ = ('{i_iter}: type={type}, loss={loss}, acc={acc}, '
                     'acc_cls={acc_cls}, iu={iu}, fwavacc={fwavacc}')
        for i_iter in xrange(self.max_iter):
            self.i_iter = i_iter

            if (self.test_interval is not None) and \
               (i_iter % self.test_interval == 0):
                self.validate()

            type = 'train'
            self.model.train = True
            loss, acc, acc_cls, iu, fwavacc = self._iterate_once(type=type)
            log = dict(i_iter=self.i_iter, type=type, loss=loss, acc=acc,
                       acc_cls=acc_cls, iu=iu, fwavacc=fwavacc)
            print(log_templ.format(**log))
            self.logfile.write(
                '{i_iter},{type},{loss},{acc},{acc_cls},{iu},{fwavacc}\n'
                .format(**log))

            if i_iter % self.snapshot == 0:
                print('{0}: saving snapshot...'.format(i_iter))
                snapshot_model = osp.join(
                    self.log_dir,
                    'fcn32s_{0}.chainermodel'.format(i_iter))
                snapshot_optimizer = osp.join(
                    self.log_dir, 'fcn8s_optimizer_{0}.h5'.format(i_iter))
                S.save_hdf5(snapshot_model, self.model)
                S.save_hdf5(snapshot_optimizer, self.optimizer)
Example #6
    def test(self, x_l, y_l):
        y = F.softmax(self.mlp_enc(x_l, test=True))
        y_argmax = F.argmax(y, axis=1)
        acc = F.accuracy(y, y_l)
        y_l_cpu = cuda.to_cpu(y_l.data)
        y_argmax_cpu = cuda.to_cpu(y_argmax.data)

        # Confusion matrix
        cm = confusion_matrix(y_l_cpu, y_argmax_cpu)
        print(cm)

        # Wrong samples
        idx = np.where(y_l_cpu != y_argmax_cpu)[0]
        #print(idx.tolist())

        # Generate and Save
        x_rec = self.mlp_dec(y, test=True)
        save_incorrect_info(x_rec.data[idx, ], x_l.data[idx, ],
                            y.data[idx, ], y_l.data[idx, ])

        # Save model
        serializers.save_hdf5("./model/mlp_encdec.h5py", self.model)

        loss = self.forward_for_losses(x_l, y_l, None, test=True)  # only measure x_l
        supervised_loss = loss
        return acc, supervised_loss
Example #7
    def save_model(self):
        trace('saving model ...')
        prefix = self.model
        self.trg_vocab.save("model/" + prefix + '.trgvocab')
        self.encdec.save_spec("model/" + prefix + '.spec')
        serializers.save_hdf5("model/" + prefix + '.weights', self.encdec)

        trace('finished.')
Example #8
def save_model(filename, model):
    print('Saving trained model...')

    if os.path.exists(filename):
        print('Overwriting existing file {}'.format(filename))

    serializers.save_hdf5(filename, model)
    print('Saved trained model {}'.format(filename))
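
Note that save_hdf5 relies on the optional h5py package. Chainer's NPZ serializer takes the same (filename, obj) arguments and needs only NumPy, so it can stand in where h5py is unavailable; a minimal sketch mirroring save_model above:

from chainer import serializers

def save_model_npz(filename, model):
    # Same flow as save_model(), but writes a NumPy .npz archive
    # instead of HDF5, so h5py is not required.
    print('Saving trained model...')
    serializers.save_npz(filename, model)
    print('Saved trained model {}'.format(filename))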
Example #9
def progress_func(epoch, loss, accuracy, validate_loss, validate_accuracy, test_loss, test_accuracy):
    print('epoch: {} done'.format(epoch))
    print('train    mean loss={}, accuracy={}'.format(loss, accuracy))
    if validate_loss is not None and validate_accuracy is not None:
        print('validate mean loss={}, accuracy={}'.format(validate_loss, validate_accuracy))
    if test_loss is not None and test_accuracy is not None:
        print('test     mean loss={}, accuracy={}'.format(test_loss, test_accuracy))
    if epoch % 10 == 0:
        serializers.save_hdf5(args.output + '.model', model)
        serializers.save_hdf5(args.output + '.state', optimizer)
Example #10
 def save(self, filename):
     """ Save the model, the optimizer, vocabulary and config"""
     filename = os.path.abspath(filename)
     serializers.save_hdf5(filename + '.model', self.model)
     serializers.save_hdf5(filename + '.state', self.optimizer)
     cPickle.dump(self.vocab, open(filename + '.vocab', "w"))
     cPickle.dump(self.config, open(filename + '.config', "w"))
     with tarfile.open(filename, "w") as tar:
         for fn in [filename + '.model', filename + '.state', filename + '.vocab', filename + '.config']:
             tar.add(fn, arcname=os.path.basename(fn))
             os.remove(fn)
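
The inverse operation is not shown in the example; the sketch below illustrates how such an archive could be unpacked and restored. It assumes the same '.model'/'.state'/'.vocab'/'.config' naming, that model and optimizer objects with matching architectures already exist, and it uses pickle in place of cPickle.

import os
import pickle
import tarfile

from chainer import serializers

def load_bundle(filename, model, optimizer):
    # Hypothetical counterpart to save(): extract the archive next to it,
    # then restore weights, optimizer state, vocabulary and config.
    filename = os.path.abspath(filename)
    with tarfile.open(filename, "r") as tar:
        tar.extractall(os.path.dirname(filename))
    serializers.load_hdf5(filename + '.model', model)
    serializers.load_hdf5(filename + '.state', optimizer)
    with open(filename + '.vocab', "rb") as f:
        vocab = pickle.load(f)
    with open(filename + '.config', "rb") as f:
        config = pickle.load(f)
    return vocab, config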
Example #11
	def save(self, dir=None):
		if dir is None:
			raise Exception()
		try:
			os.mkdir(dir)
		except:
			pass
		for attr in vars(self):
			prop = getattr(self, attr)
			if isinstance(prop, chainer.Chain) or isinstance(prop, chainer.optimizer.GradientMethod):
				serializers.save_hdf5(dir + "/%s_%s.hdf5" % (self.name, attr), prop)
		print "model saved."
Example #12
def train(args):
    source_vocab = Vocab(args.source, args.vocab)
    target_vocab = Vocab(args.target, args.vocab)
    att_encdec = ABED(args.vocab, args.hidden_size, args.maxout_hidden_size, args.embed_size)
    if args.use_gpu:
        att_encdec.to_gpu()
    if args.source_validation:
        if os.path.exists(PLOT_DIR)==False: os.mkdir(PLOT_DIR)
        fp_loss = open(PLOT_DIR+"loss", "w")
        fp_loss_val = open(PLOT_DIR+"loss_val", "w")

    opt = optimizers.AdaDelta(args.rho, args.eps)
    opt.setup(att_encdec)
    opt.add_hook(optimizer.WeightDecay(DECAY_COEFF))
    opt.add_hook(optimizer.GradientClipping(CLIP_THR))
    for epoch in xrange(args.epochs):
        print "--- epoch: %s/%s ---"%(epoch+1, args.epochs)
        source_gen = word_list(args.source)
        target_gen = word_list(args.target)
        batch_gen = batch(sort(source_gen, target_gen, 100*args.minibatch), args.minibatch)
        n = 0
        total_loss = 0.0
        for source_batch, target_batch in batch_gen:
            n += len(source_batch)
            source_batch = fill_batch_end(source_batch)
            target_batch = fill_batch_end(target_batch)
            hyp_batch, loss = forward(source_batch, target_batch, source_vocab, target_vocab, att_encdec, True, 0)
            total_loss += loss.data*len(source_batch)
            closed_test(source_batch, target_batch, hyp_batch)

            loss.backward()
            opt.update()
            print "[n=%s]"%(n)
        print "[total=%s]"%(n)
        prefix = args.model_path + '%s'%(epoch+1)
        serializers.save_hdf5(prefix+'.attencdec', att_encdec)
        if args.source_validation:
            total_loss_val, n_val = validation_test(args, att_encdec, source_vocab, target_vocab)
            fp_loss.write("\t".join([str(epoch), str(total_loss/n)+"\n"]))
            fp_loss_val.write("\t".join([str(epoch), str(total_loss_val/n_val)+"\n"])) 
            fp_loss.flush()
            fp_loss_val.flush()
        hyp_params = att_encdec.get_hyper_params()
        Backup.dump(hyp_params, args.model_path+HPARAM_NAME)
        source_vocab.save(args.model_path+SRC_VOCAB_NAME)
        target_vocab.save(args.model_path+TAR_VOCAB_NAME)
    hyp_params = att_encdec.get_hyper_params()
    Backup.dump(hyp_params, args.model_path+HPARAM_NAME)
    source_vocab.save(args.model_path+SRC_VOCAB_NAME)
    target_vocab.save(args.model_path+TAR_VOCAB_NAME)
    if args.source_validation:
        fp_loss.close()
        fp_loss_val.close()
Example #13
File: lstm.py Project: musyoku/NLP
	def save(self, dir=None):
		if dir is None:
			raise Exception()
		try:
			os.mkdir(dir)
		except:
			pass
		for attr in vars(self):
			prop = getattr(self, attr)
			if self.should_save(prop):
				serializers.save_hdf5(dir + "/%s_%s.hdf5" % (self.name, attr), prop)
		print "model saved."
Example #14
	def save(self):
		serializers.save_hdf5("fc_value.model", self.fc_value)
		serializers.save_hdf5("fc_advantage.model", self.fc_advantage)
		print "model saved."
		serializers.save_hdf5("fc_value.optimizer", self.optimizer_fc_value)
		serializers.save_hdf5("fc_advantage.optimizer", self.optimizer_fc_advantage)
		print "optimizer saved."
Example #15
    def train(self):
        """
        Train method.
        If a word2vec model is provided, its weights can be copied into this model.
        The optimizer used is AdaGrad.
        """
        trace("making vocabularies ...")
        src_vocab = Vocabulary.new(gens.word_list(self.source), self.vocab)
        trg_vocab = Vocabulary.new(gens.word_list(self.target), self.vocab)

        trace("making model ...")
        self.attention_dialogue = AttentionDialogue(self.vocab, self.embed, self.hidden, self.XP)
        if self.word2vecFlag:
            self.copy_model(self.word2vec, self.attention_dialogue.emb)
            self.copy_model(self.word2vec, self.attention_dialogue.dec, dec_flag=True)

        for epoch in range(self.epoch):
            trace("epoch %d/%d: " % (epoch + 1, self.epoch))
            trained = 0
            gen1 = gens.word_list(self.source)
            gen2 = gens.word_list(self.target)
            gen3 = gens.batch(gens.sorted_parallel(gen1, gen2, 100 * self.minibatch), self.minibatch)
            opt = optimizers.AdaGrad(lr=0.01)
            opt.setup(self.attention_dialogue)
            opt.add_hook(optimizer.GradientClipping(5))

            random_number = random.randint(0, self.minibatch - 1)
            for src_batch, trg_batch in gen3:
                src_batch = fill_batch(src_batch)
                trg_batch = fill_batch(trg_batch)
                K = len(src_batch)
                hyp_batch, loss = self.forward_implement(
                    src_batch, trg_batch, src_vocab, trg_vocab, self.attention_dialogue, True, 0
                )
                loss.backward()
                opt.update()

                self.print_out(random_number, epoch, trained, src_batch, trg_batch, hyp_batch)

                trained += K

        trace("saving model ...")
        prefix = self.model
        model_path = APP_ROOT + "/model/" + prefix
        src_vocab.save(model_path + ".srcvocab")
        trg_vocab.save(model_path + ".trgvocab")
        self.attention_dialogue.save_spec(model_path + ".spec")
        serializers.save_hdf5(model_path + ".weights", self.attention_dialogue)

        trace("finished.")
Example #16
    def serialize(self, epoch, filename):
        # Create dir path
        dpath = os.path.join(filename, "./model_{:05d}".format(epoch))
        if os.path.exists(dpath):
            shutil.rmtree(dpath)
            os.makedirs(dpath)
        else:
            os.makedirs(dpath)

        # Serialize
        fpath = os.path.join(dpath, "encoder.h5py")
        serializers.save_hdf5(fpath, self.encoder)
        fpath = os.path.join(dpath, "generator.h5py")
        serializers.save_hdf5(fpath, self.generator)
Example #17
  def saveData(self):
    try:
      # save model file
      serializers.save_hdf5(MODEL_FILE, self.brain)
      print "succeed to save model"

      # save history file
      #fp = open(HISTORY_FILE, "w")
      #self.brain.state.dump(HISTORY_FILE)
      #pickle.dump(self.brain, fp)
      #fp.close()
      #print "succeed to save history."
    except:
      print "failed to save history."
Example #18
def train(gen, dis, optimizer_gen, optimizer_dis, x_train, epoch_num, gpu_device=None, out_image_dir=None):
    if gpu_device == None:
        gen.to_cpu()
        dis.to_cpu()
        xp = np
    else:
        gen.to_gpu(gpu_device)
        dis.to_gpu(gpu_device)
        xp = cuda.cupy
    out_image_len = 20
    z_out_image = Variable(xp.random.uniform(-1, 1, (out_image_len, LATENT_SIZE)).astype(np.float32))
    for epoch in xrange(1, epoch_num + 1):
        x_size = len(x_train)
        perm = np.random.permutation(x_size)
        sum_loss_gen = 0
        sum_loss_dis = 0
        for i in xrange(0, x_size, BATCH_SIZE):
            x_batch = x_train[perm[i : i + BATCH_SIZE]]
            loss_dis = train_dis(gen, dis, optimizer_gen, optimizer_dis, x_batch, gpu_device)
            sum_loss_dis += float(loss_dis)
            loss_gen = train_gen(gen, dis, optimizer_gen, optimizer_dis, x_batch, gpu_device)
            sum_loss_gen += float(loss_gen)
        print "epoch: {} done".format(epoch)
        print ("gen loss={}".format(sum_loss_gen / x_size))
        print ("dis loss={}".format(sum_loss_dis / x_size))
        serializers.save_hdf5(args.output + ".gen.model", gen)
        serializers.save_hdf5(args.output + ".gen.state", optimizer_gen)
        serializers.save_hdf5(args.output + ".dis.model", dis)
        serializers.save_hdf5(args.output + ".dis.state", optimizer_dis)
        if out_image_dir != None:
            data_array = gen(z_out_image, train=False).data
            for i, data in enumerate(data_array):
                image = Image.fromarray((cuda.to_cpu(data) * 256).astype(np.uint8).reshape(data.shape[1:3]))
                image.save("{0}/{1:03d}_{2:03d}.png".format(out_image_dir, epoch, i))
Example #19
def main():

    docs = get_docs()
    texts = make_texts(docs, single=False)
    questions = get_questions()
    texts.extend(questions)
    texts = preprocess_text(texts)
    texts = [t for t in texts if t]

    tokens, vocab = preprocess.tokenize(texts, 7500, tag=False, parse=False, entity=False)
    log.info("Got tokens and vocabulary. Vocab size: %d" % len(vocab))

    corpus, flat_corpus, doc_ids, clean_set = make_corpus(tokens=tokens, min_count=50)
    log.info("Got corpus")

    # Model Parameters
    # Number of documents
    n_docs = len(texts)
    log.info("number of texts: %d" % n_docs)
    # Number of unique words in the vocabulary
    n_words = flat_corpus.max() + 1
    # Number of dimensions in a single word vector
    n_hidden = 128
    # Number of topics to fit
    n_topics = 20
    # Get the count for each key
    counts = corpus.keys_counts[:n_words]
    # Get the string representation for every compact key
    words = corpus.word_list(vocab)[:n_words]
    log.info("Words: \n %s" % words)

    # Fit the model
    log.info("fitting the model")
    model = LDA2Vec(n_words, n_hidden, counts, dropout_ratio=0.2)
    model.add_categorical_feature(n_docs, n_topics, name="document_id")
    model.finalize()
    if os.path.exists("model.hdf5"):
        serializers.load_hdf5("model.hdf5", model)
    for _ in range(200):
        log.info("attempt #%d" % _)
        model.top_words_per_topic("document_id", words)
        log.info("TOP_WORDS_PER_TOPIC!\n => ")
        log.info(model.top_words_per_topic("document_id", words))
        log.info("========")
        model.fit(flat_corpus, categorical_features=[doc_ids], fraction=1e-3, epochs=1)
        model.to_cpu()
    serializers.save_hdf5("model.hdf5", model)
    model.top_words_per_topic("document_id", words)
Example #20
 def save(self):
     model_dir = self._model_dir
     filenames = Trainer.get_model_filenames(self.name, self.params['current_epoch'])
     serializers.save_hdf5(os.path.join(model_dir, filenames['model_gen']), self.dcgan.gen)
     serializers.save_hdf5(os.path.join(model_dir, filenames['model_dis']), self.dcgan.dis)
     serializers.save_hdf5(os.path.join(model_dir, filenames['opt_gen']), self.opt_gen)
     serializers.save_hdf5(os.path.join(model_dir, filenames['opt_dis']), self.opt_dis)
     with open(os.path.join(Trainer.MODEL_DIR, '{}.json'.format(self.name)), 'w') as f:
         f.write(json.dumps(self.params, indent=2))
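
A restore step for this trainer is not shown; the sketch below mirrors save(), assuming Trainer.get_model_filenames and the attribute names behave as above and that the networks and optimizers have already been built.

import os
from chainer import serializers

def load_snapshot(trainer_obj, epoch):
    # Hypothetical mirror of save(): reload generator/discriminator weights
    # and optimizer states for the given epoch.
    model_dir = trainer_obj._model_dir
    filenames = Trainer.get_model_filenames(trainer_obj.name, epoch)
    serializers.load_hdf5(os.path.join(model_dir, filenames['model_gen']), trainer_obj.dcgan.gen)
    serializers.load_hdf5(os.path.join(model_dir, filenames['model_dis']), trainer_obj.dcgan.dis)
    serializers.load_hdf5(os.path.join(model_dir, filenames['opt_gen']), trainer_obj.opt_gen)
    serializers.load_hdf5(os.path.join(model_dir, filenames['opt_dis']), trainer_obj.opt_dis)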
Example #21
def train_dcgan_labeled(images, gen, dis):
    o_gen = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_dis = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_gen.setup(gen)
    o_dis.setup(dis)
    o_gen.add_hook(chainer.optimizer.WeightDecay(0.00001))
    o_dis.add_hook(chainer.optimizer.WeightDecay(0.00001))

    zeros = Variable(xp.zeros(batchsize, dtype=np.int32))
    ones = Variable(xp.ones(batchsize, dtype=np.int32))

    for epoch in tqdm(range(n_epoch)):
        # discriminator
        # 0: from dataset
        # 1: from noise

        # train generator
        z = xp.random.uniform(-1, 1, (batchsize, nz), dtype=np.float32)
        z = Variable(z)
        x = gen(z)
        yl = dis(x)
        L_gen = F.softmax_cross_entropy(yl, zeros)
        L_dis = F.softmax_cross_entropy(yl, ones)

        # train discriminator
        x = generate_data(images)
        yl = dis(x)
        L_dis += F.softmax_cross_entropy(yl, zeros)

        o_gen.zero_grads()
        L_gen.backward()
        o_gen.update()

        o_dis.zero_grads()
        L_dis.backward()
        o_dis.update()

        if epoch % image_save_interval == 0 and epoch > 0:
            z = zvis
            z[50:, :] = xp.random.uniform(-1, 1, (50, nz), dtype=np.float32)
            z = Variable(z)
            x = gen(z, test=True)

            filename = '{}/vis_{}.png'.format(out_image_dir, epoch)
            generate_and_save(filename, x.data.get())

            path = join(out_model_dir, "dcgan_model_dis_{}.h5".format(epoch))
            serializers.save_hdf5(path, dis)

            path = join(out_model_dir, "dcgan_model_gen_%d.h5".format(epoch))
            serializers.save_hdf5(path, gen)

            path = join(out_model_dir, "dcgan_state_dis_%d.h5".format(epoch))
            serializers.save_hdf5(path, o_dis)

            path = join(out_model_dir, "dcgan_state_gen_%d.h5".format(epoch))
            serializers.save_hdf5(path, o_gen)
Example #22
    def train(self):
        trace('making vocabularies ...')
        src_vocab = Vocabulary.new(gens.word_list(self.source), self.vocab)
        trg_vocab = Vocabulary.new(gens.word_list(self.target), self.vocab)

        trace('making model ...')
        encdec = EncoderDecoder(self.vocab, self.embed, self.hidden)
        if self.word2vecFlag:
            self.copy_model(self.word2vec, encdec.enc)
            self.copy_model(self.word2vec, encdec.dec, dec_flag=True)
        else:
            encdec = self.encdec

        for epoch in range(self.epoch):
            trace('epoch %d/%d: ' % (epoch + 1, self.epoch))
            trained = 0
            gen1 = gens.word_list(self.source)
            gen2 = gens.word_list(self.target)
            gen3 = gens.batch(gens.sorted_parallel(gen1, gen2, 100 * self.minibatch), self.minibatch)
            opt = optimizers.AdaGrad(lr = 0.01)
            opt.setup(encdec)
            opt.add_hook(optimizer.GradientClipping(5))

            random_number = random.randint(0, self.minibatch - 1)
            for src_batch, trg_batch in gen3:
                src_batch = fill_batch(src_batch)
                trg_batch = fill_batch(trg_batch)
                K = len(src_batch)
                hyp_batch, loss = self.forward(src_batch, trg_batch, src_vocab, trg_vocab, encdec, True, 0)
                loss.backward()
                opt.update()

                if trained == 0:
                    self.print_out(random_number, epoch, trained, src_batch, trg_batch, hyp_batch)

                trained += K

        trace('saving model ...')
        prefix = self.model
        src_vocab.save(prefix + '.srcvocab')
        trg_vocab.save(prefix + '.trgvocab')
        encdec.save_spec(prefix + '.spec')
        serializers.save_hdf5(prefix + '.weights', encdec)

        trace('finished.')
Example #23
    def train_and_test(self, n_epoch=100, batchsize=100):

        epoch = 1
        best_accuracy = 0
        while epoch <= n_epoch:
            print 'epoch', epoch

            perm = np.random.permutation(self.n_train)
            sum_train_accuracy = 0
            sum_train_loss = 0
            for i in xrange(0, self.n_train, batchsize):
                x_batch = self.x_train[perm[i:i+batchsize]]
                y_batch = self.y_train[perm[i:i+batchsize]]

                real_batchsize = len(x_batch)

                self.optimizer.zero_grads()
                loss, acc = self.model.forward(x_batch, y_batch, train=True, gpu=self.gpu)
                loss.backward()
                self.optimizer.update()

                sum_train_loss += float(cuda.to_cpu(loss.data)) * real_batchsize
                sum_train_accuracy += float(cuda.to_cpu(acc.data)) * real_batchsize

            print 'train mean loss={}, accuracy={}'.format(sum_train_loss/self.n_train, sum_train_accuracy/self.n_train)

            # evaluation
            sum_test_accuracy = 0
            sum_test_loss = 0
            for i in xrange(0, self.n_test, batchsize):
                x_batch = self.x_test[i:i+batchsize]
                y_batch = self.y_test[i:i+batchsize]

                real_batchsize = len(x_batch)

                loss, acc = self.model.forward(x_batch, y_batch, train=False, gpu=self.gpu)

                sum_test_loss += float(cuda.to_cpu(loss.data)) * real_batchsize
                sum_test_accuracy += float(cuda.to_cpu(acc.data)) * real_batchsize

            print 'test mean loss={}, accuracy={}'.format(sum_test_loss/self.n_test, sum_test_accuracy/self.n_test)

            epoch += 1

        serializers.save_hdf5('doll_model', self.model)
Example #24
def train(args):
  trace('making vocabularies ...')
  src_vocab = Vocabulary.new(gens.word_list(args.source), args.vocab)
  trg_vocab = Vocabulary.new(gens.word_list(args.target), args.vocab)

  trace('making model ...')
  attmt = AttentionMT(args.vocab, args.embed, args.hidden)
  if args.use_gpu:
    attmt.to_gpu()

  for epoch in range(args.epoch):
    trace('epoch %d/%d: ' % (epoch + 1, args.epoch))
    trained = 0
    gen1 = gens.word_list(args.source)
    gen2 = gens.word_list(args.target)
    gen3 = gens.batch(gens.sorted_parallel(gen1, gen2, 100 * args.minibatch), args.minibatch)
    opt = optimizers.AdaGrad(lr = 0.01)
    opt.setup(attmt)
    opt.add_hook(optimizer.GradientClipping(5))

    for src_batch, trg_batch in gen3:
      src_batch = fill_batch(src_batch)
      trg_batch = fill_batch(trg_batch)
      K = len(src_batch)
      hyp_batch, loss = forward(src_batch, trg_batch, src_vocab, trg_vocab, attmt, True, 0)
      loss.backward()
      opt.update()

      for k in range(K):
        trace('epoch %3d/%3d, sample %8d' % (epoch + 1, args.epoch, trained + k + 1))
        trace('  src = ' + ' '.join([x if x != '</s>' else '*' for x in src_batch[k]]))
        trace('  trg = ' + ' '.join([x if x != '</s>' else '*' for x in trg_batch[k]]))
        trace('  hyp = ' + ' '.join([x if x != '</s>' else '*' for x in hyp_batch[k]]))

      trained += K

    trace('saving model ...')
    prefix = args.model + '.%03d' % (epoch + 1)
    src_vocab.save(prefix + '.srcvocab')
    trg_vocab.save(prefix + '.trgvocab')
    attmt.save_spec(prefix + '.spec')
    serializers.save_hdf5(prefix + '.weights', attmt)

  trace('finished.')
Example #25
    def train(self):
        cur_log_perp = self.mod.zeros(())
        accum_loss = 0
        print('[train]\ngoing to train %d epochs' % self.n_epoch)

        for epoch in range(self.n_epoch):
            if epoch <= self.n_epoch / 2:
                train_data = self.generate_data()
            else:
                train_data = self.generate_data(go_away_from_start=True)

            for i in range(self.sequence_length):
                x = self.toVariable(train_data['input'][i], dtype='float32')
                t = self.toVariable(train_data['output'][i], dtype='int32')
                h, y = self.model(x)
                loss_i, accuracy_i = self.loss(y, t)
                accum_loss += loss_i
                cur_log_perp += loss_i.data

                # truncated BPTT
                if (i + 1) % self.backprop_length == 0:
                    self.model.zerograds()
                    accum_loss.backward()
                    accum_loss.unchain_backward()  # truncate
                    accum_loss = 0
                    self.optimizer.update()

            if (epoch + 1) % self.validation_timing == 0:
                now = time.time()
                throughput = self.validation_timing / float(now - prev) \
                    if 'prev' in vars() else 0

                train_perp, valid_perp_mean, valid_perp_se, perp = \
                    self.validate(epoch, train_data, cur_log_perp)
                print(
                    ('epoch: %d, train perp: %d, validation classified %d/100 '
                    + '(%.2f epochs/sec)') %
                    (epoch + 1, perp, 100 * (1 - valid_perp_mean), throughput))

                S.save_hdf5('pipc_lstm_%d.pkl' % self.n_hidden, self.model)
                cur_log_perp = self.mod.zeros(())
                prev = now

            sys.stdout.flush()
Example #26
def train(epoch_num):
    image_groups, sentence_groups = make_groups(train_image_ids, train_sentences)
    test_image_groups, test_sentence_groups = make_groups(test_image_ids, test_sentences, train=False)
    for epoch in range(epoch_num):
        batches = random_batches(image_groups, sentence_groups)
        sum_loss = 0
        sum_acc = 0
        sum_size = 0
        batch_num = len(batches)
        for i, (image_id_batch, sentence_batch) in enumerate(batches):
            loss, acc, size = forward(caption_net, images[image_id_batch], sentence_batch)
            optimizer.zero_grads()
            loss.backward()
            loss.unchain_backward()
            optimizer.update()
            sentence_length = sentence_batch.shape[1]
            sum_loss += float(loss.data) * size
            sum_acc += acc * size
            sum_size += size
            if (i + 1) % 500 == 0:
                print '{} / {} loss: {} accuracy: {}'.format(i + 1, batch_num, sum_loss / sum_size, sum_acc / sum_size)
        print 'epoch: {} done'.format(epoch + 1)
        print 'train loss: {} accuracy: {}'.format(sum_loss / sum_size, sum_acc / sum_size)
        sum_loss = 0
        sum_acc = 0
        sum_size = 0
        for image_ids, sentences in zip(test_image_groups, test_sentence_groups):
            if len(sentences) == 0:
                continue
            size = len(sentences)
            for i in range(0, size, batch_size):
                image_id_batch = image_ids[i:i + batch_size]
                sentence_batch = sentences[i:i + batch_size]
                loss, acc, size = forward(caption_net, images[image_id_batch], sentence_batch, train=False)
                sentence_length = sentence_batch.shape[1]
                sum_loss += float(loss.data) * size
                sum_acc += acc * size
                sum_size += size
        print 'test loss: {} accuracy: {}'.format(sum_loss / sum_size, sum_acc / sum_size)

        serializers.save_hdf5(args.output + '_{0:04d}.model'.format(epoch), caption_net)
        serializers.save_hdf5(args.output + '_{0:04d}.state'.format(epoch), optimizer)
Example #27
File: model.py Project: musyoku/NLP
	def save(self, dir=None, name="lstm"):
		if dir is None:
			raise Exception()
		try:
			os.mkdir(dir)
		except:
			pass
		serializers.save_hdf5(dir + "/%s_fc.model" % name, self.fc)
		serializers.save_hdf5(dir + "/%s_lstm.model" % name, self.lstm)
		print "model saved."
		serializers.save_hdf5(dir + "/%s_fc.optimizer" % name, self.optimizer_fc)
		serializers.save_hdf5(dir + "/%s_lstm.optimizer" % name, self.optimizer_lstm)
		print "optimizer saved."
Example #28
def train_loop():
    # Trainer
    graph_generated = False
    while True:
        while data_q.empty():
            time.sleep(0.1)
        inp = data_q.get()
        if inp == 'end':  # quit
            res_q.put('end')
            break
        elif inp == 'train':  # restart training
            res_q.put('train')
            model.train = True
            continue
        elif inp == 'val':  # start validation
            res_q.put('val')
            serializers.save_hdf5(args.out, model)
            serializers.save_hdf5(args.outstate, optimizer)
            model.train = False
            continue

        volatile = 'off' if model.train else 'on'
        x = chainer.Variable(xp.asarray(inp[0]), volatile=volatile)
        t = chainer.Variable(xp.asarray(inp[1]), volatile=volatile)

        if model.train:
            optimizer.update(model, x, t)
            if not graph_generated:
                with open('graph.dot', 'w') as o:
                    o.write(computational_graph.build_computational_graph(
                        (model.loss,)).dump())
                print('generated graph', file=sys.stderr)
                graph_generated = True
        else:
            model(x, t)

        res_q.put((float(model.loss.data), float(model.accuracy.data)))
        del x, t
Example #29
    def test(self, x_l, y_l):
        y = F.softmax(self.mlp_ae.mlp_encoder(x_l, test=True))
        y_argmax = F.argmax(y, axis=1)
        acc = F.accuracy(y, y_l)
        y_l_cpu = cuda.to_cpu(y_l.data)
        y_argmax_cpu = cuda.to_cpu(y_argmax.data)

        # Confusion matrix
        cm = confusion_matrix(y_l_cpu, y_argmax_cpu)
        print(cm)

        # Wrong samples
        idx = np.where(y_l_cpu != y_argmax_cpu)[0]

        # Generate and Save
        x_rec = self.mlp_ae.mlp_decoder(y, self.mlp_ae.mlp_encoder.hiddens, test=True)
        save_incorrect_info(x_rec.data[idx, ], x_l.data[idx, ],
                            y.data[idx, ], y_l.data[idx, ])

        # Save model
        serializers.save_hdf5("./model/mlp_encdec.h5py", self.mlp_ae)

        return acc
Example #30
def fcn8s_caffe_to_chainermodel(caffe_prototxt, caffemodel_path,
                                chainermodel_path):
    net = caffe.Net(caffe_prototxt, caffemodel_path, caffe.TEST)

    model = FCN8s()
    for name, param in net.params.iteritems():
        layer = getattr(model, name)

        has_bias = True
        if len(param) == 1:
            has_bias = False

        print('{0}:'.format(name))
        # weight
        print('  - W:', param[0].data.shape, layer.W.data.shape)
        assert param[0].data.shape == layer.W.data.shape
        layer.W.data = param[0].data
        # bias
        if has_bias:
            print('  - b:', param[1].data.shape, layer.b.data.shape)
            assert param[1].data.shape == layer.b.data.shape
            layer.b.data = param[1].data
    S.save_hdf5(chainermodel_path, model)
Example #31
def main():

    ###########################
    #### create dictionary ####
    ###########################

    if os.path.exists('./data/corpus/dictionary.dict'):
        if args.lang == 'ja':
            corpus = JaConvCorpus(file_path=None,
                                  batch_size=batchsize,
                                  size_filter=True)
        else:
            corpus = ConvCorpus(file_path=None,
                                batch_size=batchsize,
                                size_filter=True)
        corpus.load(load_dir='./data/corpus/')
    else:
        if args.lang == 'ja':
            corpus = JaConvCorpus(file_path=data_file,
                                  batch_size=batchsize,
                                  size_filter=True)
        else:
            corpus = ConvCorpus(file_path=data_file,
                                batch_size=batchsize,
                                size_filter=True)
        corpus.save(save_dir='./data/corpus/')
    print('Vocabulary Size (number of words) :', len(corpus.dic.token2id))

    ######################
    #### create model ####
    ######################

    model = Seq2Seq(vocab_size=len(corpus.dic.token2id),
                    feature_num=feature_num,
                    hidden_num=hidden_num,
                    batch_size=batchsize,
                    gpu_flg=args.gpu)
    if args.gpu >= 0:
        model.to_gpu()
    optimizer = optimizers.Adam(alpha=0.001)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(5))

    ##########################
    #### create ID corpus ####
    ##########################

    input_mat = []
    output_mat = []
    input_mat_rev = []
    # output_wp_mat = []

    max_input_ren = max_output_ren = 0
    for input_text, output_text in zip(corpus.posts, corpus.cmnts):

        output_text.append(corpus.dic.token2id["<eos>"])

        # update max sentence length
        max_input_ren = max(max_input_ren, len(input_text))
        max_output_ren = max(max_output_ren, len(output_text))

        input_mat.append(input_text)
        output_mat.append(output_text)

        # # create word prediction matrix
        # wp = []
        # for wid in output_text:
        #     if wid not in wp:
        #         wp.append(wid)
        # output_wp_mat.append(wp)

    # make reverse corpus
    for input_text in input_mat:
        input_mat_rev.append(input_text[::-1])

    # padding
    for li in input_mat:
        insert_num = max_input_ren - len(li)
        for _ in range(insert_num):
            li.append(corpus.dic.token2id['<pad>'])
    for li in output_mat:
        insert_num = max_output_ren - len(li)
        for _ in range(insert_num):
            li.append(corpus.dic.token2id['<pad>'])
    for li in input_mat_rev:
        insert_num = max_input_ren - len(li)
        for _ in range(insert_num):
            li.insert(0, corpus.dic.token2id['<pad>'])

    # create batch matrix
    input_mat = np.array(input_mat, dtype=np.int32).T
    input_mat_rev = np.array(input_mat_rev, dtype=np.int32).T
    output_mat = np.array(output_mat, dtype=np.int32).T

    # separate corpus into Train and Test
    perm = np.random.permutation(len(corpus.posts))
    test_input_mat = input_mat[:, perm[0:0 + testsize]]
    test_output_mat = output_mat[:, perm[0:0 + testsize]]
    test_input_mat_rev = input_mat_rev[:, perm[0:0 + testsize]]
    train_input_mat = input_mat[:, perm[testsize:]]
    train_output_mat = output_mat[:, perm[testsize:]]
    train_input_mat_rev = input_mat_rev[:, perm[testsize:]]

    # train_output_wp_mat = []
    # for index in perm[testsize:]:
    #     train_output_wp_mat.append(output_wp_mat[index])

    #############################
    #### train seq2seq model ####
    #############################

    accum_loss = 0
    train_loss_data = []
    for num, epoch in enumerate(range(n_epoch)):
        total_loss = 0
        batch_num = 0
        perm = np.random.permutation(len(corpus.posts) - testsize)

        # for training
        for i in range(0, len(corpus.posts) - testsize, batchsize):

            # select batch data
            input_batch = remove_extra_padding(
                train_input_mat[:, perm[i:i + batchsize]], reverse_flg=False)
            input_batch_rev = remove_extra_padding(
                train_input_mat_rev[:, perm[i:i + batchsize]],
                reverse_flg=True)
            output_batch = remove_extra_padding(
                train_output_mat[:, perm[i:i + batchsize]], reverse_flg=False)
            # output_wp_batch = []
            # for index in perm[i:i + batchsize]:
            #     output_wp_batch.append(train_output_wp_mat[index])
            # output_wp_batch = create_wp_batch(vocab_size=len(corpus.dic.token2id),
            #                                   wp_lists=output_wp_batch)

            # Encode a sentence
            model.initialize(batch_size=input_batch.shape[1])
            model.encode(input_batch, input_batch_rev, train=True)

            # Decode from encoded context
            end_batch = xp.array([
                corpus.dic.token2id["<start>"]
                for _ in range(input_batch.shape[1])
            ])
            first_words = output_batch[0]
            loss, predict_mat = model.decode(end_batch,
                                             first_words,
                                             train=True)
            next_ids = first_words
            accum_loss += loss
            for w_ids in output_batch[1:]:
                loss, predict_mat = model.decode(next_ids, w_ids, train=True)
                next_ids = w_ids
                accum_loss += loss

            # learn model
            model.cleargrads()  # initialize all grad to zero
            accum_loss.backward()  # back propagation
            optimizer.update()
            total_loss += float(accum_loss.data)
            batch_num += 1
            print('Epoch: ', num, 'Batch_num', batch_num,
                  'batch loss: {:.2f}'.format(float(accum_loss.data)))
            accum_loss = 0

        train_loss_data.append(float(total_loss / batch_num))

        # save model and optimizer
        if (epoch + 1) % 5 == 0:
            print('-----', epoch + 1, ' times -----')
            print('save the model and optimizer')
            serializers.save_hdf5('data/' + str(epoch) + '.model', model)
            serializers.save_hdf5('data/' + str(epoch) + '.state', optimizer)

    # save loss data
    with open('./data/loss_train_data.pkl', 'wb') as f:
        pickle.dump(train_loss_data, f)
Example #32
v_iter = MultithreadIterator(validation,
                             batch_size=batch,
                             repeat=True,
                             shuffle=True,
                             n_threads=batch)

model = Network(channels, blocks, ksize)
if device >= 0: model.to_gpu()
optimizer = optimizers.Adam().setup(model)
updater = CustomUpdater({
    "main": t_iter,
    "test": v_iter
}, optimizer, (patch, patch))

trainer = Trainer(updater, (epoch, "epoch"), out=out)
log = extensions.LogReport()
trainer.extend(log)
trainer.extend(
    extensions.PrintReport(["epoch", "iteration", "loss", "test"], log))
trainer.extend(extensions.ProgressBar(update_interval=1))
trainer.extend(lambda trainer: save_hdf5(
    f"{out}/m{trainer.updater.iteration}.hdf5", model),
               trigger=(5, "epoch"))
trainer.extend(lambda trainer: save_hdf5(
    f"{out}/o{trainer.updater.iteration}.hdf5", optimizer),
               trigger=(5, "epoch"))

trainer.run()
save_hdf5(f"{out}/model.hdf5", model)
save_hdf5(f"{out}/optimizer.hdf5", optimizer)
Example #33
        model, test_mean_loss, test_ac, test_IoU = test(
            model, MiniBatchLoader, test_mean_loss, test_ac, test_IoU)

        if args.logflag == 'on':
            etime = time.clock()
            debugger.writelog(MiniBatchLoader.datasize_train,
                              MiniBatchLoader.datasize_test,
                              MiniBatchLoader.batchsize,
                              'Human part segmentation',
                              stime,
                              etime,
                              train_mean_loss,
                              train_ac,
                              train_IoU,
                              test_mean_loss,
                              test_ac,
                              test_IoU,
                              epoch,
                              LOG_FILENAME=resultdir + 'log.txt')
            debugger.plot_result(train_mean_loss,
                                 test_mean_loss,
                                 savename=resultdir + 'log.png')
        if args.saveflag == 'on' and epoch % 10 == 0:
            from chainer import serializers
            serializers.save_hdf5(
                resultdir + 'humanpartsnet_epoch' + str(epoch) + '.model',
                model)
            serializers.save_hdf5(
                resultdir + 'humanpartsnet_epoch' + str(epoch) + '.state',
                optimizer)
Example #34
def train(params):
    en_model = EnglishLSTM(len(params['en_list']))
    en_rythm_model = EnglishRythmLSTM(len(params['en_rythm_list']))
    ja_model = JapaneseLSTM(len(params['ja_list']))
    ja_rythm_model = JapaneseRythmLSTM(len(params['ja_rythm_list']))
    data = {
        # 'english': en_model.get_train_data(params['english'], params['batch_size']),  # parallel
        # 'en_rythm': en_model.get_train_data(params['en_rythm'], params['batch_size'])
        'english': params['english'],
        'en_rythm': params['en_rythm'],
        'japanese': params['japanese'],
        'ja_rythm': params['ja_rythm'],
    }

    # Use Adam as the optimization algorithm
    optimizer = [
        optimizers.Adam().setup(en_model),
        optimizers.Adam().setup(en_rythm_model),
        optimizers.Adam().setup(ja_model),
        optimizers.Adam().setup(ja_rythm_model),
    ]

    loss_list = []
    step = []
    for epoch in range(params['epoch_num']):
        print("epoch: %d" % (epoch + 1))
        loss = 0.0
        # Training on the English lyrics
        en_model.reset()
        for index, (en_phrase, en_rythm_phrase, ja_phrase,
                    ja_rythm_phrase) in enumerate(
                        zip(data['english'], data['en_rythm'],
                            data['japanese'], data['ja_rythm'])):
            # Reset the state when the song changes
            if len(en_phrase) == 0:
                en_model.reset()
                en_rythm_model.reset()
                continue
            # if len(en_rythm_phrase) == 0:
            #     en_rythm_model.reset()
            #     continue
            # if len(ja_phrase) == 0:
            #     ja_model.reset()
            #     continue
            # English lyrics
            for word in en_phrase:
                y_en = en_model.forward(word, params['en_list'])
            # English rhythm
            for rythm in en_rythm_phrase:
                y_en_rythm = en_rythm_model.forward(rythm,
                                                    params['en_rythm_list'])

            # Add the two outputs together
            h = y_en + y_en_rythm

            # Predict the first Japanese word from h
            tx = Variable(
                np.array([params['ja_list'][ja_phrase[0]]], dtype=np.int32))
            loss += F.softmax_cross_entropy(ja_model.predict(h), tx)
            # Generate the Japanese words from the summed output
            for index, word in enumerate(ja_phrase):
                y_ja = ja_model.forward(word, params['ja_list'])
                if word != '<eos>':
                    tx = Variable(
                        np.array([params['ja_list'][ja_phrase[index + 1]]],
                                 dtype=np.int32))
                    # print(y_ja, tx)
                    loss += F.softmax_cross_entropy(y_ja, tx)

            # Predict the first Japanese rhythm token from h
            tx = Variable(
                np.array([params['ja_rythm_list'][ja_rythm_phrase[0]]],
                         dtype=np.int32))
            loss += F.softmax_cross_entropy(ja_rythm_model.predict(h), tx)
            # Generate the Japanese rhythm from the summed output
            for index, rythm in enumerate(ja_rythm_phrase):
                y_ja_rythm = ja_rythm_model.forward(rythm,
                                                    params['ja_rythm_list'])
                if rythm != '<eos>':
                    tx = Variable(
                        np.array([
                            params['ja_rythm_list'][ja_rythm_phrase[index + 1]]
                        ],
                                 dtype=np.int32))
                    # print(y_ja, tx)
                    loss += F.softmax_cross_entropy(y_ja_rythm, tx)
            # print(ja_model.l1.upward.W.grad)
            en_model.cleargrads()
            en_rythm_model.cleargrads()
            ja_model.cleargrads()
            ja_rythm_model.cleargrads()

            loss.backward()
            loss.unchain_backward()
            ja_model.reset()
            ja_rythm_model.reset()
            for opt in optimizer:
                opt.update()

        # Record the loss for visualization
        step.append(epoch + 1)
        loss_list.append(loss.data)

        print(loss)
    # Save the trained models
    serializers.save_hdf5('models/en_model_' + str(params['epoch_num']),
                          en_model)
    serializers.save_hdf5('models/en_rythm_model_' + str(params['epoch_num']),
                          en_rythm_model)
    serializers.save_hdf5('models/ja_model_' + str(params['epoch_num']),
                          ja_model)
    serializers.save_hdf5('models/ja_rythm_model_' + str(params['epoch_num']),
                          ja_rythm_model)

    # Plot the loss curve of the training process
    plt.plot(step, loss_list)
    plt.title("Training Data")
    plt.xlabel("step")
    plt.ylabel("loss")
    plt.grid(True)
    plt.show()
Example #35
    np.savez('topics.pyldavis', **data)
    for d, f in utils.chunks(batchsize, doc_ids, flattened):
        t0 = time.time()
        optimizer.zero_grads()
        l = model.fit_partial(d.copy(), f.copy())
        prior = model.prior()
        loss = prior * fraction
        loss.backward()
        optimizer.update()
        msg = ("J:{j:05d} E:{epoch:05d} L:{loss:1.3e} "
               "P:{prior:1.3e} R:{rate:1.3e}")
        prior.to_cpu()
        loss.to_cpu()
        t1 = time.time()
        dt = t1 - t0
        rate = batchsize / dt
        logs = dict(loss=float(l), epoch=epoch, j=j,
                    prior=float(prior.data), rate=rate)
        j += 1
        #
    #
    print '\nTime:', (time.time() - start), msg.format(**logs)
    if j > 0:# and j % 500 == 0:
        coherence = topic_coherence(top_words)
        print '\nCoherence:'
        for j in range(n_topics):
            print j, coherence[(j, 'cv')]
        kw = dict(top_words=top_words, coherence=coherence, epoch=epoch)
        progress[str(epoch)] = pickle.dumps(kw)
    serializers.save_hdf5("lda2vec.hdf5", model)
Example #36
        x_batch = np.array(x_batch_list, dtype=np.float32)
        y_batch = np.array(y_batch_list, dtype=np.int32)
        if gpu_id >= 0:
            x_batch = cuda.to_gpu(x_batch)
            y_batch = cuda.to_gpu(y_batch)

        optimizer.zero_grads()
        loss, accuracy = forward(x_batch, y_batch)
        #print loss.data,accuracy.data
        with open(savedir + "real_loss.txt", "a") as f:
            f.write(str(loss.data) + '\n')
        with open(savedir + "real_accuracy.txt", "a") as f:
            f.write(str(accuracy.data) + '\n')
        loss.backward()
        optimizer.update()
        sum_loss += loss.data * batchsize
        sum_accuracy += accuracy.data * batchsize

    serializers.save_hdf5(
        savedir + "/relation_model" + str(epoch) + '.chainer', model)
    serializers.save_hdf5(savedir + "/optimizer" + str(epoch) + '.chainer',
                          optimizer)

    mean_loss = sum_loss / num_train_data
    mean_accuracy = sum_accuracy / num_train_data
    print mean_loss, mean_accuracy
    with open(savedir + "mean_loss.txt", "a") as f:
        f.write(str(mean_loss) + '\n')
    with open(savedir + "mean_accuracy.txt", "a") as f:
        f.write(str(mean_accuracy) + '\n')
Example #37
                           feed_dict={loss_: np.mean(pre_train_loss)})
        summary_writer.add_summary(summary, test_count)
        summary = sess.run(test_loss_summary,
                           feed_dict={loss_: np.mean(test_loss)})
        summary_writer.add_summary(summary, test_count)
        samples = generator.generate(10, train=False)
        with open(os.path.join(out_dir, "generated_sample_pretrain.txt"),
                  'a',
                  encoding='utf-8') as f:
            f.write(
                '\npre-train epoch {}  train_loss {} test_loss {} \n'.format(
                    epoch, np.mean(pre_train_loss), np.mean(test_loss)))
            for x in samples:
                f.write(''.join([arasuji.vocab[w] for w in x]) + '\n')

    serializers.save_hdf5(
        os.path.join(out_dir, "models", "gen_pretrain.model"), generator)

else:
    # test
    test_loss = []
    for _ in range(test_num // batch_size):
        batch = arasuji.get_test_data(batch_size)
        g_loss = generator.pretrain_step(batch)
        test_loss.append(float(g_loss.data))
    print('\npre-trained test_loss {}'.format(np.mean(test_loss)))
    test_count = args.gen_pretrain_epoch
    summary = sess.run(test_loss_summary,
                       feed_dict={loss_: np.mean(test_loss)})
    summary_writer.add_summary(summary, test_count)

# discriminator pre-train
Example #38
def train_dcgan_labeled(gen, dis, epoch0=0):
    o_gen = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_dis = optimizers.Adam(alpha=0.0002, beta1=0.5)
    o_gen.setup(gen)
    o_dis.setup(dis)
    o_gen.add_hook(chainer.optimizer.WeightDecay(0.00001))
    o_dis.add_hook(chainer.optimizer.WeightDecay(0.00001))

    zvis = (xp.random.uniform(-1, 1, (100, nz), dtype=np.float32))

    for epoch in xrange(epoch0,n_epoch):
        perm = np.random.permutation(n_train)
        sum_l_dis = np.float32(0)
        sum_l_gen = np.float32(0)

        for i in xrange(0, n_train, batchsize):
            # discriminator
            # 0: from dataset
            # 1: from noise

            #print "load image start ", i
            x2 = np.zeros((batchsize, 1, patch_h,patch_w), dtype=np.float32)
            img = load_image()
            for j in range(batchsize):
                rndx = np.random.randint(img_w-patch_w)
                rndy = np.random.randint(img_h-patch_h)

                x2[j,0,:,:] = img[rndx:rndx+patch_w,rndy:rndy+patch_h]
            #print "load image done"

            # train generator
            z = Variable(xp.random.uniform(-1, 1, (batchsize, nz), dtype=np.float32))
            x = gen(z)
            yl = dis(x)
            L_gen = F.softmax_cross_entropy(yl, Variable(xp.zeros(batchsize, dtype=np.int32)))
            L_dis = F.softmax_cross_entropy(yl, Variable(xp.ones(batchsize, dtype=np.int32)))

            # train discriminator

            x2 = Variable(cuda.to_gpu(x2))
            yl2 = dis(x2)
            L_dis += F.softmax_cross_entropy(yl2, Variable(xp.zeros(batchsize, dtype=np.int32)))

            #print "forward done"

            o_gen.zero_grads()
            L_gen.backward()
            o_gen.update()

            o_dis.zero_grads()
            L_dis.backward()
            o_dis.update()

            sum_l_gen += L_gen.data.get()
            sum_l_dis += L_dis.data.get()

            #print "backward done"

            if i%image_save_interval==0:
                print "visualize...", epoch, i
                pylab.rcParams['figure.figsize'] = (16.0,16.0)
                pylab.clf()
                vissize = 100
                z = zvis
                z[50:,:] = (xp.random.uniform(-1, 1, (50, nz), dtype=np.float32))
                z = Variable(z)
                x = gen(z, test=True)
                x = x.data.get()
                for i_ in range(100):
                    tmp = data_to_image(i_, x)
                    pylab.subplot(10,10,i_+1)
                    pylab.imshow(tmp)
                    pylab.axis('off')
                pylab.savefig('%s/vis_%d_%06d.png'%(out_image_dir, epoch,i))
                print "visualized."
        serializers.save_hdf5("%s/dcgan_model_dis_%d.h5"%(out_model_dir, epoch),dis)
        serializers.save_hdf5("%s/dcgan_model_gen_%d.h5"%(out_model_dir, epoch),gen)
        serializers.save_hdf5("%s/dcgan_state_dis_%d.h5"%(out_model_dir, epoch),o_dis)
        serializers.save_hdf5("%s/dcgan_state_gen_%d.h5"%(out_model_dir, epoch),o_gen)
        print 'epoch end', epoch, sum_l_gen/n_train, sum_l_dis/n_train
Example #39
plt.figure(figsize=(8, 6))
plt.xlim([0, n_epoch])
plt.ylim([0.975, 1.0])


def add1list(list):
    return map(lambda item: item + 1, list)


plt.plot(add1list(xrange(len(train_acc))), train_acc)
plt.plot(add1list(xrange(len(test_acc))), test_acc)
plt.legend(["train_acc", "test_acc"], loc=4)
plt.title("Accuracy of digit recognition.")
plt.plot()

# Save the model and the optimizer
print 'save the model'
model.to_cpu()
serializers.save_hdf5('mlp.model', model)
print 'save the optimizer'
serializers.save_hdf5('mlp.state', optimizer)

finishtime = time.time()
print 'execute time = {}'.format(finishtime - starttime)

plt.savefig("graph.png")
plt.show()

# Interactive console, e.g. for when you want to change the graph scale
# code.InteractiveConsole(globals()).interact()
Example #40
    def method(self):
        x_train, x_test = np.split(self.x_data, [self.N])
        y_train, y_test = np.split(self.y_data.astype(np.int32), [self.N])
        self.N_test = y_test.size
        optimizer = optimizers.SGD()
        optimizer.setup(self.model)
        for k in self.label_counter.keys():
            self.label_counter[k] = 0
        for epoch in range(self.iteration_number):
            perm = np.random.permutation(self.N)

            sum_accuracy = 0
            sum_loss = 0
            for i in six.moves.range(0, self.N, self.batchsize):
                x = chainer.Variable(self.xp.asarray(x_train[perm[i:i + self.batchsize]]))
                t = chainer.Variable(self.xp.asarray(y_train[perm[i:i + self.batchsize]]))

                # Pass the loss function (Classifier defines it) and its arguments
                optimizer.update(self.model, x, t)

                if epoch == 1 and i == 0:
                    with open('graph.dot', 'w') as o:
                        g = computational_graph.build_computational_graph(
                            (self.model.loss, ), remove_split=True)
                        o.write(g.dump())
                    print('graph generated')

                #Apply the bias for output
                self.model.y.data = self.__bias(self.model.y.data, t.data)
                sum_loss += float(self.model.loss.data) * len(t.data)
                sum_accuracy += float(self.model.accuracy.data) * len(t.data)

            print('train mean loss={}, accuracy={}'.format(
                sum_loss / self.N, sum_accuracy / self.N))

        # evaluation
        sum_accuracy = 0
        sum_loss = 0
        self.y_predict_data = []
        sum_loss, sum_accuracy = 0, 0
        sum_recall = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        sum_precision = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        sum_f_score = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        for i in six.moves.range(0, self.N_test, self.batchsize):
            x = chainer.Variable(self.xp.asarray(x_test[i:i + self.batchsize]),
                                 volatile='on')
            t = chainer.Variable(self.xp.asarray(y_test[i:i + self.batchsize]),
                                 volatile='on')
            loss = self.model(x, t)
            for j in range(len(self.model.y.data)):
                self.y_predict_data.append(np.argmax(self.model.y.data[j]))
            sum_loss += float(loss.data) * len(t.data)
            sum_accuracy += float(self.model.accuracy.data) * len(t.data)
            sum_precision, sum_recall, sum_f_score = self.__calculate_metrics(t, sum_precision, sum_recall, sum_f_score)
            self.y_predict_data = []

        print('test  mean loss={}, accuracy={}'.format(
            sum_loss / self.N_test, sum_accuracy / self.N_test))
        mean_recall = [n / self.N_test for n in sum_recall]
        mean_precision = [n / self.N_test for n in sum_precision]
        mean_f_score = [n / self.N_test for n in sum_f_score]
        print("mean_recall     ,", mean_recall)
        print("mean_precision  ,", mean_precision)
        print("mean_f_score    ,", mean_f_score)
        # Save the model and the optimizer
        print('save the model')
        serializers.save_hdf5('emotion_recognition.model', self.model)
        print('save the optimizer')
        serializers.save_hdf5('emotion_recognition.state', optimizer)
Example #41
0
def main():

    ###########################
    #### create dictionary ####
    ###########################

    if os.path.exists(CORPUS_DIR + 'dictionary.dict'):
        corpus = JaConvCorpus(create_flg=False,
                              batch_size=batchsize,
                              size_filter=True)
        corpus.load(load_dir=CORPUS_DIR)
    else:
        corpus = JaConvCorpus(create_flg=True,
                              batch_size=batchsize,
                              size_filter=True)
        corpus.save(save_dir=CORPUS_DIR)
    print('Vocabulary Size (number of words) :', len(corpus.dic.token2id))
    print('Emotion size: ', len(corpus.emotion_set))

    # search word_threshold (general or emotional)
    ma = 0
    mi = 999999
    for word in corpus.emotion_set:
        wid = corpus.dic.token2id[word]
        if wid > ma:
            ma = wid
        if wid < mi:
            mi = wid
    word_threshold = mi
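    # note: word_threshold ends up as the smallest token id among the emotion words;
    # this appears to assume that emotion words occupy a contiguous id range at the
    # top of the dictionary, so ids below the threshold are treated as general vocabulary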

    ######################
    #### create model ####
    ######################

    model = PreTrainSeq2Seq(all_vocab_size=len(corpus.dic.token2id),
                            emotion_vocab_size=len(corpus.emotion_set),
                            feature_num=feature_num,
                            hidden_num=hidden_num,
                            batch_size=batchsize,
                            label_num=label_num,
                            label_embed_num=label_embed,
                            gpu_flg=args.gpu)

    if args.gpu >= 0:
        model.to_gpu()
    optimizer = optimizers.Adam(alpha=0.001)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(0.0001))

    ##########################
    #### create ID corpus ####
    ##########################

    input_mat = []
    output_mat = []
    input_mat_rev = []
    label_mat = []
    max_input_ren = max_output_ren = 0
    print('start making corpus matrix...')
    for input_text, output_text in zip(corpus.rough_posts, corpus.rough_cmnts):

        # reverse an input and add eos tag
        output_text.append(corpus.dic.token2id["<eos>"])  # insert <eos> at the end of the output

        # update max sentence length
        max_input_ren = max(max_input_ren, len(input_text))
        max_output_ren = max(max_output_ren, len(output_text))

        # make a list of lists
        input_mat.append(input_text)
        output_mat.append(output_text)

        # make label lists  TODO: currently only handles 3-way classification (pos, neg, neu); make it configurable
        n_num = p_num = 0
        for word in output_text:
            if corpus.dic[word] in corpus.neg_words:
                n_num += 1
            if corpus.dic[word] in corpus.pos_words:
                p_num += 1
        if (n_num + p_num) == 0:
            label_mat.append([1 for _ in range(len(output_text))])
        elif n_num <= p_num:
            label_mat.append([2 for _ in range(len(output_text))])
        elif n_num > p_num:
            label_mat.append([0 for _ in range(len(output_text))])
        else:
            raise ValueError

    # make reverse corpus
    for input_text in input_mat:
        input_mat_rev.append(input_text[::-1])

    # padding (insert <pad> tokens at the beginning of each input and the end of each output)
    print('start labeling...')
    for li in input_mat:
        insert_num = max_input_ren - len(li)
        for _ in range(insert_num):
            li.append(corpus.dic.token2id['<pad>'])
    for li in output_mat:
        insert_num = max_output_ren - len(li)
        for _ in range(insert_num):
            li.append(corpus.dic.token2id['<pad>'])
    for li in input_mat_rev:
        insert_num = max_input_ren - len(li)
        for _ in range(insert_num):
            li.insert(0, corpus.dic.token2id['<pad>'])
    for li in label_mat:
        insert_num = max_output_ren - len(li)
        for _ in range(insert_num):
            li.append(corpus.dic.token2id['<pad>'])
    if len(output_mat) != len(label_mat):
        print('Output matrix and label matrix should have the same dimension.')
        raise ValueError

    # create batch matrix
    print('transpose...')
    input_mat = np.array(input_mat, dtype=np.int32).T
    input_mat_rev = np.array(input_mat_rev, dtype=np.int32).T
    output_mat = np.array(output_mat, dtype=np.int32).T
    label_mat = np.array(label_mat, dtype=np.int32).T

    # separate corpus into Train and Test  TODO: split into training and test data for the actual experiments
    print('split train and test...')
    train_input_mat = input_mat
    train_output_mat = output_mat
    train_input_mat_rev = input_mat_rev
    train_label_mat = label_mat

    #############################
    #### train seq2seq model ####
    #############################

    accum_loss = 0
    train_loss_data = []
    print('start training...')
    for num, epoch in enumerate(range(n_epoch)):
        total_loss = 0
        batch_num = 0
        perm = np.random.permutation(len(corpus.rough_posts))

        # for training
        for i in range(0, len(corpus.rough_posts), batchsize):

            # select batch data
            input_batch = remove_extra_padding(
                train_input_mat[:, perm[i:i + batchsize]], reverse_flg=False)
            input_batch_rev = remove_extra_padding(
                train_input_mat_rev[:, perm[i:i + batchsize]],
                reverse_flg=True)
            output_batch = remove_extra_padding(
                train_output_mat[:, perm[i:i + batchsize]], reverse_flg=False)
            label_batch = remove_extra_padding(
                train_label_mat[:, perm[i:i + batchsize]], reverse_flg=False)

            # Encode a sentence
            model.initialize(
                batch_size=input_batch.shape[1])  # initialize cell
            model.encode(input_batch, input_batch_rev,
                         train=True)  # encode (output: hidden Variable)

            # Decode from encoded context
            input_ids = xp.array([
                corpus.dic.token2id["<start>"]
                for _ in range(input_batch.shape[1])
            ])
            for w_ids, l_ids in zip(output_batch, label_batch):
                loss, predict_mat = model.decode(input_ids,
                                                 w_ids,
                                                 label_id=l_ids,
                                                 word_th=word_threshold,
                                                 train=True)
                input_ids = w_ids
                accum_loss += loss

            # learn model
            model.cleargrads()  # initialize all grad to zero
            accum_loss.backward()  # back propagation
            optimizer.update()
            total_loss += float(accum_loss.data)
            batch_num += 1
            print('Epoch: ', num, 'Batch_num', batch_num,
                  'batch loss: {:.2f}'.format(float(accum_loss.data)))
            accum_loss = 0

        train_loss_data.append(float(total_loss / batch_num))

        # save model and optimizer
        print('-----', epoch + 1, ' times -----')
        print('save the model and optimizer')
        serializers.save_hdf5('../data/seq2seq/' + str(epoch) + '_rough.model',
                              model)
        serializers.save_hdf5('../data/seq2seq/' + str(epoch) + '_rough.state',
                              optimizer)

    # save loss data
    with open('./data/loss_train_data.pkl', 'wb') as f:
        pickle.dump(train_loss_data, f)
Example #42
0
 def save(self, filename):
     tmp_filename = filename + "." + str(uuid.uuid4())
     serializers.save_hdf5(tmp_filename, self)
     if os.path.isfile(filename):
         os.remove(filename)
     os.rename(tmp_filename, filename)
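The temp-file-then-rename dance above means the target filename never points at a half-written snapshot; the explicit os.remove is there for Windows, where os.rename will not overwrite an existing file (on POSIX the rename alone is atomic). A matching load counterpart would simply fill the object in place; a minimal sketch, assuming it lives on the same class:

 def load(self, filename):
     # hypothetical counterpart to save(): restore this object's parameters in place
     serializers.load_hdf5(filename, self)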
Example #43
0
 def save(self):
     for name, f in self.forward:
         if 'res' in name:
             serializers.save_hdf5(name + '.hdf5', getattr(self, name))
Example #44
0
 def save_params(self, epoch):
     print "==> saving state %s" % self.out_model_dir
     serializers.save_hdf5(
         "%s/net_model_classifier_%d.h5" % (self.out_model_dir, epoch),
         self.network)
Example #45
0
def main():
    # parse command line args
    parser = argparse.ArgumentParser()
    parser.add_argument('--configfile', '-c', default="", type=str, help='')
    args = parser.parse_args()

    with open(args.configfile, "r+") as f:
        config = yaml.safe_load(f)

    # GPU settings
    if config["GPU"] >= 0:
        cuda.check_cuda_available()
        cuda.get_device(config["GPU"]).use()
    xp = cuda.cupy if config["GPU"] >= 0 else np

    initial_embedding = ""
    if "init_emb" in config and config["init_emb"] != "None":
        with open(config["init_emb"], "rb") as f:
            initial_embedding = pickle.load(f)
    else:
        initial_embedding = None
    ######################
    #### create model ####
    ######################
    model, corpus = load_model(config, initial_embedding)

    wordvector_model = load_wordvector(config)

    if config["GPU"] >= 0:
        model.to_gpu()
    optimizer = optimizers.Adam(alpha=config["training_rate"])
    optimizer.setup(model)

    if "fix_embedding" in config and config["fix_embedding"]:
        model.enc.word_embed.disable_update()
    optimizer.add_hook(chainer.optimizer.GradientClipping(5))

    if config["NN_model"] in ["RNN", "GRU"]:
        corpus.train_data[0] = [
            xp.array(x, dtype=xp.int32) for x in corpus.train_data[0]
        ]
        corpus.train_data = list(
            zip(corpus.train_data[0], corpus.train_data[1],
                corpus.train_data[2], corpus.train_data[3]))
        if hasattr(corpus, "dev_data"):
            corpus.dev_data[0] = [
                xp.array(x, dtype=xp.int32) for x in corpus.dev_data[0]
            ]
            corpus.dev_data = list(
                zip(corpus.dev_data[0], corpus.dev_data[1], corpus.dev_data[2],
                    corpus.dev_data[3]))
    elif config["NN_model"] in ["CNN", "SUM", "SUMFF"]:
        corpus.train_data[0] = [
            xp.array([
                x[i] if i < len(x) else -1 for i in range(corpus.max_input_len)
            ],
                     dtype=xp.int32) for x in corpus.train_data[0]
        ]
        corpus.train_data = list(
            zip(corpus.train_data[0], corpus.train_data[1],
                corpus.train_data[2], corpus.train_data[3]))
        if hasattr(corpus, "dev_data"):
            corpus.dev_data[0] = [
                xp.array([
                    x[i] if i < len(x) else -1
                    for i in range(corpus.max_input_len)
                ],
                         dtype=xp.int32) for x in corpus.dev_data[0]
            ]
            corpus.dev_data = list(
                zip(corpus.dev_data[0], corpus.dev_data[1], corpus.dev_data[2],
                    corpus.dev_data[3]))
    else:
        print("model is not defined")
        exit()

    #############################
    #### train mimic model ####
    #############################

    if "overfit" in config and config["overfit"]:
        train_loss_data = []
        minimum_train_loss = 9999999
        minimum_epoch = 0
        minimum_train_loss_flag = 0
        for num, epoch in enumerate(range(999999)):
            total_loss = 0
            batch_num = 0
            random.shuffle(corpus.train_data)

            # for training
            for i in range(0, len(corpus.train_data), config["batchsize"]):

                # select batch data
                batch = corpus.train_data[i:i + config["batchsize"]]
                batch = list(zip(*batch))
                loss = calc_batch_loss(batch, config, model, wordvector_model)

                # learn model
                model.cleargrads()  # initialize all grad to zero
                loss.backward()  # back propagation
                optimizer.update()
                total_loss += float(loss.data)
                batch_num += 1
                # print('Epoch: ', num, 'Batch_num', batch_num, 'batch loss: {:.2f}'.format(float(loss.data)))

            # save model and optimizer
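            # (keep the checkpoint with the lowest training loss seen so far and stop
            #  early after 5 consecutive epochs without improvement)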
            if total_loss / batch_num < minimum_train_loss:
                print('-----', epoch + 1, ' times -----')
                print('save the model and optimizer for train loss')
                serializers.save_hdf5(
                    'data/' + config["modelname"] + '_best_train_loss.model',
                    model)
                serializers.save_hdf5(
                    'data/' + config["modelname"] + '_best_train_loss.state',
                    optimizer)
                minimum_train_loss = total_loss / batch_num
                minimum_epoch = epoch
                minimum_train_loss_flag = 0
            else:
                minimum_train_loss_flag += 1
                if minimum_train_loss_flag > 4:
                    break
            if epoch == 39:
                print('save the model and optimizer')
                serializers.save_hdf5(
                    'data/' + config["modelname"] + '_best.model', model)
                serializers.save_hdf5(
                    'data/' + config["modelname"] + '_best.state', optimizer)

            # display the on-going status
            print('Epoch: ', num, 'Train sim loss: {:.2f}'.format(total_loss))
            train_loss_data.append(float(total_loss / batch_num))

        # save loss data
        with open('./data/train_loss_' + config["modelname"] + '.pkl',
                  'wb') as f:
            pickle.dump(train_loss_data, f)
        print(minimum_epoch)

    else:
        train_loss_data = []
        dev_loss_data = []
        minimum_loss = 9999999
        minimum_train_loss = 9999999
        for num, epoch in enumerate(range(config["epoch"])):
            total_loss = dev_loss = 0
            batch_num = 0
            random.shuffle(corpus.train_data)

            # for training
            for i in range(0, len(corpus.train_data), config["batchsize"]):

                # select batch data
                batch = corpus.train_data[i:i + config["batchsize"]]
                batch = list(zip(*batch))
                loss = calc_batch_loss(batch, config, model, wordvector_model)

                # learn model
                model.cleargrads()  # initialize all grad to zero
                loss.backward()  # back propagation
                optimizer.update()
                total_loss += float(loss.data)
                batch_num += 1
                print('Epoch: ', num, 'Batch_num', batch_num,
                      'batch loss: {:.2f}'.format(float(loss.data)))

            # for developing
            for i in range(0, config["devsize"], config["batchsize"]):

                # select dev batch data
                batch = corpus.dev_data[i:i + config["batchsize"]]
                batch = list(zip(*batch))
                loss = calc_batch_loss(batch, config, model, wordvector_model)

                dev_loss += loss
            # save model and optimizer
            if dev_loss.data < minimum_loss:
                print('-----', epoch + 1, ' times -----')
                print('save the model and optimizer')
                serializers.save_hdf5(
                    'data/' + config["modelname"] + '_best.model', model)
                serializers.save_hdf5(
                    'data/' + config["modelname"] + '_best.state', optimizer)
                minimum_loss = dev_loss.data

            # save model and optimizer
            if total_loss / batch_num < minimum_train_loss:
                print('-----', epoch + 1, ' times -----')
                print('save the model and optimizer for train loss')
                serializers.save_hdf5(
                    'data/' + config["modelname"] + '_best_train_loss.model',
                    model)
                serializers.save_hdf5(
                    'data/' + config["modelname"] + '_best_train_loss.state',
                    optimizer)
                minimum_train_loss = total_loss / batch_num

            # display the on-going status
            print('Epoch: ', num, 'Train sim loss: {:.2f}'.format(total_loss),
                  'dev sim loss: {:.2f}'.format(float(dev_loss.data)))
            train_loss_data.append(float(total_loss / batch_num))
            dev_loss_data.append(float(dev_loss.data))

        # save loss data
        with open('./data/train_loss_' + config["modelname"] + '.pkl',
                  'wb') as f:
            pickle.dump(train_loss_data, f)
        with open('./data/dev_loss_' + config["modelname"] + '.pkl',
                  'wb') as f:
            pickle.dump(dev_loss_data, f)

        # evaluate with origin vector
        from model import Interpreter
        interpreter = Interpreter(config)

        mse_total = 0
        cos_sim_total = 0
        total = 0
        for word in corpus.test_data:
            v_o = wordvector_model[word]
            v_m = interpreter(word)
            mse_total += mse(v_o, v_m)
            cos_sim_total += cos_sim(v_o, v_m)
            total += 1

        print(mse_total / total / config["feature_num"])
        print(cos_sim_total / total)
Example #46
0
def main():
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=-1,
                        help='GPU device ID')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=50,
                        help='# of epoch')
    parser.add_argument('--batch_size',
                        type=int,
                        default=128,
                        help='size of mini-batch')
    parser.add_argument('--density',
                        type=int,
                        default=1,
                        help='density of cnn kernel')
    parser.add_argument('--small',
                        dest='small',
                        action='store_true',
                        default=False)
    parser.add_argument('--no_bn',
                        dest='use_bn',
                        action='store_false',
                        default=True)
    parser.add_argument('--out', default='')
    parser.set_defaults(test=False)
    args = parser.parse_args()

    # model = SLPolicy(use_bn=args.use_bn)
    # model = RolloutPolicy()
    model = RLPolicy()

    # log directory
    out = datetime.datetime.now().strftime('%m%d')
    if args.out:
        out = out + '_' + args.out
    out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", out))
    os.makedirs(os.path.join(out_dir, 'models'), exist_ok=True)

    # gpu
    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()

    # setting
    with open(os.path.join(out_dir, 'setting.txt'), 'w') as f:
        for k, v in args._get_kwargs():
            print('{} = {}'.format(k, v))
            f.write('{} = {}\n'.format(k, v))

    # prepare for dataset
    if args.small:
        train = PreprocessedDataset(train_small_path)
    else:
        train = PreprocessedDataset(train_path)
    test = PreprocessedDataset(test_path)
    train_iter = iterators.SerialIterator(train, args.batch_size)
    val_iter = iterators.SerialIterator(test, args.batch_size, repeat=False)

    # optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(0.001))

    # start training
    start = time.time()
    train_count = 0
    for epoch in range(args.epoch):

        # train
        train_loss = []
        train_accuracy = []

        for i in range(len(train) // args.batch_size):
            batch = train_iter.next()
            x = chainer.Variable(
                model.xp.array([b[0] for b in batch], 'float32'))
            y = chainer.Variable(model.xp.array([b[1] for b in batch],
                                                'int32'))
            optimizer.update(model, x, y)
            train_count += 1

            progress_report(train_count, start, args.batch_size)

            train_loss.append(cuda.to_cpu(model.loss.data))
            train_accuracy.append(cuda.to_cpu(model.accuracy.data))

        # test
        test_loss = []
        test_accuracy = []
        valid_ply_rate = []

        it = copy.copy(val_iter)
        for batch in it:
            x = chainer.Variable(model.xp.array([b[0] for b in batch],
                                                'float32'),
                                 volatile=True)
            y = chainer.Variable(model.xp.array([b[1] for b in batch],
                                                'int32'),
                                 volatile=True)
            plies = model(x, y, train=False)
            for b, ply in zip(batch, plies):
                if b[1] >= 0:
                    valid_ply_rate.append(
                        board.is_valid(b[0][:2], b[0][4][0][0], ply // 8,
                                       ply % 8))

            test_loss.append(cuda.to_cpu(model.loss.data))
            test_accuracy.append(cuda.to_cpu(model.accuracy.data))

        print(
            '\nepoch {}  train_loss {:.5f}  train_accuracy {:.3f} \n'
            '          test_loss {:.5f}  test_accuracy {:.3f} valid_ply_rate {:.3f}'
            .format(epoch, np.mean(train_loss), np.mean(train_accuracy),
                    np.mean(test_loss), np.mean(test_accuracy),
                    np.mean(valid_ply_rate)))
        with open(os.path.join(out_dir, "log"), 'a+') as f:
            f.write(
                'epoch {}  train_loss {:.5f}  train_accuracy {:.3f} \n'
                '          test_loss {:.5f}   test_accuracy {:.3f}  valid_ply_rate {:.3f}\n'
                .format(epoch, np.mean(train_loss), np.mean(train_accuracy),
                        np.mean(test_loss), np.mean(test_accuracy),
                        np.mean(valid_ply_rate)))

        if epoch % 3 == 0:
            serializers.save_hdf5(
                os.path.join(out_dir, "models",
                             "sl_policy_{}.model".format(epoch)), model)
Example #47
0
def train(args):
  trace('loading corpus ...')
  with open(args.source) as fp:
    trees = [make_tree(l) for l in fp]

  trace('extracting leaf nodes ...')
  word_lists = [extract_words(t) for t in trees]
  lower_lists = [[w.lower() for w in words] for words in word_lists]

  trace('extracting gold operations ...')
  op_lists = [make_operations(t) for t in trees]

  trace('making vocabulary ...')
  word_vocab = Vocabulary.new(lower_lists, args.vocab)
  phrase_set = set()
  semiterminal_set = set()
  for tree in trees:
    phrase_set |= set(extract_phrase_labels(tree))
    semiterminal_set |= set(extract_semiterminals(tree))
  phrase_vocab = Vocabulary.new([list(phrase_set)], len(phrase_set), add_special_tokens=False)
  semiterminal_vocab = Vocabulary.new([list(semiterminal_set)], len(semiterminal_set), add_special_tokens=False)

  trace('converting data ...')
  word_lists = [convert_word_list(x, word_vocab) for x in word_lists]
  op_lists = [convert_op_list(x, phrase_vocab, semiterminal_vocab) for x in op_lists]

  trace('start training ...')
  parser = Parser(
    args.vocab, args.embed, args.char_embed, args.queue,
    args.stack, args.srstate, len(phrase_set), len(semiterminal_set),
  )
  if args.use_gpu:
    parser.to_gpu()
  opt = optimizers.SGD(lr=0.1)
  opt.setup(parser)
  opt.add_hook(optimizer.GradientClipping(10))
  opt.add_hook(optimizer.WeightDecay(0.0001))

  batch_set = list(zip(word_lists, op_lists))

  for epoch in range(args.epoch):
    n = 0
    random.shuffle(batch_set)
    
    for samples in batch(batch_set, args.minibatch):
      parser.zerograds()
      loss = XP.fzeros(())

      for word_list, op_list in zip(*samples):
        trace('epoch %3d, sample %6d:' % (epoch + 1, n + 1))
        loss += parser.forward(word_list, op_list, 0)
        n += 1
      
      loss.backward()
      opt.update()

    trace('saving model ...')
    prefix = args.model + '.%03d' % (epoch + 1)
    word_vocab.save(prefix + '.words')
    phrase_vocab.save(prefix + '.phrases')
    semiterminal_vocab.save(prefix + '.semiterminals')
    parser.save_spec(prefix + '.spec')
    serializers.save_hdf5(prefix + '.weights', parser)

    opt.lr *= 0.92
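    # (learning-rate annealing: the SGD lr is multiplied by 0.92 after every epoch)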

  trace('finished.')
Example #48
0
                optm_dnn.update()
                loss = 0.0
                loss_cnt = 0

            sys.stdout.write('\n')
        ave_loss = sum_loss / total_cnt

        print("Average Loss = " + str(ave_loss))

        print("Save DNN...")
        newdatetime = datetime.now().strftime('%m%d_%H%M%S')

        dnnName = DNNmode + '_s' + str(stateSize) + 'h' + str(hiddenSize)
        saveDNNfn = mm.DNNfn(dnn_dir, dnnName, chNum, winLen, shiftLen, fftLen,
                             winMode, lossMode, epoch, newdatetime)
        serializers.save_hdf5(saveDNNfn, dnnEst)
        process_time = time.time() - start
        print("exeTime(sec):" + str(process_time))

        start = time.time()

        try:
            delfn = mm.DNNfn(dnn_dir, dnnName, chNum, winLen, shiftLen, fftLen,
                             winMode, lossMode, epoch - 1, olddatetime)
            os.remove(delfn)
        except OSError:
            print('no file to delete')

        olddatetime = newdatetime

        plt.figure()
Example #49
0
def run(data_file, is_train=False, **args):
    for k in six.iterkeys(args):
        args[k] = deepcrf.util.str_to_unicode_python2(args[k])

    is_test = not is_train
    batchsize = args['batchsize']
    model_name = args['model_name']
    optimizer_name = args['optimizer']
    save_dir = args['save_dir']
    print(args)

    def convert_multi_files(data_file):
        if args.get('use_list_files', False):
            with open(data_file) as f:
                data_files = [filename.strip() for filename in f]
        else:
            data_files = [data_file]
        return data_files

    data_files = convert_multi_files(data_file)

    # TODO: check save_dir exist
    if not os.path.isdir(save_dir):
        err_msg = 'There is no dir : {}\n'.format(save_dir)
        err_msg += '##############################\n'
        err_msg += '## Please run the following: \n'
        err_msg += '## $ mkdir {}\n'.format(save_dir)
        err_msg += '##############################\n'
        raise ValueError(err_msg)

    save_name = args['save_name']
    save_name = os.path.join(save_dir, save_name)

    xp = cuda.cupy if args['gpu'] >= 0 else np
    efficient_gpu = False
    if args['gpu'] >= 0:
        cuda.get_device_from_id(args['gpu']).use()
        xp.random.seed(1234)
        efficient_gpu = args.get('efficient_gpu', False)

    def to_gpu(x):
        if args['gpu'] >= 0:
            return chainer.cuda.to_gpu(x)
        return x

    # load files
    dev_file = args['dev_file']
    test_file = args['test_file']
    delimiter = args['delimiter']
    input_idx = list(map(int, args['input_idx'].split(',')))
    output_idx = list(map(int, args['output_idx'].split(',')))
    word_input_idx = input_idx[0]  # NOTE: word_idx is first column!
    additional_input_idx = input_idx[1:]
    sentences_train = []
    if is_train:
        sentences_train = deepcrf.util.read_conll_file(filenames=data_files,
                                                       delimiter=delimiter)
        if len(sentences_train) == 0:
            s = str(len(sentences_train))
            err_msg = 'Invalid training sizes: {} sentences. '.format(s)
            raise ValueError(err_msg)
    else:
        # Predict
        if len(input_idx) == 1:
            # raw text format
            sentences_train = deepcrf.util.read_raw_file(filenames=data_files,
                                                         delimiter=u' ')
        else:
            # conll format
            sentences_train = deepcrf.util.read_conll_file(
                filenames=data_files, delimiter=delimiter)

    # sentences_train = sentences_train[:100]

    sentences_dev = []
    sentences_test = []
    if dev_file:

        dev_file = convert_multi_files(dev_file)
        sentences_dev = deepcrf.util.read_conll_file(dev_file,
                                                     delimiter=delimiter)
    if test_file:
        test_file = convert_multi_files(test_file)
        sentences_test = deepcrf.util.read_conll_file(test_file,
                                                      delimiter=delimiter)

    # Additional setup
    vocab_adds = []
    for ad_feat_id in additional_input_idx:
        sentences_additional_train = [[
            feat_obj[ad_feat_id] for feat_obj in sentence
        ] for sentence in sentences_train]
        vocab_add = deepcrf.util.build_vocab(sentences_additional_train)
        vocab_adds.append(vocab_add)

    save_vocab = save_name + '.vocab'
    save_vocab_char = save_name + '.vocab_char'
    save_tags_vocab = save_name + '.vocab_tag'
    save_train_config = save_name + '.train_config'

    # TODO: check unknown pos tags
    # TODO: compute unk words

    if is_train:
        sentences_words_train = [[w_obj[word_input_idx] for w_obj in sentence]
                                 for sentence in sentences_train]
        vocab = deepcrf.util.build_vocab(sentences_words_train)
        vocab_char = deepcrf.util.build_vocab(
            deepcrf.util.flatten(sentences_words_train))
        vocab_tags = deepcrf.util.build_tag_vocab(sentences_train)
    elif is_test:
        vocab = deepcrf.util.load_vocab(save_vocab)
        vocab_char = deepcrf.util.load_vocab(save_vocab_char)
        vocab_tags = deepcrf.util.load_vocab(save_tags_vocab)
        vocab_adds = []
        for i, idx in enumerate(additional_input_idx):
            save_additional_vocab = save_name + '.vocab_additional_' + str(i)
            vocab_add = deepcrf.util.load_vocab(save_additional_vocab)
            vocab_adds.append(vocab_add)

    if args.get('word_emb_file', False):
        # set Pre-trained embeddings
        # emb_file = './emb/glove.6B.100d.txt'
        emb_file = args['word_emb_file']
        word_emb_vocab_type = args.get('word_emb_vocab_type')

        def assert_word_emb_shape(shape1, shape2):
            err_msg = '''Pre-trained embedding size is not equal to `--n_word_emb` ({} != {})'''
            if shape1 != shape2:
                err_msg = err_msg.format(str(shape1), str(shape2))
                raise ValueError(err_msg)

        def assert_no_emb(word_vecs):
            err_msg = '''There is no-embeddings! Please check your file `--word_emb_file`'''
            if word_vecs.shape[0] == 0:
                raise ValueError(err_msg)

        if word_emb_vocab_type == 'replace_all':
            # replace all vocab by Pre-trained embeddings
            word_vecs, vocab_glove = deepcrf.util.load_glove_embedding_include_vocab(
                emb_file)
            vocab = vocab_glove
        elif word_emb_vocab_type == 'replace_only':
            word_ids, word_vecs = deepcrf.util.load_glove_embedding(
                emb_file, vocab)
            assert_no_emb(word_vecs)
        elif word_emb_vocab_type == 'additional':
            word_vecs, vocab_glove = deepcrf.util.load_glove_embedding_include_vocab(
                emb_file)
            additional_vecs = []
            for word, word_idx in sorted(six.iteritems(vocab_glove),
                                         key=lambda x: x[1]):
                if word not in vocab:
                    vocab[word] = len(vocab)
                    additional_vecs.append(word_vecs[word_idx])
            additional_vecs = np.array(additional_vecs, dtype=np.float32)

    if args.get('vocab_file', False):
        vocab_file = args['vocab_file']
        vocab = deepcrf.util.load_vocab(vocab_file)

    if args.get('vocab_char_file', False):
        vocab_char_file = args['vocab_char_file']
        vocab_char = deepcrf.util.load_vocab(vocab_char_file)

    vocab_tags_inv = dict((v, k) for k, v in six.iteritems(vocab_tags))
    PAD_IDX = vocab[PADDING]
    UNK_IDX = vocab[UNKWORD]

    CHAR_PAD_IDX = vocab_char[PADDING]
    CHAR_UNK_IDX = vocab_char[UNKWORD]

    tmp_xp = xp
    if efficient_gpu:
        tmp_xp = np  # use CPU (numpy)

    def parse_to_word_ids(sentences, word_input_idx, vocab):
        return deepcrf.util.parse_to_word_ids(sentences,
                                              xp=tmp_xp,
                                              vocab=vocab,
                                              UNK_IDX=UNK_IDX,
                                              idx=word_input_idx)

    def parse_to_char_ids(sentences):
        return deepcrf.util.parse_to_char_ids(sentences,
                                              xp=tmp_xp,
                                              vocab_char=vocab_char,
                                              UNK_IDX=CHAR_UNK_IDX,
                                              idx=word_input_idx)

    def parse_to_tag_ids(sentences):
        return deepcrf.util.parse_to_tag_ids(sentences,
                                             xp=tmp_xp,
                                             vocab=vocab_tags,
                                             UNK_IDX=-1,
                                             idx=-1)

    x_train = parse_to_word_ids(sentences_train, word_input_idx, vocab)
    x_char_train = parse_to_char_ids(sentences_train)
    y_train = parse_to_tag_ids(sentences_train)
    x_train_additionals = [
        parse_to_word_ids(sentences_train, ad_feat_id, vocab_adds[i])
        for i, ad_feat_id in enumerate(additional_input_idx)
    ]

    x_dev = parse_to_word_ids(sentences_dev, word_input_idx, vocab)
    x_char_dev = parse_to_char_ids(sentences_dev)
    y_dev = parse_to_tag_ids(sentences_dev)
    x_dev_additionals = [
        parse_to_word_ids(sentences_dev, ad_feat_id, vocab_adds[i])
        for i, ad_feat_id in enumerate(additional_input_idx)
    ]

    y_dev_cpu = [[w[-1] for w in sentence] for sentence in sentences_dev]
    # tag_names = []
    tag_names = list(
        set([
            tag[2:] if len(tag) >= 2 else tag[0]
            for tag in six.iterkeys(vocab_tags)
        ]))

    x_test = parse_to_word_ids(sentences_test, word_input_idx, vocab)
    x_char_test = parse_to_char_ids(sentences_test)
    y_test = parse_to_tag_ids(sentences_test)
    x_test_additionals = [
        parse_to_word_ids(sentences_test, ad_feat_id, vocab_adds[i])
        for i, ad_feat_id in enumerate(additional_input_idx)
    ]

    cnt_train_unk = sum([tmp_xp.sum(d == UNK_IDX) for d in x_train])
    cnt_train_word = sum([d.size for d in x_train])
    unk_train_unk_rate = float(cnt_train_unk) / cnt_train_word

    cnt_dev_unk = sum([tmp_xp.sum(d == UNK_IDX) for d in x_dev])
    cnt_dev_word = sum([d.size for d in x_dev])
    unk_dev_unk_rate = float(cnt_dev_unk) / max(cnt_dev_word, 1)

    logging.info('train:' + str(len(x_train)))
    logging.info('dev  :' + str(len(x_dev)))
    logging.info('test :' + str(len(x_test)))
    logging.info('vocab     :' + str(len(vocab)))
    logging.info('vocab_tags:' + str(len(vocab_tags)))
    logging.info('unk count (train):' + str(cnt_train_unk))
    logging.info('unk rate  (train):' + str(unk_train_unk_rate))
    logging.info('cnt all words (train):' + str(cnt_train_word))
    logging.info('unk count (dev):' + str(cnt_dev_unk))
    logging.info('unk rate  (dev):' + str(unk_dev_unk_rate))
    logging.info('cnt all words (dev):' + str(cnt_dev_word))
    # show model config
    logging.info('######################')
    logging.info('## Model Config')
    logging.info('model_name:' + str(model_name))
    logging.info('batchsize:' + str(batchsize))
    logging.info('optimizer:' + str(optimizer_name))
    # Save model config
    logging.info('######################')
    logging.info('## Model Save Config')
    logging.info('save_dir :' + str(save_dir))

    # save vocab
    logging.info('save_vocab        :' + save_vocab)
    logging.info('save_vocab_char   :' + save_vocab_char)
    logging.info('save_tags_vocab   :' + save_tags_vocab)
    logging.info('save_train_config :' + save_train_config)

    init_emb = None

    if is_train:
        deepcrf.util.write_vocab(save_vocab, vocab)
        deepcrf.util.write_vocab(save_vocab_char, vocab_char)
        deepcrf.util.write_vocab(save_tags_vocab, vocab_tags)
        deepcrf.util.write_vocab(save_train_config, args)

        for i, vocab_add in enumerate(vocab_adds):
            save_additional_vocab = save_name + '.vocab_additional_' + str(i)
            deepcrf.util.write_vocab(save_additional_vocab, vocab_add)

    n_vocab_add = [len(_vadd) for _vadd in vocab_adds]

    net = BiLSTM_CNN_CRF(n_vocab=len(vocab),
                         n_char_vocab=len(vocab_char),
                         emb_dim=args['n_word_emb'],
                         hidden_dim=args['n_hidden'],
                         n_layers=args['n_layer'],
                         init_emb=init_emb,
                         char_input_dim=args['n_char_emb'],
                         char_hidden_dim=args['n_char_hidden'],
                         n_label=len(vocab_tags),
                         n_add_feature_dim=args['n_add_feature_emb'],
                         n_add_feature=len(n_vocab_add),
                         n_vocab_add=n_vocab_add,
                         use_cudnn=args['use_cudnn'])
    my_cudnn(args['use_cudnn'])

    if args.get('word_emb_file', False):
        if word_emb_vocab_type == 'replace_all':
            # replace all vocab by Pre-trained embeddings
            assert_word_emb_shape(word_vecs.shape[1],
                                  net.word_embed.W.shape[1])
            net.word_embed.W.data = word_vecs[:]
        elif word_emb_vocab_type == 'replace_only':
            assert_no_emb(word_vecs)
            assert_word_emb_shape(word_vecs.shape[1],
                                  net.word_embed.W.shape[1])
            net.word_embed.W.data[word_ids] = word_vecs[:]
        elif word_emb_vocab_type == 'additional':
            assert_word_emb_shape(word_vecs.shape[1],
                                  net.word_embed.W.shape[1])
            v_size = additional_vecs.shape[0]
            net.word_embed.W.data[-v_size:] = additional_vecs[:]

    if args.get('return_model', False):
        return net

    if args['gpu'] >= 0:
        net.to_gpu()

    init_alpha = args['init_lr']
    if optimizer_name == 'adam':
        opt = optimizers.Adam(alpha=init_alpha, beta1=0.9, beta2=0.9)
    elif optimizer_name == 'adadelta':
        opt = optimizers.AdaDelta()
    elif optimizer_name == 'sgd_mom':
        opt = optimizers.MomentumSGD(lr=init_alpha, momentum=0.9)
    elif optimizer_name == 'sgd':
        opt = optimizers.SGD(lr=init_alpha)
    else:
        raise ValueError('unknown optimizer: {}'.format(optimizer_name))

    opt.setup(net)
    opt.add_hook(chainer.optimizer.GradientClipping(5.0))

    def eval_loop(x_data, x_char_data, y_data, x_train_additionals=[]):
        # dev or test
        net.set_train(train=False)
        iteration_list = range(0, len(x_data), batchsize)
        # perm = np.random.permutation(len(x_data))
        sum_loss = 0.0
        predict_lists = []
        for i_index, index in enumerate(iteration_list):
            x = x_data[index:index + batchsize]
            x_char = x_char_data[index:index + batchsize]
            target_y = y_data[index:index + batchsize]

            if efficient_gpu:
                x = [to_gpu(_) for _ in x]
                x_char = [[to_gpu(_) for _ in words] for words in x_char]
                target_y = [to_gpu(_) for _ in target_y]

            x_additional = []
            if len(x_train_additionals):
                x_additional = [[
                    to_gpu(_) for _ in x_ad[index:index + batchsize]
                ] for x_ad in x_train_additionals]

            output = net(x_data=x,
                         x_char_data=x_char,
                         x_additional=x_additional)
            predict, loss = net.predict(output, target_y)

            sum_loss += loss.data
            predict_lists.extend(predict)

        _, predict_tags = zip(*predict_lists)
        predicted_results = []
        for predict in predict_tags:
            predicted = [
                vocab_tags_inv[tag_idx] for tag_idx in to_cpu(predict)
            ]
            predicted_results.append(predicted)

        return predict_lists, sum_loss, predicted_results

    if args['model_filename']:
        model_filename = args['model_filename']
        serializers.load_hdf5(model_filename, net)

    if is_test:
        # predict
        # model_filename = args['model_filename']
        # model_filename = os.path.join(save_dir, model_filename)
        # serializers.load_hdf5(model_filename, net)
        vocab_tags_inv = dict([(v, k) for k, v in six.iteritems(vocab_tags)])
        x_predict = x_train
        x_char_predict = x_char_train
        x_additionals = x_train_additionals
        y_predict = y_train

        if dev_file:
            predict_dev, loss_dev, predict_dev_tags = eval_loop(
                x_dev, x_char_dev, y_dev, x_dev_additionals)
            gold_predict_pairs = [y_dev_cpu, predict_dev_tags]
            result, phrase_info = deepcrf.util.conll_eval(gold_predict_pairs,
                                                          flag=False,
                                                          tag_class=tag_names)
            all_result = result['All_Result']
            print('all_result: {}'.format(all_result))

        predict_pairs, _, _tmp = eval_loop(x_predict, x_char_predict,
                                           y_predict, x_additionals)
        _, predict_tags = zip(*predict_pairs)
        predicted_output = args['predicted_output']
        predicted_results = []
        for predict in predict_tags:
            predicted = [
                vocab_tags_inv[tag_idx] for tag_idx in to_cpu(predict)
            ]
            predicted_results.append(predicted)

        with open(predicted_output, 'w') as f:
            for predicted in predicted_results:
                for tag in predicted:
                    f.write(tag + '\n')
                f.write('\n')

        return False

    logging.info('start training...')
    tmax = args['max_iter']
    t = 0.0
    prev_dev_accuracy = 0.0
    prev_dev_f = 0.0
    for epoch in six.moves.xrange(args['max_iter']):
        # train
        logging.info('epoch:' + str(epoch))
        logging.info(' [train]')
        net.set_train(train=True)
        iteration_list = range(0, len(x_train), batchsize)
        perm = np.random.permutation(len(x_train))
        sum_loss = 0.0
        predict_train = []
        for i_index, index in enumerate(iteration_list):
            data = [(x_train[i], x_char_train[i], y_train[i])
                    for i in perm[index:index + batchsize]]
            x, x_char, target_y = zip(*data)

            x_additional = []
            if len(x_train_additionals):
                x_additional = [[
                    to_gpu(x_ad[add_i])
                    for add_i in perm[index:index + batchsize]
                ] for x_ad in x_train_additionals]

            if efficient_gpu:
                x = [to_gpu(_) for _ in x]
                x_char = [[to_gpu(_) for _ in words] for words in x_char]
                target_y = [to_gpu(_) for _ in target_y]

            output = net(x_data=x,
                         x_char_data=x_char,
                         x_additional=x_additional)
            predict, loss = net.predict(output, target_y)

            # loss
            sum_loss += loss.data

            # update
            net.cleargrads()
            loss.backward()
            opt.update()

            predict_train.extend(predict)

        # Evaluation
        train_accuracy = deepcrf.util.eval_accuracy(predict_train)

        logging.info('  loss     :' + str(sum_loss))
        logging.info('  accuracy :' + str(train_accuracy))

        # Dev
        predict_dev, loss_dev, predict_dev_tags = eval_loop(
            x_dev, x_char_dev, y_dev, x_dev_additionals)

        gold_predict_pairs = [y_dev_cpu, predict_dev_tags]
        result, phrase_info = deepcrf.util.conll_eval(gold_predict_pairs,
                                                      flag=False,
                                                      tag_class=tag_names)
        all_result = result['All_Result']

        # Evaluation
        dev_accuracy = deepcrf.util.eval_accuracy(predict_dev)
        logging.info(' [dev]')
        logging.info('  loss     :' + str(loss_dev))
        logging.info('  accuracy :' + str(dev_accuracy))
        logging.info('  f_measure :' + str(all_result[-1]))

        dev_f = all_result[-1]

        if prev_dev_f < dev_f:
            logging.info(' [update best model on dev set!]')
            dev_list = [prev_dev_f, dev_f]
            dev_str = '       ' + ' => '.join(map(str, dev_list))
            logging.info(dev_str)
            prev_dev_f = dev_f

            # Save model
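            # (a new .model/.state pair is written each time dev F-measure improves,
            #  with the epoch index kept in the file name)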
            model_filename = save_name + '_epoch' + str(epoch)
            serializers.save_hdf5(model_filename + '.model', net)
            serializers.save_hdf5(model_filename + '.state', opt)
Example #50
0
def train_all(params):
    target_save_dir = osp.join(params['save_dir'], 'prepro',
                               params['dataset'] + '_' + params['splitBy'])
    graph_dir = osp.join('log_graph',
                         params['dataset'] + '_' + params['splitBy'])
    model_dir = osp.join(params['save_dir'], 'model',
                         params['dataset'] + '_' + params['splitBy'])

    if params['old']:
        params['data_json'] = 'old' + params['data_json']
        params['data_h5'] = 'old' + params['data_h5']
        params['image_feats'] = 'old' + params['image_feats']
        params['ann_feats'] = 'old' + params['ann_feats']
        params['id'] = 'old' + params['id']
        params['word_emb_path'] = 'old' + params['word_emb_path']

    with open('setting.json', 'w') as f:
        json.dump(params, f)
    if not osp.isdir(graph_dir):
        os.mkdir(graph_dir)
    loader = DataLoader(params)

    # model setting
    batch_size = params['batch_size']
    gpu_id = params['gpu_id']
    cuda.get_device(gpu_id).use()
    xp = cuda.cupy

    featsOpt = {
        'ann': osp.join(target_save_dir, params['ann_feats']),
        'img': osp.join(target_save_dir, params['image_feats'])
    }
    loader.loadFeats(featsOpt)
    loader.shuffle('train')

    ve = VisualEncoder(res6=L.ResNet152Layers().fc6).to_gpu(gpu_id)
    if 'attention' in params['id']:
        print('attention language encoder')
        le = LanguageEncoderAttn(len(loader.ix_to_word))
        rl_crit = ListenerReward(len(loader.ix_to_word),
                                 attention=True).to_gpu(gpu_id)
    else:
        le = LanguageEncoder(len(loader.ix_to_word))
        rl_crit = ListenerReward(len(loader.ix_to_word),
                                 attention=False).to_gpu(gpu_id)
    cca = CcaEmbedding().to_gpu(gpu_id)
    lm = LanguageModel(len(loader.ix_to_word), loader.seq_length)
    if params['pretrained_w']:
        print('pretrained word embedding...')
        word_emb = load_vcab_init(
            loader.word_to_ix,
            osp.join(target_save_dir, params['word_emb_path']))
        le.word_emb.W.data = word_emb
        lm.word_emb = le.word_emb

    le.to_gpu(gpu_id)
    lm.to_gpu(gpu_id)
    serializers.load_hdf5(osp.join(model_dir, params['id'] + ".h5"), rl_crit)

    ve_optim = optimizers.Adam(alpha=4e-5, beta1=0.8)
    le_optim = optimizers.Adam(alpha=4e-4, beta1=0.8)
    cca_optim = optimizers.Adam(alpha=4e-4, beta1=0.8)
    lm_optim = optimizers.Adam(alpha=4e-4, beta1=0.8)

    ve_optim.setup(ve)
    le_optim.setup(le)
    cca_optim.setup(cca)
    lm_optim.setup(lm)

    ve_optim.add_hook(chainer.optimizer.GradientClipping(0.1))
    le_optim.add_hook(chainer.optimizer.GradientClipping(0.1))
    cca_optim.add_hook(chainer.optimizer.GradientClipping(0.1))
    lm_optim.add_hook(chainer.optimizer.GradientClipping(0.1))
    ve.joint_enc.W.update_rule.hyperparam.alpha = 4e-4
    ve.joint_enc.b.update_rule.hyperparam.alpha = 4e-4

    iteration = 0
    epoch = 0
    val_loss_history = []
    val_loss_lm_s_history = []
    val_loss_lm_l_history = []
    val_loss_l_history = []
    val_acc_history = []
    val_rank_acc_history = []
    min_val_loss = 100
    while True:
        chainer.config.train = True
        chainer.config.enable_backprop = True
        ve.zerograds()
        le.zerograds()
        cca.zerograds()
        lm.zerograds()
        rl_crit.zerograds()

        data = loader.getBatch('train', params)

        ref_ann_ids = data['ref_ann_ids']
        pos_feats = Variable(xp.array(data['feats'], dtype=xp.float32))
        neg_feats = Variable(xp.array(data['neg_feats'], dtype=xp.float32))

        feats = F.concat([pos_feats, neg_feats, pos_feats], axis=0)
        seqz = np.concatenate([data['seqz'], data['seqz'], data['neg_seqz']],
                              axis=0)
        lang_last_ind = calc_max_ind(seqz)
        seqz = Variable(xp.array(seqz, dtype=xp.int32))

        vis_enc_feats = ve(feats)
        lang_enc_feats = le(seqz, lang_last_ind)
        cossim, vis_emb_feats = cca(vis_enc_feats, lang_enc_feats)
        vis_feats = vis_combine(vis_enc_feats, vis_emb_feats)
        logprobs = lm(vis_feats, seqz, lang_last_ind)

        # emb loss
        pairSim, vis_unpairSim, lang_unpairSim = F.split_axis(cossim,
                                                              3,
                                                              axis=0)
        emb_flows = {
            'vis': [pairSim, vis_unpairSim],
            'lang': [pairSim, lang_unpairSim]
        }
        emb_loss = emb_crits(emb_flows, params['emb_margin'])

        # lang loss
        pairP, vis_unpairP, lang_unpairP = F.split_axis(logprobs, 3, axis=1)
        pair_num, _, lang_unpair_num = np.split(lang_last_ind, 3)
        num_labels = {'T': pair_num, 'F': lang_unpair_num}
        lm_flows = {
            'T': pairP,
            'visF': [pairP, vis_unpairP],
            'langF': [pairP, lang_unpairP]
        }
        lm_loss = lm_crits(lm_flows,
                           num_labels,
                           params['lm_margin'],
                           vlamda=params['vis_rank_weight'],
                           llamda=params['lang_rank_weight'])

        # RL loss (computed on the (pos, pos) pairs only)
        rl_vis_feats = F.split_axis(vis_feats, 3, axis=0)[0]
        sampled_seq, sample_log_probs = lm.sample(rl_vis_feats)
        sampled_lang_last_ind = calc_max_ind(sampled_seq)
        rl_loss = rl_crit(pos_feats, sampled_seq, sample_log_probs,
                          sampled_lang_last_ind)  #, lm.baseline)

        loss = emb_loss + lm_loss + rl_loss
        print(emb_loss, lm_loss, rl_loss)

        loss.backward()

        ve_optim.update()
        le_optim.update()
        cca_optim.update()
        lm_optim.update()

        if data['bounds']['wrapped']:
            print('one epoch finished!')
            loader.shuffle('train')

        if params['check_sent']:
            sampled_sents = loader.decode_sequence(cuda.to_cpu(sampled_seq),
                                                   sampled_lang_last_ind)
            for i in range(len(sampled_sents)):
                print('sampled sentence : ', ' '.join(sampled_sents[i]))
                print('reward : ', rl_crit.reward[i])

        if iteration % params['losses_log_every'] == 0:
            acc = xp.where(rl_crit.reward > 0.5, 1, 0).mean()
            print('{} iter : train loss {}, acc : {}, reward_mean : {}'.format(
                iteration, loss.data, acc, rl_crit.reward.mean()))

        if iteration % params[
                'mine_hard_every'] == 0 and iteration > 0 and params[
                    'mine_hard']:
            make_graph(ve, cca, loader, 'train', params, xp)

        if (iteration % params['save_checkpoint_every'] == 0
                and iteration > 0):
            chainer.config.train = False
            chainer.config.enable_backprop = False
            loader.resetImageIterator('val')
            loss_sum = 0
            loss_generation = 0
            loss_lm_margin = 0
            loss_emb_margin = 0
            loss_evals = 0
            accuracy = 0
            rank_acc = 0
            rank_num = 0
            while True:
                data = loader.getImageBatch('val', params)
                image_id = data['image_id']
                img_ann_ids = data['img_ann_ids']
                sent_ids = data['sent_ids']
                gd_ixs = data['gd_ixs']
                feats = Variable(xp.array(data['feats'], dtype=xp.float32))
                seqz = data['seqz']
                lang_last_ind = calc_max_ind(seqz)
                scores = []
                for i, sent_id in enumerate(sent_ids):
                    gd_ix = gd_ixs[i]
                    labels = xp.zeros(len(img_ann_ids), dtype=xp.int32)
                    labels[gd_ix] = 1
                    labels = Variable(labels)

                    sent_seqz = np.concatenate(
                        [[seqz[i]] for _ in range(len(img_ann_ids))], axis=0)
                    one_last_ind = np.array([lang_last_ind[i]] *
                                            len(img_ann_ids))
                    sent_seqz = Variable(xp.array(sent_seqz, dtype=xp.int32))

                    vis_enc_feats = ve(feats)
                    lang_enc_feats = le(sent_seqz, one_last_ind)
                    cossim, vis_emb_feats = cca(vis_enc_feats, lang_enc_feats)
                    vis_feats = vis_combine(vis_enc_feats, vis_emb_feats)
                    logprobs = lm(vis_feats, sent_seqz, one_last_ind).data

                    gd_ix = gd_ixs[i]
                    lm_generation_loss = lm_crits(
                        {
                            'T': logprobs[:, gd_ix, xp.newaxis]
                        }, {
                            'T': one_last_ind[gd_ix, np.newaxis]
                        },
                        params['lm_margin'],
                        vlamda=0,
                        llamda=0).data

                    lm_scores = -computeLosses(logprobs, one_last_ind)
                    lm_margin_loss, _, _ = compute_margin_loss(
                        lm_scores, gd_ix, params['lm_margin'])
                    scores.append(lm_scores[gd_ix])

                    emb_margin_loss, pos_sc, max_neg_sc = compute_margin_loss(
                        cossim.data, gd_ix, params['emb_margin'])
                    loss_generation += lm_generation_loss
                    loss_lm_margin += lm_margin_loss
                    loss_emb_margin += emb_margin_loss
                    loss_sum += lm_generation_loss + lm_margin_loss + emb_margin_loss
                    loss_evals += 1
                    if pos_sc > max_neg_sc:
                        accuracy += 1
                if params['dataset'] == 'refgta':
                    rank_a, rank_n = calc_rank_acc(scores, data['rank'])
                    rank_acc += rank_a
                    rank_num += rank_n
                print('{} iter | {}/{} validating acc : {}'.format(
                    iteration, data['bounds']['it_pos_now'],
                    data['bounds']['it_max'], accuracy / loss_evals))

                if data['bounds']['wrapped']:
                    print('validation finished!')
                    fin_val_loss = cuda.to_cpu(loss_sum / loss_evals)
                    loss_generation = cuda.to_cpu(loss_generation / loss_evals)
                    loss_lm_margin = cuda.to_cpu(loss_lm_margin / loss_evals)
                    loss_emb_margin = cuda.to_cpu(loss_emb_margin / loss_evals)
                    fin_val_acc = accuracy / loss_evals
                    break
            val_loss_history.append(fin_val_loss)
            val_loss_lm_s_history.append(loss_generation)
            val_loss_lm_l_history.append(loss_lm_margin)
            val_loss_l_history.append(loss_emb_margin)
            val_acc_history.append(fin_val_acc)
            if min_val_loss > fin_val_loss:
                print('val loss {} -> {} improved!'.format(
                    min_val_loss, val_loss_history[-1]))
                min_val_loss = fin_val_loss
                serializers.save_hdf5(
                    osp.join(model_dir,
                             params['id'] + params['id2'] + "ve.h5"), ve)
                serializers.save_hdf5(
                    osp.join(model_dir,
                             params['id'] + params['id2'] + "le.h5"), le)
                serializers.save_hdf5(
                    osp.join(model_dir,
                             params['id'] + params['id2'] + "cca.h5"), cca)
                serializers.save_hdf5(
                    osp.join(model_dir,
                             params['id'] + params['id2'] + "lm.h5"), lm)

            ## graph
            plt.title("accuracy")
            plt.plot(np.arange(len(val_acc_history)),
                     val_acc_history,
                     label="val_accuracy")
            plt.legend()
            plt.savefig(
                os.path.join(graph_dir,
                             params['id'] + params['id2'] + "_joint_acc.png"))
            plt.close()

            plt.title("loss")
            plt.plot(np.arange(len(val_loss_history)),
                     val_loss_history,
                     label="all_loss")
            plt.plot(np.arange(len(val_loss_history)),
                     val_loss_lm_s_history,
                     label="generation_loss")
            plt.legend()
            plt.savefig(
                os.path.join(graph_dir,
                             params['id'] + params['id2'] + "_joint_loss.png"))
            plt.close()

            plt.title("loss")
            plt.plot(np.arange(len(val_loss_history)),
                     val_loss_lm_l_history,
                     label="lm_comp_loss")
            plt.plot(np.arange(len(val_loss_history)),
                     val_loss_l_history,
                     label="comp_loss")
            plt.legend()
            plt.savefig(
                os.path.join(
                    graph_dir,
                    params['id'] + params['id2'] + "_joint_comp_loss.png"))
            plt.close()

            if params['dataset'] == 'refgta':
                print(rank_num)
                val_rank_acc_history.append(rank_acc / rank_num)
                plt.title("rank loss")
                plt.plot(np.arange(len(val_rank_acc_history)),
                         val_rank_acc_history,
                         label="rank_acc")
                plt.legend()
                plt.savefig(
                    os.path.join(
                        graph_dir,
                        params['id'] + params['id2'] + "_rank_acc.png"))
                plt.close()

        if iteration > params['learning_rate_decay_start'] and params[
                'learning_rate_decay_start'] >= 0:
            frac = (iteration - params['learning_rate_decay_start']
                    ) / params['learning_rate_decay_every']
            decay_factor = math.pow(0.1, frac)
            ve_optim.alpha *= decay_factor
            le_optim.alpha *= decay_factor
            cca_optim.alpha *= decay_factor
            lm_optim.alpha *= decay_factor

        iteration += 1
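The example above saves four separate sub-models (ve, le, cca, lm) whenever the validation loss improves. A minimal sketch of that checkpoint-on-improvement pattern, factored into a reusable helper (the helper name, model_dir, and the component keywords are placeholders, not part of any particular library):

import os.path as osp
from chainer import serializers

def save_checkpoint(model_dir, prefix, **components):
    # Write each named sub-model (e.g. ve, le, cca, lm) to its own HDF5 file.
    for name, component in components.items():
        serializers.save_hdf5(osp.join(model_dir, prefix + name + '.h5'), component)

# Hypothetical usage mirroring the loop above:
# if fin_val_loss < min_val_loss:
#     min_val_loss = fin_val_loss
#     save_checkpoint(model_dir, params['id'] + params['id2'],
#                     ve=ve, le=le, cca=cca, lm=lm)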
Example #51
0
    # (BatchNormalization.avg_var has the same size as out_ch)
    txt = "yolov2.bn%d.avg_var = dat[%d:%d]" % (i + 1, offset, offset + out_ch)
    offset += out_ch
    exec(txt)

    # load convolution weights (Convolution2D.W has out_ch * in_ch * filter size elements; reshape it to (out_ch, in_ch, 3, 3))
    txt = "yolov2.conv%d.W.data = dat[%d:%d].reshape(%d, %d, %d, %d)" % (
        i + 1, offset, offset + (out_ch * in_ch * ksize * ksize), out_ch, in_ch, ksize, ksize)
    offset += (out_ch * in_ch * ksize * ksize)
    exec(txt)
    print(i + 1, offset)

# load the last convolution weights (only the Bias and Convolution2D parameters are loaded)
in_ch = 1024
out_ch = last_out
ksize = 1

txt = "yolov2.bias%d.b.data = dat[%d:%d]" % (i + 2, offset, offset + out_ch)
offset += out_ch
exec(txt)

txt = "yolov2.conv%d.W.data = dat[%d:%d].reshape(%d, %d, %d, %d)" % (
    i + 2, offset, offset + (out_ch * in_ch * ksize * ksize), out_ch, in_ch, ksize, ksize)
offset += out_ch * in_ch * ksize * ksize
exec(txt)
print(i + 2, offset)

print("save weights file to yolov2_darknet.model")
serializers.save_hdf5("yolov2_darknet.model", yolov2)
# The trainer decides how many times to repeat training.
# Here, training is repeated for epoch_size epochs.
trainer = training.Trainer(updater, (epoch_size, 'epoch'))

# Display the training progress.
trainer.extend(extensions.ProgressBar())

# Start training
trainer.run()

# Check that the trained model behaves correctly
print("Answer:", end="")

ok = 0
# Try all of the test data
for test_i in test:
    # Convert the image data of test_i into a Variable (Chainer's own type)
    x = Variable(np.array([test_i[0]], dtype=np.float32))
    # Ground-truth label of test_i
    t = test_i[1]
    # Feed x into the model and get the forward-pass output (a list)
    out = model.fwd(x)
    # Take the index of the unit with the largest output as the answer.
    ans = np.argmax(out.data)
    if (ans == t): ok += 1

print((ok * 1.0) / len(test))

# Save the model in HDF5 format
serializers.save_hdf5(modelname, model)
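save_hdf5 requires h5py; if that dependency is unavailable, Chainer's NPZ serializers offer an equivalent save/load pair. A minimal sketch, reusing modelname and model from the snippet above:

from chainer import serializers

serializers.save_npz(modelname + '.npz', model)   # same contents, NumPy .npz container
serializers.load_npz(modelname + '.npz', model)   # restores the parameters in place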
Example #53
0
            #util.evaluate_dataset(vae, X_train, batch_size, train_log_file, False, opt)
            util.evaluate_dataset(vae, X_validation, batch_size, test_log_file,
                                  False, opt)

            if (
                (args['-o'] is not None)
                    and ((bi - 1) % (log_interval * 100) == 0)
            ):  #Additional *100 term because we don't want a checkpoint every log point
                print(
                    '##################### Saving Model Checkpoint     #####################'
                )

                batch_number = str(bi).zfill(6)
                modelfile = directory + '/' + batch_number + '.h5'
                print "Writing model checkpoint to '%s' ..." % (modelfile)
                serializers.save_hdf5(modelfile, vae)

        # (Optionally:) visualize computation graph
        if bi == 1 and args['--vis'] is not None:
            print "Writing computation graph to '%s/%s'." % (directory,
                                                             args['--vis'])
            g = computational_graph.build_computational_graph([obj])
            util.print_compute_graph(directory + '/' + args['--vis'], g)

        # Sample a set of poses
        if (bi % sample_every_epoch == 0) and data_type == 'pose':
            counter += 1
            print "   # sampling"
            z = np.random.normal(loc=0.0, scale=1.0, size=(1024, nlatent))
            z = chainer.Variable(xp.asarray(z, dtype=np.float32),
                                 volatile='ON')
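The snippet above builds the computation graph from [obj] and hands it to util.print_compute_graph. Without that helper, the same one-off dump can be written directly with the graph object's dump() method; a minimal sketch, assuming obj is the loss Variable from the forward pass and directory already exists:

from chainer import computational_graph

g = computational_graph.build_computational_graph([obj])
with open(directory + '/graph.dot', 'w') as fo:
    fo.write(g.dump())  # Graphviz dot source; render with e.g. `dot -Tpng graph.dot`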
Example #54
0
 def save(self, filename):
     if os.path.isfile(filename):
         os.remove(filename)
     serializers.save_hdf5(filename, self)
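The natural counterpart to this save helper is a load that restores the parameters into an already-constructed object of the same architecture. A minimal sketch (the function name is arbitrary; obj is any Chainer Link or Chain):

from chainer import serializers

def load(obj, filename):
    # Restore parameters in place; obj must be built with the same structure
    # as the object that was saved.
    serializers.load_hdf5(filename, obj)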
Example #55
0
        optimizer.update()
        #g_accum_loss=0
        return loss.data, accuracy.data
    else:
        loss, accuracy = model(x, t, True)
        return accuracy.data

    #return loss.data, accuracy.data


# Compute the standard deviation
g_stdDev = compute_stdDeviation(model, pathList)
# Start training
train_loop()
#write stdDeviation array
pickle.dump(g_stdDev, open('sigma.npy', 'wb'), -1)

#write log (train)
#dicPlot=dict(zip(logArray[::2],logArray[1::2]))
#with open('plot.json', 'w') as f:
#	json.dump(dicPlot, f, sort_keys=True, indent=4)

#write log (validation)
#dicPlot2=dict(zip(logArray2[::2],logArray2[1::2]))
#with open('plotV.json', 'w') as f:
#	json.dump(dicPlot2, f, sort_keys=True, indent=4)

# Save final model
model.to_cpu()
serializers.save_hdf5('modelhdf5', model)
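The script moves the model to the CPU before the final save and also pickles the standard-deviation array. A minimal sketch of reading both back for later inference; model is assumed to be rebuilt with the same architecture first, and to_gpu is only needed when running on a GPU:

import pickle
from chainer import serializers

serializers.load_hdf5('modelhdf5', model)   # model rebuilt with the same architecture
model.to_gpu()                              # optional: move back to the GPU
with open('sigma.npy', 'rb') as f:
    g_stdDev = pickle.load(f)               # the pickled standard-deviation array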
Example #56
0
    # parser.add_argument('--lr_decay_ratio', type=float, default=0.1)

    # parser.add_argument('--restart_from', type=str)
    # parser.add_argument('--epoch_offset', type=int, default=0)

    # parser.add_argument('--flip', type=int, default=0)
    # parser.add_argument('--rot', type=int, default=0)
    # parser.add_argument('--shift', type=int, default=0)
    parser.add_argument('--transformations', type=str,
                        default='')  # ast.literal_eval, default={})

    # parser.add_argument('--size', type=int, default=28)

    parser.add_argument('--val_freq', type=int, default=10)
    parser.add_argument('--save_freq', type=int, default=10)

    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--gpu', type=int, default=0)

    # parser.add_argument('--snapshot_freq', type=int, default=10)

    args = parser.parse_args()

    val_error, model, resdict = train(logme=vars(args), **vars(args))

    print 'Finished training'
    print 'Final validation error:', val_error
    print 'Saving model...'
    import chainer.serializers as sl
    sl.save_hdf5('./my.model', model)
Example #57
0
 def save_model(self, model_filename):
     """Save a network model to a file
     """
     serializers.save_hdf5(model_filename, self.model)
     serializers.save_hdf5(model_filename + '.opt', self.optimizer)
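Because the optimizer state is saved alongside the model, training can be resumed rather than restarted. A minimal sketch of the matching load method, written in the same fragment style as the example and assuming self.model and self.optimizer were constructed and set up the same way before loading:

 def load_model(self, model_filename):
     """Load a network model and its optimizer state from files
     """
     serializers.load_hdf5(model_filename, self.model)
     serializers.load_hdf5(model_filename + '.opt', self.optimizer)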
Example #58
0
            }, open('epoch%d_loss_by_tree.json' % (epoch + 1), 'w'))
        loss_curve.append(sum_loss / len(train_trees))
        print('train loss: {:.2f}'.format(sum_loss / len(train_trees)))

        print("Development data evaluation:")
        t = Thread(target=traverse_dev,
                   args=(copy.deepcopy(model), dev_trees, dev_loss, args.gpus))
        t.start()

        throughput = float(len(train_trees)) / (now - cur_time)
        print('{:.2f} iter/sec, {:.2f} sec'.format(throughput, now - cur_time))
        print()

        if (epoch + 1) % args.evalinterval == 0:
            print("Model saving...")
            serializers.save_hdf5('./epoch_' + str(epoch + 1) + '.model',
                                  model)

    json.dump({"loss": dev_loss}, open('dev_loss_by_epoch.json', 'w'))

    for i, fp in enumerate(args.reorderfile):
        with codecs.open(fp + '.reordered', 'w', 'utf-8') as fre:
            for tree in rtrees[i]:
                _, pred = traverse(model, tree, train=False, pred=True)
                print(' '.join(pred), file=fre)

    # Plot the loss for each epoch
    t.join()
    plt.clf()
    plt.figure(figsize=(8, 8))
    plt.plot(np.array([i + 1 for i in range(args.epoch)]),
             np.array(loss_curve),
Example #59
0
def train(modelfn,
          trainfn,
          valfn,
          epochs,
          batchsize,
          opt,
          opt_kwargs,
          net_kwargs,
          transformations,
          val_freq,
          save_freq,
          seed,
          gpu,
          silent=False,
          logme=None):

    # Set the seed
    np.random.seed(seed)

    # Load and pre-process the data
    try:
        datadir = os.environ['DATADIR']
    except KeyError:
        raise RuntimeError(
            'Please set DATADIR environment variable (e.g. in ~/.bashrc) '
            'to a folder containing the required datasets.')

    train_set = np.load(os.path.join(datadir, trainfn))
    val_set = np.load(os.path.join(datadir, valfn))
    train_data = train_set['data']
    train_labels = train_set['labels']
    val_data = val_set['data']
    val_labels = val_set['labels']
    train_data, val_data, train_labels, val_labels = preprocess_mnist_data(
        train_data, val_data, train_labels, val_labels)

    # create result dir
    log_fn, result_dir = create_result_dir(modelfn, logme)

    # create model and optimizer
    model, optimizer = get_model_and_optimizer(result_dir, modelfn, opt,
                                               opt_kwargs, net_kwargs, gpu)

    # get the last commit
    subp = subprocess.Popen(['git', 'rev-parse', 'HEAD'],
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    out, err = subp.communicate()
    commit = out.strip()
    if err.strip():
        logging.error('Subprocess returned %s' % err.strip())
    logging.info('Commit: ' + commit)

    # Get number of parameters
    # if not silent:
    #     print 'Parameter name, shape, size:'
    #     for p in model.params():
    #         print p.name, p.data.shape, p.data.size
    num_params = sum([p.data.size for p in model.params()])
    logging.info('Number of parameters:' + str(num_params))
    if not silent:
        print 'Number of parameters:' + str(num_params)

    n_train = train_data.shape[0]
    n_val = val_data.shape[0]

    logging.info('start training...')

    train_epochs = []
    train_errors = []
    train_losses = []
    train_times = []
    val_epochs = []
    val_errors = []
    val_losses = []
    val_times = []

    begin_time = time.time()

    sum_loss, sum_accuracy = validate(val_data, val_labels, model, batchsize,
                                      silent, gpu)
    val_times.append(time.time() - begin_time)
    val_epochs.append(0)
    val_errors.append(1. - sum_accuracy / n_val)
    val_losses.append(sum_loss / n_val)
    msg = 'epoch:{:02d}\ttest mean loss={}, error={}'.format(
        0, sum_loss / n_val, 1. - sum_accuracy / n_val)
    logging.info(msg)
    if not silent:
        print '\n%s' % msg

    # learning loop
    for epoch in range(1, epochs + 1):

        sum_loss, sum_accuracy = train_epoch(train_data, train_labels, model,
                                             optimizer, batchsize,
                                             transformations, silent, gpu)
        train_times.append(time.time() - begin_time)
        train_epochs.append(epoch)
        train_errors.append(1. - sum_accuracy / n_train)
        train_losses.append(sum_loss / n_train)
        msg = 'epoch:{:02d}\ttrain mean loss={}, error={}'.format(
            epoch, sum_loss / n_train, 1. - sum_accuracy / n_train)
        logging.info(msg)
        if not silent:
            print '\n%s' % msg

        if epoch % val_freq == 0:
            print 'FINETUNING'
            model.start_finetuning()
            sum_loss, sum_accuracy = train_epoch(train_data,
                                                 train_labels,
                                                 model,
                                                 optimizer,
                                                 batchsize,
                                                 transformations,
                                                 silent,
                                                 gpu,
                                                 finetune=True)
            msg = 'epoch:{:02d}\tfinetune mean loss={}, error={}'.format(
                epoch, sum_loss / n_train, 1. - sum_accuracy / n_train)
            logging.info(msg)
            if not silent:
                print '\n%s' % msg

            sum_loss, sum_accuracy = validate(val_data, val_labels, model,
                                              batchsize, silent, gpu)
            val_times.append(time.time() - begin_time)
            val_epochs.append(epoch)
            val_errors.append(1. - sum_accuracy / n_val)
            val_losses.append(sum_loss / n_val)
            msg = 'epoch:{:02d}\ttest mean loss={}, error={}'.format(
                epoch, sum_loss / n_val, 1. - sum_accuracy / n_val)
            logging.info(msg)
            if not silent:
                print '\n%s' % msg

            mean_error = 1.0 - sum_accuracy / n_val

        if save_freq > 0 and epoch % save_freq == 0:
            print 'Saving model...'
            serializers.save_hdf5(
                os.path.join(result_dir, 'epoch.' + str(epoch) + '.model'),
                model)

    print 'Saving model...'
    serializers.save_hdf5(os.path.join(result_dir, 'final.model'), model)

    resdict = {
        'train_times': train_times,
        'train_epochs': train_epochs,
        'train_errors': train_errors,
        'train_losses': train_losses,
        'val_times': val_times,
        'val_epochs': val_epochs,
        'val_errors': val_errors,
        'val_losses': val_losses
    }

    print 'Saving results...'
    with open(os.path.join(result_dir, 'results.pickle'), 'wb') as handle:
        pickle.dump(resdict, handle)

    return mean_error, model, resdict
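With checkpoints named epoch.<N>.model plus a final.model, a small helper can locate the most recent epoch snapshot when resuming analysis. A minimal sketch, assuming the naming scheme above and that model is constructed with the same modelfn/net_kwargs; the helper name is arbitrary:

import glob
import os
import re
from chainer import serializers

def load_latest_checkpoint(result_dir, model):
    # Pick the highest-numbered 'epoch.N.model' file; fall back to final.model.
    paths = glob.glob(os.path.join(result_dir, 'epoch.*.model'))
    if paths:
        latest = max(paths,
                     key=lambda p: int(re.search(r'epoch\.(\d+)\.model', p).group(1)))
    else:
        latest = os.path.join(result_dir, 'final.model')
    serializers.load_hdf5(latest, model)
    return latest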
Example #60
0
def train(args):
    vocab = Vocabulary.from_conll(args.train, args.vocab)
    train_dataset = [conll_to_train(x, vocab) for x in read_conll(args.train)]
    dev_dataset = [conll_to_train(x, vocab) for x in read_conll(args.dev)]

    parser = Parser(args.vocab, args.embed, args.hidden, args.depth)
    if args.gpu >= 0:
        parser.to_gpu()

    opt = optimizers.AdaGrad(lr=0.01)
    opt.setup(parser)
    opt.add_hook(optimizer.GradientClipping(10))
    opt.add_hook(optimizer.WeightDecay(0.0001))

    for epoch in range(args.epoch):
        random.shuffle(train_dataset)

        parser.zerograds()
        loss = XP.fzeros(())

        for i, data in enumerate(train_dataset):
            trace('epoch %3d: train sample %6d:' % (epoch + 1, i + 1))
            parent_scores, root_scores = parser.forward(data)
            if len(data) > 1:
                parent_scores = functions.split_axis(parent_scores, len(data),
                                                     0)
            else:
                parent_scores = (parent_scores, )

            root = -1
            for j, (p_scores, (wid,
                               parent)) in enumerate(zip(parent_scores, data)):
                if parent == -1:
                    trace('  %3d: root' % j)
                    root = j
                else:
                    parent_est = p_scores.data.argmax()
                    trace('%c %3d -> %3d (%3d)' %
                          ('*' if parent == parent_est else ' ', j, parent_est,
                           parent))
                    loss += functions.softmax_cross_entropy(
                        p_scores, XP.iarray([parent]))

            root_est = root_scores.data.argmax()
            trace('ROOT: %3d (%3d)' % (root_est, root))
            loss += functions.softmax_cross_entropy(root_scores,
                                                    XP.iarray([root]))

            if (i + 1) % 200 == 0:
                loss.backward()
                opt.update()
                parser.zerograds()
                loss = XP.fzeros(())

        loss.backward()
        opt.update()
        trace('epoch %3d: trained.                        ' % (epoch + 1))

        parent_num = 0
        parent_match = 0
        root_num = 0
        root_match = 0
        for i, data in enumerate(dev_dataset):
            trace('epoch %3d: dev sample %6d:' % (epoch + 1, i + 1),
                  rollback=True)
            parent_scores, root_scores = parser.forward(data)
            if len(data) > 1:
                parent_scores = functions.split_axis(parent_scores, len(data),
                                                     0)
            else:
                parent_scores = (parent_scores, )

            root = -1
            for j, (p_scores, (wid,
                               parent)) in enumerate(zip(parent_scores, data)):
                if parent == -1:
                    root = j
                else:
                    parent_est = p_scores.data.argmax()
                    parent_num += 1
                    parent_match += 1 if parent_est == parent else 0

            root_est = root_scores.data.argmax()
            root_num += 1
            root_match += 1 if root_est == root else 0

        result_str = \
          'epoch %3d: dev: parent-acc = %.4f (%5d/%5d), root-acc = %.4f (%4d/%4d)' % \
          ( \
            epoch + 1, \
            parent_match / parent_num, parent_match, parent_num, \
            root_match / root_num, root_match, root_num)
        trace(result_str)

        with open(args.model + '.log', 'a') as fp:
            print(result_str, file=fp)

        trace('epoch %3d: saving models ...' % (epoch + 1))
        prefix = args.model + '.%03d' % (epoch + 1)
        vocab.save(prefix + '.vocab')
        parser.save_spec(prefix + '.parent_spec')
        serializers.save_hdf5(prefix + '.parent_weights', parser)

    trace('finished.')