コード例 #1
0
ファイル: img2seq.py プロジェクト: yzchair/LaTeX_OCR
    def _run_epoch(self, config, train_set, val_set, epoch, lr_schedule):
        """Run one epoch of training followed by an evaluation on `val_set`.

        Args:
            config: Config instance; `batch_size`, `dropout` and `metric_val`
                are read from it.
            train_set: Dataset instance, iterated in minibatches.
            val_set: Dataset instance, passed to `self.evaluate`.
            epoch: (int) id of the epoch, starting at 0
            lr_schedule: LRSchedule instance that takes care of learning proc;
                it is updated after every batch and once more with the
                validation score.

        Returns:
            score: (float) `scores[config.metric_val]` from the evaluation;
                model will select weights that achieve the highest score

        """
        # logging
        batch_size = config.batch_size
        nbatches = (len(train_set) + batch_size - 1) // batch_size
        prog = Progbar(nbatches)

        # iterate over dataset
        for i, (img, formula) in enumerate(minibatches(train_set, batch_size)):
            # index of this batch counted from the start of training
            global_step = epoch * nbatches + i

            # get feed dict
            fd = self._get_feed_dict(img,
                                     training=True,
                                     formula=formula,
                                     lr=lr_schedule.lr,
                                     dropout=config.dropout)

            # update step; the merged summary is fetched in the same run so
            # the forward pass is not recomputed just to produce the summary
            _, loss_eval, summary_str = self.sess.run(
                [self.train_op, self.loss, self.merged], feed_dict=fd)
            prog.update(i + 1, [("loss", loss_eval),
                                ("perplexity", np.exp(loss_eval)),
                                ("lr", lr_schedule.lr)])

            # update learning rate
            lr_schedule.update(batch_no=global_step)

            # write the summary at the per-batch global step; using `epoch`
            # as the step would stack every batch of an epoch on one x-value
            self.file_writer.add_summary(summary_str, global_step)

            # Disabled debugging aid: training is slow, so a checkpoint could
            # be saved every 100 batches to debug attention, e.g.
            #     if (i + 1) % 100 == 0:
            #         self.save_debug_session(epoch, i)

        # logging
        self.logger.info("- Training: {}".format(prog.info))

        # evaluation
        config_eval = Config({
            "dir_answers": self._dir_output + "formulas_val/",
            "batch_size": config.batch_size
        })
        scores = self.evaluate(config_eval, val_set)
        score = scores[config.metric_val]
        lr_schedule.update(score=score)

        return score
コード例 #2
0
    def _run_train_epoch(self, config, train_set, val_set, epoch, lr_schedule):
        """Run one epoch of training, then evaluate on the validation set.
        Args:
            config: Config instance; `batch_size` and `dropout` are read here
            train_set: Dataset instance, iterated with `minibatches`
            val_set: Dataset instance, passed to `self.evaluate`
            epoch: (int) id of the epoch, starting at 0
            lr_schedule: LRSchedule instance that takes care of learning proc
        Returns:
            score: (float) `scores["perplexity"]` from the evaluation; model
                will select weights that achieve the highest score
        """
        # logging
        batch_size = config.batch_size
        nbatches = (len(train_set) + batch_size - 1) // batch_size
        prog = Progbar(nbatches)
        self.model.train()
        self.encoder.train()
        self.decoder.train()

        # Batches come from `minibatches`; the DataLoader that used to be
        # built here was never iterated, so it is no longer constructed.
        for i, (img, formula) in enumerate(minibatches(train_set, batch_size)):
            img = pad_batch_images_2(img)
            img = torch.FloatTensor(img)  # (N, W, H, C)
            # the per-formula lengths returned alongside are not needed here
            formula, _ = pad_batch_formulas(
                formula, self._vocab.id_pad, self._vocab.id_end)
            img = img.permute(0, 3, 1, 2)  # (N, C, W, H)
            # padded token ids; presumably (N, max_len) -- TODO confirm
            formula = torch.LongTensor(formula)

            loss_eval = self.getLoss(img,
                                     formula=formula,
                                     lr=lr_schedule.lr,
                                     dropout=config.dropout,
                                     training=True)
            prog.update(i + 1, [("loss", loss_eval), ("lr", lr_schedule.lr)])

            # update learning rate
            lr_schedule.update(batch_no=epoch * nbatches + i)

        self.logger.info("- Training: {}".format(prog.info))
        self.logger.info("- Config: (before evaluate, we need to see config)")
        config.show(fun=self.logger.info)

        # evaluation
        config_eval = Config({
            "dir_answers": self._dir_output + "formulas_val/",
            "batch_size": config.batch_size
        })
        scores = self.evaluate(config_eval, val_set)
        score = scores["perplexity"]
        lr_schedule.update(score=score)

        return score
コード例 #3
0
def main(data, vocab, training, model, output):
    """Export the padded training minibatches to NumPy files and return.

    Builds the config and vocabulary, iterates the training set in
    minibatches, and saves the collected image batches and padded formula
    batches with `np.save` under the names 'np_img' and 'np_formula'.

    NOTE(review): the early `return` below makes the validation-set,
    learning-rate-schedule and model-training code after it unreachable.
    It looks like a temporary export hack; drop the export section and the
    `return` to restore training.

    Args:
        data: config source, passed to `Config` together with the others
        vocab: config source for the vocabulary (rebound to a `Vocab` below)
        training: config source for training parameters
        model: config source for the model (rebound to the model object in
            the currently unreachable training section)
        output: output directory; the merged config is saved there
    """
    # Load configs
    dir_output = output
    config = Config([data, vocab, training, model])
    config.save(dir_output)
    vocab = Vocab(config)

    # Load datasets
    train_set = DataGenerator(path_formulas=config.path_formulas_train,
            dir_images=config.dir_images_train, img_prepro=greyscale,
            max_iter=config.max_iter, bucket=config.bucket_train,
            path_matching=config.path_matching_train,
            max_len=config.max_length_formula,
            form_prepro=vocab.form_prepro)

    # Collect every minibatch. The batch size is taken from the config: no
    # `batch_size` name is defined in this function, and the schedule code
    # further down already uses `config.batch_size`.
    all_img = []
    all_formula = []
    for _img, _formula in minibatches(train_set, config.batch_size):
        all_img.append(_img)
        if _formula is not None:
            _formula, _ = pad_batch_formulas(
                _formula,
                vocab.id_pad,
                vocab.id_end
            )
        all_formula.append(_formula)

    # NOTE(review): if batches differ in shape these lists are ragged and
    # recent NumPy versions may require dtype=object here -- confirm.
    np.save('np_formula', np.array(all_formula))
    np.save('np_img', np.array(all_img))

    print("DONE EXPORTING NUMPY FILES")
    return None

    # ---- unreachable while the early return above is in place ----
    val_set = DataGenerator(path_formulas=config.path_formulas_val,
            dir_images=config.dir_images_val, img_prepro=greyscale,
            max_iter=config.max_iter, bucket=config.bucket_val,
            path_matching=config.path_matching_val,
            max_len=config.max_length_formula,
            form_prepro=vocab.form_prepro)

    # Define learning rate schedule
    n_batches_epoch = ((len(train_set) + config.batch_size - 1) //
                        config.batch_size)
    lr_schedule = LRSchedule(lr_init=config.lr_init,
            start_decay=config.start_decay*n_batches_epoch,
            end_decay=config.end_decay*n_batches_epoch,
            end_warm=config.end_warm*n_batches_epoch,
            lr_warm=config.lr_warm,
            lr_min=config.lr_min)

    # Build model and train
    model = Img2SeqModel(config, dir_output, vocab)
    model.build_train(config)
    model.train(config, train_set, val_set, lr_schedule)
コード例 #4
0
ファイル: img2seq.py プロジェクト: yzchair/LaTeX_OCR
    def write_prediction(self, config, test_set):
        """Decode `test_set`, write references and hypotheses, return a score.

        Args:
            config: (Config) with batch_size and dir_answers
            test_set:(Dataset) instance

        Returns:
            files: (list) of path to files, as returned by `write_answers`
            perp: (float) `-exp(ce_words / n_words)` accumulated over the
                test set, i.e. the negated perplexity

        Raises:
            ValueError: if `self._config.decoding` is neither "greedy" nor
                "beam_search".
            ZeroDivisionError: if `test_set` yields no batch, since the word
                count then stays at 0.

        """
        # initialize containers of references and predictions: one list of
        # hypotheses per decoded candidate (1 for greedy, beam_size for beam)
        decoding = self._config.decoding
        if decoding == "greedy":
            n_hyps = 1
        elif decoding == "beam_search":
            n_hyps = self._config.beam_size
        else:
            # fail early instead of hitting unbound `refs`/`hyps` later
            raise ValueError(
                "Unsupported decoding mode: {!r}".format(decoding))
        refs = []
        hyps = [[] for _ in range(n_hyps)]

        # iterate over the dataset
        n_words, ce_words = 0, 0  # sum of ce for all words + nb of words
        for img, formula in minibatches(test_set, config.batch_size):
            fd = self._get_feed_dict(img,
                                     training=False,
                                     formula=formula,
                                     dropout=1)
            ce_words_eval, n_words_eval, ids_eval = self.sess.run(
                [self.ce_words, self.n_words, self.pred_test.ids],
                feed_dict=fd)
            # TODO(guillaume): move this logic into tf graph
            # bring both decoders' output to one layout so that iterating an
            # example yields its candidate sequences one by one
            if decoding == "greedy":
                ids_eval = np.expand_dims(ids_eval, axis=1)
            else:  # beam_search
                ids_eval = np.transpose(ids_eval, [0, 2, 1])

            n_words += n_words_eval
            ce_words += ce_words_eval

            for form, preds in zip(formula, ids_eval):
                refs.append(form)
                for k, pred in enumerate(preds):
                    hyps[k].append(pred)

        files = write_answers(refs, hyps, self._vocab.id_to_tok,
                              config.dir_answers, self._vocab.id_end)

        # negated, presumably so that a higher value means a better model --
        # confirm against the caller
        perp = -np.exp(ce_words / float(n_words))

        return files, perp
コード例 #5
0
ファイル: img2seq_2.py プロジェクト: zengqi0730/LaTeX_OCR_PRO
    def write_prediction(self, config, test_set):
        """Decode `test_set`, write references and hypotheses, return a score.

        A progress bar reports the running score after every batch.

        Args:
            config: (Config) with batch_size and dir_answers
            test_set:(Dataset) instance

        Returns:
            files: (list) of path to files, as returned by `write_answers`
            perp: (float) `-exp(ce_words / n_words)` accumulated over the
                test set, i.e. the negated perplexity

        Raises:
            ValueError: if `self._config.decoding` is neither "greedy" nor
                "beam_search".
            ZeroDivisionError: if `test_set` yields no batch, since the word
                count then stays at 0.

        """
        # initialize containers of references and predictions: one list of
        # hypotheses per decoded candidate (1 for greedy, beam_size for beam)
        decoding = self._config.decoding
        if decoding == "greedy":
            n_hyps = 1
        elif decoding == "beam_search":
            n_hyps = self._config.beam_size
        else:
            # fail early instead of hitting unbound `refs`/`hyps` later
            raise ValueError(
                "Unsupported decoding mode: {!r}".format(decoding))
        refs = []
        hyps = [[] for _ in range(n_hyps)]

        nbatches = (len(test_set) + config.batch_size - 1) // config.batch_size
        prog = Progbar(nbatches)
        n_words, ce_words = 0, 0  # sum of ce for all words + nb of words
        for i, (img,
                formula) in enumerate(minibatches(test_set,
                                                  config.batch_size)):
            fd = self._get_feed_dict(img, formula=formula, dropout=1)
            ce_words_eval, n_words_eval, ids_eval = self.sess.run(
                [self.ce_words, self.n_words, self.pred_test.ids],
                feed_dict=fd)

            # bring both decoders' output to one layout so that iterating an
            # example yields its candidate sequences one by one
            if decoding == "greedy":
                ids_eval = np.expand_dims(ids_eval, axis=1)
            else:  # beam_search
                ids_eval = np.transpose(ids_eval, [0, 2, 1])
            n_words += n_words_eval
            ce_words += ce_words_eval

            for form, preds in zip(formula, ids_eval):
                refs.append(form)
                for j, pred in enumerate(preds):
                    hyps[j].append(pred)

            # running score over all batches seen so far
            prog.update(i + 1,
                        [("perplexity", -np.exp(ce_words / float(n_words)))])

        files = write_answers(refs, hyps, self._vocab.id_to_tok,
                              config.dir_answers, self._vocab.id_end)

        # negated, presumably so that a higher value means a better model --
        # confirm against the caller
        perp = -np.exp(ce_words / float(n_words))

        return files, perp
コード例 #6
0
    def _run_train(self, config, train_set, val_set, epoch, lr_schedule):
        """Run one epoch of generator/discriminator training, then evaluate.

        The training set is shuffled first. Every batch runs the generator
        update; the discriminator update runs only for batches whose
        generator loss is at most 1.

        Args:
            config: Config instance; `batch_size` and `dropout` are read here
            train_set: Dataset instance; shuffled in place via `shuffle()`
            val_set: Dataset instance, passed to `self.evaluate`
            epoch: (int) id of the epoch, starting at 0
            lr_schedule: LRSchedule instance that takes care of learning proc

        Returns:
            score: (float) perplexity score plus one tenth of the sum of
                ExactMatchScore, BLEU-4 and EditDistance; model will select
                weights that achieve the highest score

        """
        # logging
        batch_size = config.batch_size
        train_set.shuffle()
        nbatches = (len(train_set) + batch_size - 1) // batch_size
        prog = Progbar(nbatches)

        # iterate over dataset
        for i, (img, formula) in enumerate(minibatches(train_set, batch_size)):
            # index of this batch counted from the start of training
            global_step = epoch * nbatches + i

            # get feed dict
            fd = self._get_feed_dict(img, formula=formula, lr=lr_schedule.lr, dropout=config.dropout)
            # Disabled experiment: use a random dropout value instead, e.g.
            #     random_dropout = 0.5 + random.random() * 0.5
            #     fd = self._get_feed_dict(img, formula=formula, lr=lr_schedule.lr, dropout=random_dropout)

            # update step
            _, G_loss_eval = self.sess.run([self.train_op, self.loss], feed_dict=fd)
            if G_loss_eval <= 1:
                # Only train the discriminator once the generator loss has
                # dropped to 1 or below; training it earlier seems pointless.
                _, D_loss_eval = self.sess.run([self.D_optimizer, self.D_loss], feed_dict=fd)
            else:
                D_loss_eval = 0
            prog.update(i + 1, [("D_loss", D_loss_eval),
                                ("G_loss", G_loss_eval),
                                ("G_perplexity", np.exp(G_loss_eval)),
                                ("lr", lr_schedule.lr)])

            # update learning rate
            lr_schedule.update(batch_no=global_step)

            # Emit a summary every 10 batches, written at the per-batch
            # global step; using `epoch` as the step would stack all
            # summaries of an epoch on one x-value.
            if (i + 1) % 10 == 0:
                summary_str = self.sess.run(self.merged, feed_dict=fd)
                self.file_writer.add_summary(summary_str, global_step)

            # Disabled debugging aid: training is slow, so a checkpoint could
            # be saved every 100 batches to debug attention, e.g.
            #     if (i + 1) % 100 == 0:
            #         self.save_debug_session(epoch, i)

        # logging
        self.logger.info("- Training: {}".format(prog.info))

        # evaluation
        config_eval = Config({
            "dir_answers": self._dir_output + "formulas_val/",
            "batch_size": config.batch_size
        })
        scores = self.evaluate(config_eval, val_set)
        score = scores["perplexity"] + (scores["ExactMatchScore"] + scores["BLEU-4"] + scores["EditDistance"]) / 10
        lr_schedule.update(score=score)

        return score
コード例 #7
0
    def _run_evaluate_epoch(self, config, test_set):
        """Performs an epoch of evaluation

        Puts model, encoder and decoder in eval mode, runs the encoder and
        decoder over `test_set` under `torch.no_grad()`, writes answers via
        `write_answers` and logs the progress-bar info.

        NOTE(review): this block looks unfinished -- see the inline review
        notes below before relying on its output.

        Args:
            config: Config instance; only `dir_answers` is read here
            test_set: Dataset instance
        Returns:
            (dict) with the single key "perplexity", holding `loss.item()`
            of the last processed batch (the raw loss including the attention
            regularization term; no exponentiation is applied here)
        """
        self.model.eval()
        self.encoder.eval()
        self.decoder.eval()
        # initialize containers of references and predictions
        # NOTE(review): any other decoding value leaves refs/hyps unbound
        if self._config.decoding == "greedy":
            refs, hyps = [], [[]]
        elif self._config.decoding == "beam_search":
            refs, hyps = [], [[] for i in range(self._config.beam_size)]
        # NOTE(review): `references` and `hypotheses` are never used below
        references = list()  # references (true captions) for calculating BLEU-4 score
        hypotheses = list()  # hypotheses (predictions)
        with torch.no_grad():
            # NOTE(review): despite its name this is the dataset length; it
            # is used both as the progress-bar target and as the batch size,
            # so presumably the whole set goes through as one batch -- confirm
            nbatches = len(test_set)
            prog = Progbar(nbatches)
            # NOTE(review): `test_loader` is built but not iterated in this block
            test_loader = torch.utils.data.DataLoader(ImgFormulaDataset(test_set),
                                                      batch_size=nbatches,
                                                      shuffle=True, num_workers=3, pin_memory=True)

            for i, (img, formula) in enumerate(minibatches(test_set, nbatches)):
                # print(type(img), len(img), img[0].shape)
                # print(type(formula), formula)
                # Move to GPU, if available
                img = pad_batch_images_2(img)
                img = torch.FloatTensor(img)  # (N, W, H, C)
                formula, formula_length = pad_batch_formulas(formula, self._vocab.id_pad, self._vocab.id_end)
                img = img.permute(0, 3, 1, 2)  # (N, C, W, H)
                formula = torch.LongTensor(formula)  # padded token ids; shape not visible here -- TODO confirm
                img = img.to(self.device)
                formula = formula.to(self.device)

                # Forward prop.
                # NOTE(review): the lengths passed to the decoder are taken
                # from the already padded `formula` rows; `formula_length`
                # computed above is unused -- confirm this is intended
                imgs = self.encoder(img)
                scores, caps_sorted, decode_lengths, alphas, sort_ind = self.decoder(imgs, formula, torch.LongTensor([[len(i)] for i in formula]))

                # Since we decoded starting with <start>, the targets are all words after <start>, up to <end>
                targets = caps_sorted[:, 1:]

                # Remove timesteps that we didn't decode at, or are pads
                # pack_padded_sequence is an easy trick to do this
                scores, _ = pack_padded_sequence(scores, decode_lengths, batch_first=True)
                targets, _ = pack_padded_sequence(targets, decode_lengths, batch_first=True)

                # Calculate loss
                loss = self.criterion(scores, targets)

                # debug output, printed for every batch
                print(scores.shape, targets.shape)
                print(loss)

                alpha_c = 1.
                # Add doubly stochastic attention regularization
                loss += alpha_c * ((1. - alphas.sum(dim=1)) ** 2).mean()

                loss_eval = loss.item()

                prog.update(i + 1, [("loss", loss_eval), ("perplexity", np.exp(loss_eval))])

                # Store references (true captions), and hypothesis (prediction) for each image
                # If for n images, we have n hypotheses, and references a, b, c... for each image, we need -
                # references = [[ref1a, ref1b, ref1c], [ref2a, ref2b], ...], hypotheses = [hyp1, hyp2, ...]
                # NOTE(review): `scores` here is the packed result from
                # above, not one prediction per example, and `hyps` is
                # indexed by position inside `preds` although it only holds
                # 1 (greedy) or beam_size lists -- this looks like it can
                # raise IndexError; confirm the intended hypotheses.
                # The inner loop also rebinds the outer loop variable `i`.
                # print("---------------------------------------------------------------formula and prediction :")
                for form, preds in zip(formula, scores):
                    refs.append(form)
                    # print(form, "    ----------    ", preds[0])
                    for i, pred in enumerate(preds):
                        hyps[i].append(pred)

            files = write_answers(refs, hyps, self._vocab.id_to_tok, config.dir_answers, self._vocab.id_end)
            # NOTE(review): the result of score_files is assigned but never
            # returned or read afterwards
            scores = score_files(files[0], files[1])
            # perp = - np.exp(ce_words / float(n_words))
            # scores["perplexity"] = perp

        self.logger.info("- Evaluating: {}".format(prog.info))

        # NOTE(review): `loss` is only bound inside the loop, so this fails
        # if the loop body never runs
        return {
            "perplexity": loss.item()
        }
コード例 #8
0
    op_train = optimizer.minimize(loss)


# TensorBoard: write the default graph under a UTC-timestamped run directory.
timecode = time.strftime("%y%m%d_%H%M%S", time.gmtime())
fileWriter = tf.summary.FileWriter(
    'tensorboard/{}'.format(timecode),
    tf.get_default_graph(),
)
fileWriter.flush()

print('graph saved to tensorboard')

# Fresh session with all variables initialised.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Build one feed dict per training minibatch up front.
batch_size = config.batch_size
feed_dicts = []
for i, (_img, _formula) in enumerate(minibatches(train_set, batch_size)):
    fd = {}
    fd[inputs] = _img
    fd[dropout] = 0.2
    fd[training] = True
    fd[learning_rate] = 0.0001
    if _formula is not None:
        # pad the batch of formulas and feed both the ids and their lengths
        _formula, _formula_length = pad_batch_formulas(
            _formula, vocab.id_pad, vocab.id_end)
        fd.update({formula: _formula, formula_length: _formula_length})
    feed_dicts.append(fd)