Example No. 1
    def train(self,
              sess,
              baseline_steps=0,
              loss_function='xent',
              use_baseline=True,
              **kwargs):
        self.init_training(sess=sess, **kwargs)

        if (loss_function == 'reinforce' and use_baseline
                and baseline_steps > 0
                and self.baseline_step.eval(sess) < baseline_steps):
            utils.log('pre-training reinforce baseline')
            for i in range(baseline_steps - self.baseline_step.eval(sess)):
                self.seq2seq_model.reinforce_step(sess,
                                                  next(self.batch_iterator),
                                                  update_model=False,
                                                  use_sgd=False,
                                                  update_baseline=True)

        utils.log('starting training')
        while True:
            try:
                self.train_step(sess=sess,
                                loss_function=loss_function,
                                use_baseline=use_baseline,
                                **kwargs)
            except utils.EvalException:
                self.save(sess)
                step, score = self.training.scores[-1]
                self.manage_best_checkpoints(step, score)
            except utils.CheckpointException:
                self.save(sess)
Example No. 2
def load_checkpoint(sess,
                    checkpoint_dir,
                    filename=None,
                    blacklist=(),
                    prefix=None,
                    variable_mapping=(),
                    reverse_mapping=()):
    """
    If `filename` is None, load the latest checkpoint from `checkpoint_dir`;
    otherwise ignore `checkpoint_dir` and load the given checkpoint file.
    `variable_mapping` and `reverse_mapping` are sequences of (pattern, replacement)
    pairs applied with `re.sub` to translate between checkpoint and graph variable names.
    """
    if filename is None:
        # load last checkpoint
        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        if ckpt is not None:
            filename = ckpt.model_checkpoint_path
    else:
        checkpoint_dir = os.path.dirname(filename)

    vars_ = []
    var_names = []
    for var in tf.global_variables():
        if prefix is None or var.name.startswith(prefix):
            name = var.name if prefix is None else var.name[len(prefix) + 1:]
            vars_.append(var)
            var_names.append(name)

    var_file = os.path.join(checkpoint_dir, 'vars.pkl')
    if os.path.exists(var_file):
        with open(var_file, 'rb') as f:
            old_names = pickle.load(f)
    else:
        old_names = list(var_names)

    name_mapping = {}
    for name in old_names:
        name_ = name
        for key, value in variable_mapping:
            name_ = re.sub(key, value, name_)
        name_mapping[name] = name_

    var_names_ = []
    for name in var_names:
        for key, value in reverse_mapping:
            name = re.sub(key, value, name)
        var_names_.append(name)
    vars_ = dict(zip(var_names_, vars_))

    variables = {
        old_name[:-2]: vars_[new_name]
        for old_name, new_name in name_mapping.items()
        if new_name in vars_ and not any(prefix in new_name
                                         for prefix in blacklist)
    }

    if filename is not None:
        utils.log('reading model parameters from {}'.format(filename))
        tf.train.Saver(variables).restore(sess, filename)

        utils.debug('retrieved parameters ({})'.format(len(variables)))
        for var in sorted(variables.values(), key=lambda var: var.name):
            utils.debug('  {} {}'.format(var.name, var.get_shape()))
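The renaming step above applies a list of regex (pattern, replacement) pairs to the variable names stored in vars.pkl before matching them against the current graph. A minimal, self-contained sketch of that step, with made-up mapping pairs and names (pure Python, no TensorFlow):

import re

# hypothetical (pattern, replacement) pairs, applied in order with re.sub
variable_mapping = [
    (r'^encoder_fr/', 'encoder_src/'),
    (r'GRUCell', 'GRUBlockCell'),
]

old_names = ['encoder_fr/GRUCell/weights:0', 'decoder_en/embedding:0']

name_mapping = {}
for name in old_names:
    name_ = name
    for pattern, replacement in variable_mapping:
        name_ = re.sub(pattern, replacement, name_)
    name_mapping[name] = name_

print(name_mapping)
# {'encoder_fr/GRUCell/weights:0': 'encoder_src/GRUBlockCell/weights:0',
#  'decoder_en/embedding:0': 'decoder_en/embedding:0'}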
Example No. 3
    def decode(self, output=None, remove_unk=False, raw_output=False, max_test_size=None, **kwargs):
        utils.log('starting decoding')

        # empty `test` means that we read from standard input, which is not possible with multiple encoders
        # assert len(self.src_ext) == 1 or self.filenames.test
        # check that there is the right number of files for decoding
        # assert not self.filenames.test or len(self.filenames.test) == len(self.src_ext)

        output_file = None
        try:
            output_file = sys.stdout if output is None else open(output, 'w')
            paths = self.filenames.test or [None]
            lines = utils.read_lines(paths, binary=self.binary)

            if max_test_size:
                lines = itertools.islice(lines, max_test_size)

            if not self.filenames.test:   # interactive mode
                batch_size = 1
            else:
                batch_size = self.batch_size
                lines = list(lines)

            hypothesis_iter = self.decode_batch(lines, batch_size, remove_unk=remove_unk)

            for hypothesis, raw in hypothesis_iter:
                if raw_output:
                    hypothesis = raw

                output_file.write(hypothesis + '\n')
                output_file.flush()
        finally:
            if output_file is not None:
                output_file.close()
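decode() falls back to standard input (with batch size 1) when no test files are configured, and caps the input with itertools.islice when max_test_size is set. A small sketch of that input selection, using a hypothetical iter_input_lines helper in place of utils.read_lines:

import itertools
import sys

def iter_input_lines(paths=None, max_lines=None):
    """Yield input lines from the given files, or from stdin when no paths are given."""
    if paths:
        lines = (line.rstrip('\n') for path in paths
                 for line in open(path, encoding='utf-8'))
    else:
        lines = (line.rstrip('\n') for line in sys.stdin)  # interactive mode
    if max_lines:
        lines = itertools.islice(lines, max_lines)
    return lines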
Example No. 4
    def eval_step(self):
        # compute loss on dev set
        for prefix, dev_batches in zip(self.dev_prefix, self.dev_batches):
            eval_loss = sum(
                self.seq2seq_model.step(batch, update_model=False).loss *
                len(batch) for batch in dev_batches)
            eval_loss /= sum(map(len, dev_batches))

            utils.log("  {} eval: loss {:.2f}".format(prefix, eval_loss))
Example No. 5
def load_checkpoint(sess, checkpoint_dir, filename, variables):
    if filename is not None:
        ckpt_file = checkpoint_dir + "/" + filename
        utils.log('reading model parameters from {}'.format(ckpt_file))
        tf.train.Saver(variables).restore(sess, ckpt_file)

        utils.debug('retrieved parameters ({})'.format(len(variables)))
        for var in sorted(variables, key=lambda var: var.name):
            utils.debug('  {} {}'.format(var.name, var.get_shape()))
Example No. 6
    def manage_best_checkpoints(self, step, score):
        score_filename = os.path.join(self.checkpoint_dir, 'scores.txt')
        # try loading previous scores
        try:
            with open(score_filename) as f:
                # list of pairs (score, step)
                scores = [(float(line.split()[0]), int(line.split()[1]))
                          for line in f]
        except IOError:
            scores = []

        if any(step_ >= step for _, step_ in scores):
            utils.warn('inconsistent scores.txt file')

        best_scores = sorted(scores, reverse=True)[:self.keep_best]

        def full_path(filename):
            return os.path.join(self.checkpoint_dir, filename)

        if any(score_ < score for score_, _ in best_scores) or not best_scores:
            # if this checkpoint is in the top, save it under a special name

            prefix = 'translate-{}.'.format(step)
            dest_prefix = 'best-{}.'.format(step)

            absolute_best = all(score_ < score for score_, _ in best_scores)
            if absolute_best:
                utils.log('new best model')

            for filename in os.listdir(self.checkpoint_dir):
                if filename.startswith(prefix):
                    dest_filename = filename.replace(prefix, dest_prefix)
                    shutil.copy(full_path(filename), full_path(dest_filename))

                    # also copy to `best` if this checkpoint is the absolute best
                    if absolute_best:
                        dest_filename = filename.replace(prefix, 'best.')
                        shutil.copy(full_path(filename),
                                    full_path(dest_filename))

            best_scores = sorted(best_scores + [(score, step)], reverse=True)

            for _, step_ in best_scores[self.keep_best:]:
                # remove checkpoints that are not in the top anymore
                prefix = 'best-{}'.format(step_)
                for filename in os.listdir(self.checkpoint_dir):
                    if filename.startswith(prefix):
                        os.remove(full_path(filename))

        # save scores
        scores.append((score, step))

        with open(score_filename, 'w') as f:
            for score_, step_ in scores:
                f.write('{:.2f} {}\n'.format(score_, step_))
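A stripped-down sketch of the scores.txt bookkeeping used above: each line holds a 'score step' pair, and a new checkpoint is promoted when it beats one of the keep_best best scores so far (the helper name and file layout here are illustrative, pure Python):

def update_scores(score_filename, step, score, keep_best=4):
    # read previous (score, step) pairs, tolerating a missing file
    try:
        with open(score_filename) as f:
            scores = [(float(line.split()[0]), int(line.split()[1])) for line in f]
    except IOError:
        scores = []

    best_scores = sorted(scores, reverse=True)[:keep_best]
    # promote when the top list is not full yet, or when the new score beats one of the best
    is_best = not best_scores or any(score_ < score for score_, _ in best_scores)

    scores.append((score, step))
    with open(score_filename, 'w') as f:
        for score_, step_ in scores:
            f.write('{:.2f} {}\n'.format(score_, step_))
    return is_best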
Example No. 7
    def eval_step(self, sess):
        # compute perplexity on dev set
        for dev_batches in self.dev_batches:
            eval_loss = sum(
                self.seq2seq_model.step(
                    sess, batch, update_model=False,
                    update_baseline=False).loss * len(batch)
                for batch in dev_batches)
            eval_loss /= sum(map(len, dev_batches))

            utils.log("  eval: loss {:.2f}".format(eval_loss))
Example No. 8
    def eval_step(self, sess):
        # compute perplexity on dev set
        for dev_batches in self.dev_batches:
            eval_loss = sum(
                self.model.step(sess, batch, forward_only=True).loss *
                len(batch) for batch in dev_batches)
            eval_loss /= sum(map(len, dev_batches))

            perplexity = math.exp(eval_loss) if eval_loss < 300 else float(
                'inf')
            utils.log("  eval: perplexity {:.2f}".format(perplexity))
Example No. 9
    def calculate_true_alignments(self, encoder_inputs, targets, input_length):
        sum_align = []
        for m_inputs, m_targets in zip(encoder_inputs[0], targets[0]):
            single_align = self.calculate_single_align(
                [self.vocab_in[int(item)] for item in m_inputs],
                [self.vocab_out[int(item)] for item in m_targets])
            sum_align.append(single_align)
        alignments = np.array(sum_align)
        utils.log('alignment matrix')
        utils.log(alignments)
        utils.log(alignments.shape)  # dimensions of the alignment matrix
        return alignments
Example No. 10
def save_checkpoint(sess, saver, checkpoint_dir, step=None, name=None):
    var_file = os.path.join(checkpoint_dir, 'vars.pkl')
    name = name or 'translate'
    os.makedirs(checkpoint_dir, exist_ok=True)

    with open(var_file, 'wb') as f:
        var_names = [var.name for var in tf.global_variables()]
        pickle.dump(var_names, f)

    utils.log('saving model to {}'.format(checkpoint_dir))
    checkpoint_path = os.path.join(checkpoint_dir, name)
    saver.save(sess, checkpoint_path, step, write_meta_graph=False)

    utils.log('finished saving model')
Example No. 11
    def train(self, sess, **kwargs):
        for model in self.models:
            utils.log('initializing {}'.format(model.name))
            model.init_training(sess=sess, **kwargs)

        while True:
            i = np.random.choice(len(self.models), 1, p=self.ratios)[0]
            model = self.models[i]
            try:
                model.train_step(sess=sess, **kwargs)
            except utils.CheckpointException:
                if i == 0:  # only save main model (includes all variables)
                    model.save(sess)
                    step, score = model.training.scores[-1]
                    model.manage_best_checkpoints(step, score)
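At every step the multi-task trainer draws one of its models with probabilities given by self.ratios. A minimal sketch of that sampling with numpy (the ratios below are made up and must sum to 1):

import numpy as np

ratios = [0.7, 0.2, 0.1]   # hypothetical task sampling ratios
counts = np.zeros(len(ratios), dtype=int)

for _ in range(10000):
    i = np.random.choice(len(ratios), p=ratios)   # index of the model to train
    counts[i] += 1

print(counts / counts.sum())   # roughly [0.7, 0.2, 0.1]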
Example No. 12
    def save_embedding(self, output_dir):
        os.makedirs(output_dir, exist_ok=True)
        for encoder_or_decoder, vocab in zip(self.encoders + self.decoders,
                                             self.vocabs):
            utils.log('saving embeddings for: {}'.format(
                encoder_or_decoder.name))
            if encoder_or_decoder.name != "edits":
                with tf.variable_scope(tf.get_variable_scope(), reuse=True):
                    embedding_var = tf.get_variable('embedding_' +
                                                    encoder_or_decoder.name)
                    embedding_value = embedding_var.eval()
                    filename = os.path.join(output_dir,
                                            embedding_var.name + '.txt')
                    with open(filename, 'w') as file_:
                        for word, i in vocab.vocab.items():
                            file_.write('%s %s\n' % (
                                word,
                                ' '.join(map(str, embedding_value[i]))))
Example No. 13
    def decode(self,
               sess,
               beam_size,
               output=None,
               remove_unk=False,
               early_stopping=True,
               use_edits=False,
               **kwargs):
        utils.log('starting decoding')

        # empty `test` means that we read from standard input, which is not possible with multiple encoders
        assert len(self.src_ext) == 1 or self.filenames.test
        # we can't read binary data from standard input
        assert self.filenames.test or self.src_ext[0] not in self.binary_input
        # check that there is the right number of files for decoding
        assert not self.filenames.test or len(self.filenames.test) == len(
            self.src_ext)

        output_file = None
        try:
            output_file = sys.stdout if output is None else open(output, 'w')

            lines = utils.read_lines(self.filenames.test, self.src_ext,
                                     self.binary_input)

            if self.filenames.test is None:  # interactive mode
                batch_size = 1
            else:
                batch_size = self.batch_size
                lines = list(lines)

            hypothesis_iter = self._decode_batch(sess,
                                                 lines,
                                                 batch_size,
                                                 beam_size=beam_size,
                                                 early_stopping=early_stopping,
                                                 remove_unk=remove_unk,
                                                 use_edits=use_edits)

            for hypothesis in hypothesis_iter:
                output_file.write(hypothesis + '\n')
                output_file.flush()
        finally:
            if output_file is not None:
                output_file.close()
Example No. 14
    def train(self, **kwargs):
        for model in self.models:
            utils.log('initializing {}'.format(model.name))
            model.init_training(**kwargs)

        utils.log('starting training')
        while True:
            i = np.random.choice(len(self.models), 1, p=self.ratios)[0]
            model = self.models[i]
            try:
                model.train_step(**kwargs)
            except (utils.FinishedTrainingException, KeyboardInterrupt):
                utils.log('exiting...')
                self.main_model.save()
                return
            except utils.EvalException:
                if i == 0:
                    model.save()
                    step, score = model.training.scores[-1]
                    model.manage_best_checkpoints(step, score)
            except utils.CheckpointException:
                if i == 0:   # only save main model (includes all variables)
                    model.save()
                    step, score = model.training.scores[-1]
                    model.manage_best_checkpoints(step, score)
Example No. 15
    def train(self,
              baseline_steps=0,
              loss_function='xent',
              use_baseline=True,
              **kwargs):
        self.init_training(**kwargs)

        if (loss_function == 'reinforce' and use_baseline
                and baseline_steps > 0
                and self.baseline_step.eval() < baseline_steps):
            utils.log('pre-training reinforce baseline')
            for i in range(baseline_steps - self.baseline_step.eval()):
                self.seq2seq_model.reinforce_step(next(self.batch_iterator),
                                                  update_model=False,
                                                  use_sgd=False,
                                                  update_baseline=True)

        utils.log('starting training')
        while True:
            try:
                self.train_step(loss_function=loss_function,
                                use_baseline=use_baseline,
                                **kwargs)
                sys.stdout.flush()
            except (utils.FinishedTrainingException, KeyboardInterrupt):
                utils.log('exiting...')
                self.save()
                return
            except utils.EvalException:
                self.save()
                step, score = self.training.scores[-1]
                self.manage_best_checkpoints(step, score)
            except utils.CheckpointException:
                self.save()
Example No. 16
def load_checkpoint(sess, checkpoint_dir, filename=None, blacklist=()):
    """ `checkpoint_dir` should be unique to this model
    if `filename` is None, we load last checkpoint, otherwise
      we ignore `checkpoint_dir` and load the given checkpoint file.
    """
    if filename is None:
        # load last checkpoint
        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        if ckpt is not None:
            filename = ckpt.model_checkpoint_path
    else:
        checkpoint_dir = os.path.dirname(filename)

    var_file = os.path.join(checkpoint_dir, 'vars.pkl')

    if os.path.exists(var_file):
        with open(var_file, 'rb') as f:
            var_names = pickle.load(f)
            variables = [
                var for var in tf.global_variables() if var.name in var_names
            ]
    else:
        variables = tf.global_variables()

    # remove variables from blacklist
    variables = [
        var for var in variables
        if not any(prefix in var.name for prefix in blacklist)
    ]

    if filename is not None:
        utils.log('reading model parameters from {}'.format(filename))
        tf.train.Saver(variables).restore(sess, filename)

        utils.debug('retrieved parameters ({})'.format(len(variables)))
        for var in variables:
            utils.debug('  {} {}'.format(var.name, var.get_shape()))
Example No. 17
    def decode(self, sess, beam_size, output=None, remove_unk=False, **kwargs):
        utils.log('starting decoding')

        # empty `test` means that we read from standard input, which is not possible with multiple encoders
        assert len(self.src_ext) == 1 or self.filenames.test
        # we can't read binary data from standard input
        assert self.filenames.test or self.src_ext[0] not in self.binary_input
        # check that there is the right number of files for decoding
        assert not self.filenames.test or len(self.filenames.test) == len(
            self.src_ext)

        output_file = None
        try:
            output_file = sys.stdout if output is None else open(output, 'w')

            for lines in utils.read_lines(self.filenames.test, self.src_ext,
                                          self.binary_input):
                trg_sentence = self._decode_sentence(sess, lines, beam_size,
                                                     remove_unk)
                output_file.write(trg_sentence + '\n')
                output_file.flush()
        finally:
            if output_file is not None:
                output_file.close()
Example No. 18
def save_checkpoint(sess, saver, checkpoint_dir, step=None, name=None):
    """ `checkpoint_dir` should be unique to this model """
    var_file = os.path.join(checkpoint_dir, 'vars.pkl')
    name = name or 'translate'

    if not os.path.exists(checkpoint_dir):
        utils.log("creating directory {}".format(checkpoint_dir))
        os.makedirs(checkpoint_dir)

    with open(var_file, 'wb') as f:
        var_names = [var.name for var in tf.all_variables()]
        pickle.dump(var_names, f)

    utils.log('saving model to {}'.format(checkpoint_dir))
    checkpoint_path = os.path.join(checkpoint_dir, name)
    saver.save(sess, checkpoint_path, step, write_meta_graph=False)
    utils.log('finished saving model')
Example No. 19
    def evaluate(self, score_functions, on_dev=True, output=None, remove_unk=False, max_dev_size=None,
                 raw_output=False, fix_edits=True, max_test_size=None, post_process_script=None,
                 unk_replace=False, **kwargs):
        """
        Decode a dev or test set, and perform evaluation with respect to gold standard, using the provided
        scoring function. If `output` is defined, also save the decoding output to this file.
        When evaluating development data (`on_dev` to True), several dev sets can be specified (`dev_prefix` parameter
        in configuration files), and a score is computed for each of them.

        :param score_functions: names of the scoring functions used to score and rank models (the first one gives the main score)
        :param on_dev: if True, evaluate the dev corpus, otherwise evaluate the test corpus
        :param output: save the hypotheses to this file
        :param remove_unk: remove the UNK symbols from the output
        :param max_dev_size: maximum number of lines to read from dev files
        :param max_test_size: maximum number of lines to read from test files
        :param raw_output: save raw decoder output (don't do post-processing like UNK deletion or subword
            concatenation). The evaluation is still done with the post-processed output.
        :param fix_edits: when predicting edit operations, pad shorter hypotheses with KEEP symbols.
        :return: scores of each corpus to evaluate
        """
        utils.log('starting evaluation')

        if on_dev:
            filenames = self.filenames.dev
        else:
            filenames = [self.filenames.test]

        # convert `output` into a list, for zip
        if isinstance(output, str):
            output = [output]
        elif output is None:
            output = [None] * len(filenames)

        scores = []
        utils.log('show output')
        utils.log(output)

        # evaluation on multiple corpora
        for dev_id, (filenames_, output_, prefix) in enumerate(zip(filenames, output, self.dev_prefix)):
            utils.log('filenames, output, self.dev_prefix')
            utils.log(filenames)
            utils.log(output)

            if self.dev_batches:
                dev_batches = self.dev_batches[dev_id]
                dev_loss = sum(self.seq2seq_model.step(batch, update_model=False).loss * len(batch)
                               for batch in dev_batches)
                dev_loss /= sum(map(len, dev_batches))
            else:  # TODO
                dev_loss = 0

            extensions = list(self.extensions)
            if self.ref_ext is not None:
                extensions.append(self.ref_ext)

            lines = list(utils.read_lines(filenames_, binary=self.binary))

            if on_dev and max_dev_size:
                lines = lines[:max_dev_size]
            elif not on_dev and max_test_size:
                lines = lines[:max_test_size]

            hypotheses = []
            references = []
            utils.log("making hypotheses")
            output_file = None
            try:
                if output_ is not None:
                    output_file = open(output_, 'w', encoding='utf-8')

                lines_ = list(zip(*lines))

                src_sentences = list(zip(*lines_[:len(self.src_ext)]))
                trg_sentences = list(zip(*lines_[len(self.src_ext):]))

                utils.log("making decode_batch")
                hypothesis_iter = self.decode_batch(lines, self.batch_size, remove_unk=remove_unk,
                                                    fix_edits=fix_edits, unk_replace=unk_replace)

                for i, (sources, hypothesis, reference) in enumerate(zip(src_sentences, hypothesis_iter,
                                                                         trg_sentences)):
                    if self.ref_ext is not None and on_dev:
                        reference = reference[-1]
                    else:
                        reference = reference[0]  # single output for now

                    hypothesis, raw = hypothesis
                    # hypothesis: list of n-best outputs (e.g. 10), each one a token sequence
                    hypotheses.append(hypothesis)
                    references.append(reference.strip().replace('@@ ', ''))

                    if output_file is not None:
                        if raw_output:
                            hypothesis = raw
                        line = "source:\t" + str(sources) + "\nref:\t" + str(reference) + "\n"
                        for item in hypothesis:
                            line += str(item) + '\n'
                        line += "\n"
                        # line = hypothesis + '\n'
                        output_file.write(line)
                        output_file.flush()

            finally:
                if output_file is not None:
                    output_file.close()

            if post_process_script is not None:
                data = '\n'.join(hypotheses).encode()
                data = Popen([post_process_script], stdout=PIPE, stdin=PIPE).communicate(input=data)[0].decode()
                hypotheses = data.splitlines()

            scores_ = []
            summary = None

            for score_function in score_functions:
                try:
                    if score_function == 'loss':
                        score = dev_loss
                        reversed_ = True
                    else:
                        fun = getattr(evaluation, 'corpus_' + score_function)
                        try:
                            reversed_ = fun.reversed
                        except AttributeError:
                            reversed_ = False

                        func_arg = []
                        for item in hypotheses:
                            func_arg.append(item[0])
                        score, score_summary = fun(func_arg, references)
                        summary = summary or score_summary

                    scores_.append((score_function, score, reversed_))
                except Exception:
                    # skip scoring functions that fail on this output
                    pass

            score_info = ['{}={:.2f}'.format(key, value) for key, value, _ in scores_]
            score_info.insert(0, prefix)
            if summary:
                score_info.append(summary)

            if self.name is not None:
                score_info.insert(0, self.name)

            utils.log(' '.join(map(str, score_info)))

            # main score
            _, score, reversed_ = scores_[0]
            scores.append(-score if reversed_ else score)

        return scores
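The scoring functions above are resolved by name with getattr on the evaluation module, and a function may carry a `reversed` attribute meaning that lower is better, which flips the sign of the main score. A self-contained sketch of that dispatch, with dummy scorers standing in for the real ones:

import types

evaluation = types.SimpleNamespace()        # stand-in for the real `evaluation` module

def corpus_bleu(hypotheses, references):
    return 34.5, 'penalty=1.0 ratio=0.98'   # made-up (score, summary) pair

def corpus_ter(hypotheses, references):
    return 51.2, ''
corpus_ter.reversed = True                  # lower is better

evaluation.corpus_bleu = corpus_bleu
evaluation.corpus_ter = corpus_ter

def main_score(name, hypotheses, references):
    fun = getattr(evaluation, 'corpus_' + name)
    reversed_ = getattr(fun, 'reversed', False)
    score, summary = fun(hypotheses, references)
    return (-score if reversed_ else score), summary

print(main_score('bleu', [], []))   # (34.5, 'penalty=1.0 ratio=0.98')
print(main_score('ter', [], []))    # (-51.2, '')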
Example No. 20
    def evaluate(self,
                 sess,
                 beam_size,
                 score_function,
                 on_dev=True,
                 output=None,
                 remove_unk=False,
                 max_dev_size=None,
                 script_dir='scripts',
                 early_stopping=True,
                 use_edits=False,
                 **kwargs):
        """
        :param score_function: name of the scoring function used to score and rank models
          (typically 'bleu_score')
        :param on_dev: if True, evaluate the dev corpus, otherwise evaluate the test corpus
        :param output: save the hypotheses to this file
        :param remove_unk: remove the UNK symbols from the output
        :param max_dev_size: maximum number of lines to read from dev files
        :param script_dir: parameter of scoring functions
        :return: scores of each corpus to evaluate
        """
        utils.log('starting decoding')
        assert on_dev or len(self.filenames.test) == len(self.extensions)

        filenames = self.filenames.dev if on_dev else [self.filenames.test]

        # convert `output` into a list, for zip
        if isinstance(output, str):
            output = [output]
        elif output is None:
            output = [None] * len(filenames)

        scores = []

        for filenames_, output_ in zip(
                filenames, output):  # evaluation on multiple corpora
            lines = list(
                utils.read_lines(filenames_, self.extensions,
                                 self.binary_input))
            if on_dev and max_dev_size:
                lines = lines[:max_dev_size]

            hypotheses = []
            references = []

            output_file = None

            try:
                if output_ is not None:
                    output_file = open(output_, 'w')

                *src_sentences, trg_sentences = zip(*lines)
                src_sentences = list(zip(*src_sentences))

                hypothesis_iter = self._decode_batch(
                    sess,
                    src_sentences,
                    self.batch_size,
                    beam_size=beam_size,
                    early_stopping=early_stopping,
                    remove_unk=remove_unk,
                    use_edits=use_edits)
                for sources, hypothesis, reference in zip(
                        src_sentences, hypothesis_iter, trg_sentences):
                    if use_edits:
                        reference = utils.reverse_edits(sources[0], reference)

                    hypotheses.append(hypothesis)
                    references.append(reference.strip().replace('@@ ', ''))

                    if output_file is not None:
                        output_file.write(hypothesis + '\n')
                        output_file.flush()

            finally:
                if output_file is not None:
                    output_file.close()

            # default scoring function is utils.bleu_score
            score, score_summary = getattr(evaluation, score_function)(
                hypotheses, references, script_dir=script_dir)

            # print the scoring information
            score_info = []
            if self.name is not None:
                score_info.append(self.name)
            score_info.append('score={:.2f}'.format(score))
            if score_summary:
                score_info.append(score_summary)

            utils.log(' '.join(map(str, score_info)))
            scores.append(score)

        return scores
Example No. 21
def main(args=None):
    args = parser.parse_args(args)

    # read config file and default config
    with open('config/default.yaml') as f:
        default_config = utils.AttrDict(yaml.safe_load(f))

    with open(args.config) as f:
        config = utils.AttrDict(yaml.safe_load(f))
        
        if args.learning_rate is not None:
            args.reset_learning_rate = True
        
        # command-line parameters have higher precedence than config file
        for k, v in vars(args).items():
            if v is not None:
                config[k] = v

        # set default values for parameters that are not defined
        for k, v in default_config.items():
            config.setdefault(k, v)

    if config.score_function:
        config.score_functions = evaluation.name_mapping[config.score_function]

    if args.crash_test:
        config.max_train_size = 0

    if not config.debug:
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # disable TensorFlow's debugging logs
    decoding_mode = any(arg is not None for arg in (args.decode, args.eval, args.align))

    # enforce parameter constraints
    assert config.steps_per_eval % config.steps_per_checkpoint == 0, (
        'steps-per-eval should be a multiple of steps-per-checkpoint')
    assert decoding_mode or args.train or args.save or args.save_embedding, (
        'you need to specify at least one action (decode, eval, align, or train)')
    assert not (args.average and args.ensemble)

    if args.train and args.purge:
        utils.log('deleting previous model')
        shutil.rmtree(config.model_dir, ignore_errors=True)

    os.makedirs(config.model_dir, exist_ok=True)

    # copy config file to model directory
    config_path = os.path.join(config.model_dir, 'config.yaml')
    if args.train and not os.path.exists(config_path):
        with open(args.config) as config_file, open(config_path, 'w') as dest_file:
            content = config_file.read()
            content = re.sub(r'model_dir:.*?\n', 'model_dir: {}\n'.format(config.model_dir), content,
                             flags=re.MULTILINE)
            dest_file.write(content)

    # also copy default config
    config_path = os.path.join(config.model_dir, 'default.yaml')
    if args.train and not os.path.exists(config_path):
        shutil.copy('config/default.yaml', config_path)

    # copy source code to model directory
    tar_path = os.path.join(config.model_dir, 'code.tar.gz')
    if args.train and not os.path.exists(tar_path):
        with tarfile.open(tar_path, "w:gz") as tar:
            for filename in os.listdir('translate'):
                if filename.endswith('.py'):
                    tar.add(os.path.join('translate', filename), arcname=filename)

    logging_level = logging.DEBUG if args.verbose else logging.INFO
    # always log to stdout in decoding and eval modes (to avoid overwriting precious train logs)
    log_path = os.path.join(config.model_dir, config.log_file)
    logger = utils.create_logger(log_path if args.train else None)
    logger.setLevel(logging_level)

    utils.log('label: {}'.format(config.label))
    utils.log('description:\n  {}'.format('\n  '.join(config.description.strip().split('\n'))))

    utils.log(' '.join(sys.argv))  # print command line
    try:  # print git hash
        commit_hash = subprocess.check_output(['git', 'rev-parse', 'HEAD']).decode().strip()
        utils.log('commit hash {}'.format(commit_hash))
    except:
        pass

    utils.log('tensorflow version: {}'.format(tf.__version__))

    # log parameters
    utils.debug('program arguments')
    for k, v in sorted(config.items(), key=itemgetter(0)):
        utils.debug('  {:<20} {}'.format(k, pformat(v)))

    if isinstance(config.dev_prefix, str):
        config.dev_prefix = [config.dev_prefix]

    if config.tasks is not None:
        config.tasks = [utils.AttrDict(task) for task in config.tasks]
        tasks = config.tasks
    else:
        tasks = [config]

    for task in tasks:
        for parameter, value in config.items():
            task.setdefault(parameter, value)

        task.encoders = [utils.AttrDict(encoder) for encoder in task.encoders]
        task.decoders = [utils.AttrDict(decoder) for decoder in task.decoders]

        for encoder_or_decoder in task.encoders + task.decoders:
            for parameter, value in task.items():
                encoder_or_decoder.setdefault(parameter, value)

        if args.max_len:
            args.max_input_len = args.max_len
        if args.max_output_len:   # override decoder's max len
            task.decoders[0].max_len = args.max_output_len
        if args.max_input_len:    # override encoder's max len
            task.encoders[0].max_len = args.max_input_len

    config.checkpoint_dir = os.path.join(config.model_dir, 'checkpoints')

    # setting random seeds
    if config.seed is None:
        config.seed = random.randrange(sys.maxsize)
    if config.tf_seed is None:
        config.tf_seed = random.randrange(sys.maxsize)
    utils.log('python random seed: {}'.format(config.seed))
    utils.log('tf random seed:     {}'.format(config.tf_seed))
    random.seed(config.seed)
    tf.set_random_seed(config.tf_seed)

    device = None
    if config.no_gpu:
        device = '/cpu:0'
        device_id = None
    elif config.gpu_id is not None:
        device = '/gpu:{}'.format(config.gpu_id)
        device_id = config.gpu_id
    else:
        device_id = 0

    # hide other GPUs so that TensorFlow won't use memory on them
    os.environ['CUDA_VISIBLE_DEVICES'] = '' if device_id is None else str(device_id)

    utils.log('creating model')
    utils.log('using device: {}'.format(device))

    with tf.device(device):
        if config.weight_scale:
            if config.initializer == 'uniform':
                initializer = tf.random_uniform_initializer(minval=-config.weight_scale, maxval=config.weight_scale)
            else:
                initializer = tf.random_normal_initializer(stddev=config.weight_scale)
        else:
            initializer = None

        tf.get_variable_scope().set_initializer(initializer)

        # exempt from creating gradient ops
        config.decode_only = decoding_mode

        if config.tasks is not None:
            model = MultiTaskModel(**config)
        else:
            model = TranslationModel(**config)

    # count parameters
    # not counting parameters created by training algorithm (e.g. Adam)
    variables = [var for var in tf.global_variables() if not var.name.startswith('gradients')]
    utils.log('model parameters ({})'.format(len(variables)))
    parameter_count = 0
    for var in sorted(variables, key=lambda var: var.name):
        utils.log('  {} {}'.format(var.name, var.get_shape()))
        v = 1
        for d in var.get_shape():
            v *= d.value
        parameter_count += v
    utils.log('number of parameters: {:.2f}M'.format(parameter_count / 1e6))

    tf_config = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True)
    tf_config.gpu_options.allow_growth = config.allow_growth
    tf_config.gpu_options.per_process_gpu_memory_fraction = config.mem_fraction

    def average_checkpoints(main_sess, sessions):
        for var in tf.global_variables():
            avg_value = sum(sess.run(var) for sess in sessions) / len(sessions)
            main_sess.run(var.assign(avg_value))

    with tf.Session(config=tf_config) as sess:
        best_checkpoint = os.path.join(config.checkpoint_dir, 'best')

        params = {'variable_mapping': config.variable_mapping, 'reverse_mapping': config.reverse_mapping,
                  'rnn_lm_model_dir': None, 'rnn_mt_model_dir': None,
                  'rnn_lm_cell_name': None, 'origin_model_ckpt': None}
        if config.ensemble and len(config.checkpoints) > 1:
            model.initialize(config.checkpoints, **params)
        elif config.average and len(config.checkpoints) > 1:
            model.initialize(reset=True)
            sessions = [tf.Session(config=tf_config) for _ in config.checkpoints]
            for sess_, checkpoint in zip(sessions, config.checkpoints):
                model.initialize(sess=sess_, checkpoints=[checkpoint], **params)
            average_checkpoints(sess, sessions)
        elif (not config.checkpoints and decoding_mode and
              os.path.isfile(best_checkpoint + '.index')):
            # in decoding and evaluation mode, unless specified otherwise (by `checkpoints`),
            # try to load the best checkpoint
            model.initialize([best_checkpoint], **params)
        else:
            # loads last checkpoint, unless `reset` is true
            model.initialize(**config)

        if config.output is not None:
            dirname = os.path.dirname(config.output)
            if dirname:
                os.makedirs(dirname, exist_ok=True)

        try:
            if args.save:
                model.save()
            elif args.save_embedding:
                if config.embedding_output_dir is None:
                    output_dir = "."
                else:
                    output_dir = config.embedding_output_dir
                model.save_embedding(output_dir)
            elif args.decode is not None:
                if config.align is not None:
                    config.align = True
                model.decode(**config)
            elif args.eval is not None:
                model.evaluate(on_dev=False, **config)
            elif args.align is not None:
                model.align(**config)
            elif args.train:
                model.train(**config)
        except KeyboardInterrupt:
            sys.exit()
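main() merges three layers of parameters, with command-line arguments taking precedence over the user config file, which in turn takes precedence over config/default.yaml. A minimal sketch of that precedence, assuming plain dicts for the parsed command-line arguments:

import yaml

def merge_config(default_path, config_path, cli_args):
    with open(default_path) as f:
        default_config = yaml.safe_load(f)
    with open(config_path) as f:
        config = yaml.safe_load(f)

    # command-line parameters have higher precedence than the config file
    for k, v in cli_args.items():
        if v is not None:
            config[k] = v

    # fall back to the defaults for anything still undefined
    for k, v in default_config.items():
        config.setdefault(k, v)

    return config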
Example No. 22
    def initialize(self,
                   checkpoints=None,
                   reset=False,
                   reset_learning_rate=False,
                   max_to_keep=1,
                   keep_every_n_hours=0,
                   sess=None,
                   whitelist=None,
                   blacklist=None,
                   **kwargs):
        """
        :param checkpoints: list of checkpoints to load (instead of latest checkpoint)
        :param reset: don't load latest checkpoint, reset learning rate and global step
        :param reset_learning_rate: reset the learning rate to its initial value
        :param max_to_keep: keep this many latest checkpoints at all times
        :param keep_every_n_hours: and keep checkpoints every n hours
        """
        sess = sess or tf.get_default_session()

        if keep_every_n_hours is None or keep_every_n_hours <= 0:
            keep_every_n_hours = float('inf')

        self.saver = tf.train.Saver(
            max_to_keep=max_to_keep,
            keep_checkpoint_every_n_hours=keep_every_n_hours,
            sharded=False)

        sess.run(tf.global_variables_initializer())

        # load pre-trained embeddings
        for encoder_or_decoder, vocab in zip(self.encoders + self.decoders,
                                             self.vocabs):
            if encoder_or_decoder.embedding_file:
                utils.log('loading embeddings from: {}'.format(
                    encoder_or_decoder.embedding_file))
                embeddings = {}
                with open(encoder_or_decoder.embedding_file) as embedding_file:
                    for line in embedding_file:
                        word, vector = line.split(' ', 1)
                        if word in vocab.vocab:
                            embeddings[word] = np.array(
                                list(map(float, vector.split())))
                # standardize (mean of 0, std of 0.01)
                mean = sum(embeddings.values()) / len(embeddings)
                std = np.sqrt(
                    sum((value - mean)**2 for value in embeddings.values())
                    / (len(embeddings) - 1))
                for key in embeddings:
                    embeddings[key] = 0.01 * (embeddings[key] - mean) / std

                # change TensorFlow variable's value
                with tf.variable_scope(tf.get_variable_scope(), reuse=True):
                    embedding_var = tf.get_variable('embedding_' +
                                                    encoder_or_decoder.name)
                    embedding_value = embedding_var.eval()
                    for word, i in vocab.vocab.items():
                        if word in embeddings:
                            embedding_value[i] = embeddings[word]
                    sess.run(embedding_var.assign(embedding_value))

        if whitelist:
            with open(whitelist) as f:
                whitelist = list(line.strip() for line in f)
        if blacklist:
            with open(blacklist) as f:
                blacklist = list(line.strip() for line in f)
        else:
            blacklist = []

        blacklist.append('dropout_keep_prob')

        if reset_learning_rate or reset:
            blacklist.append('learning_rate')
        if reset:
            blacklist.append('global_step')

        params = {
            k: kwargs.get(k)
            for k in ('variable_mapping', 'reverse_mapping')
        }

        if checkpoints and len(self.models) > 1:
            assert len(self.models) == len(checkpoints)
            for i, checkpoint in enumerate(checkpoints, 1):
                load_checkpoint(sess,
                                None,
                                checkpoint,
                                blacklist=blacklist,
                                whitelist=whitelist,
                                prefix='model_{}'.format(i),
                                **params)
        elif checkpoints:  # load partial checkpoints
            for checkpoint in checkpoints:  # checkpoint files to load
                load_checkpoint(sess,
                                None,
                                checkpoint,
                                blacklist=blacklist,
                                whitelist=whitelist,
                                **params)
        elif not reset:
            load_checkpoint(sess,
                            self.checkpoint_dir,
                            blacklist=blacklist,
                            whitelist=whitelist,
                            **params)

        utils.debug('global step: {}'.format(self.global_step.eval()))
        utils.debug('baseline step: {}'.format(self.baseline_step.eval()))
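The embedding loading step above standardizes the pre-trained vectors to roughly zero mean and a standard deviation of 0.01 per dimension before copying them into the embedding variable. A numpy-only sketch of that normalization (the helper name is an assumption):

import numpy as np

def standardize_embeddings(embeddings, scale=0.01):
    # embeddings: dict mapping word -> 1-D numpy vector
    values = np.stack(list(embeddings.values()))
    mean = values.mean(axis=0)
    std = values.std(axis=0, ddof=1)       # sample standard deviation per dimension
    return {word: scale * (vector - mean) / std
            for word, vector in embeddings.items()}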
Example No. 23
def main(args=None):
    args = parser.parse_args(args)

    # read config file and default config
    with open('config/default.yaml') as f:
        default_config = utils.AttrDict(yaml.safe_load(f))

    with open(args.config) as f:
        config = utils.AttrDict(yaml.safe_load(f))

        if args.learning_rate is not None:
            args.reset_learning_rate = True

        # command-line parameters have higher precedence than config file
        for k, v in vars(args).items():
            if v is not None:
                config[k] = v

        # set default values for parameters that are not defined
        for k, v in default_config.items():
            config.setdefault(k, v)

    # enforce parameter constraints
    assert config.steps_per_eval % config.steps_per_checkpoint == 0, (
        'steps-per-eval should be a multiple of steps-per-checkpoint')
    assert args.decode is not None or args.eval or args.train or args.align, (
        'you need to specify at least one action (decode, eval, align, or train)'
    )
    assert not (args.avg_checkpoints and args.ensemble)

    if args.purge:
        utils.log('deleting previous model')
        shutil.rmtree(config.model_dir, ignore_errors=True)

    os.makedirs(config.model_dir, exist_ok=True)

    # copy config file to model directory
    config_path = os.path.join(config.model_dir, 'config.yaml')
    if not os.path.exists(config_path):
        shutil.copy(args.config, config_path)

    # also copy default config
    config_path = os.path.join(config.model_dir, 'default.yaml')
    if not os.path.exists(config_path):
        shutil.copy('config/default.yaml', config_path)

    # copy source code to model directory
    tar_path = os.path.join(config.model_dir, 'code.tar.gz')
    if not os.path.exists(tar_path):
        with tarfile.open(tar_path, "w:gz") as tar:
            for filename in os.listdir('translate'):
                if filename.endswith('.py'):
                    tar.add(os.path.join('translate', filename),
                            arcname=filename)

    logging_level = logging.DEBUG if args.verbose else logging.INFO
    # always log to stdout in decoding and eval modes (to avoid overwriting precious train logs)
    log_path = os.path.join(config.model_dir, config.log_file)
    logger = utils.create_logger(log_path if args.train else None)
    logger.setLevel(logging_level)

    utils.log('label: {}'.format(config.label))
    utils.log('description:\n  {}'.format('\n  '.join(
        config.description.strip().split('\n'))))

    utils.log(' '.join(sys.argv))  # print command line
    try:  # print git hash
        commit_hash = subprocess.check_output(['git', 'rev-parse',
                                               'HEAD']).decode().strip()
        utils.log('commit hash {}'.format(commit_hash))
    except:
        pass

    utils.log('tensorflow version: {}'.format(tf.__version__))

    # log parameters
    utils.debug('program arguments')
    for k, v in sorted(config.items(), key=itemgetter(0)):
        utils.debug('  {:<20} {}'.format(k, pformat(v)))

    if isinstance(config.dev_prefix, str):
        config.dev_prefix = [config.dev_prefix]

    if config.tasks is not None:
        config.tasks = [utils.AttrDict(task) for task in config.tasks]
        tasks = config.tasks
    else:
        tasks = [config]

    for task in tasks:
        for parameter, value in config.items():
            task.setdefault(parameter, value)

        task.encoders = [utils.AttrDict(encoder) for encoder in task.encoders]
        task.decoders = [utils.AttrDict(decoder) for decoder in task.decoders]

        for encoder_or_decoder in task.encoders + task.decoders:
            for parameter, value in task.items():
                encoder_or_decoder.setdefault(parameter, value)

    device = None
    if config.no_gpu:
        device = '/cpu:0'
    elif config.gpu_id is not None:
        device = '/gpu:{}'.format(config.gpu_id)

    utils.log('creating model')
    utils.log('using device: {}'.format(device))

    with tf.device(device):
        config.checkpoint_dir = os.path.join(config.model_dir, 'checkpoints')

        if config.weight_scale:
            if config.initializer == 'uniform':
                initializer = tf.random_uniform_initializer(
                    minval=-config.weight_scale, maxval=config.weight_scale)
            else:
                initializer = tf.random_normal_initializer(
                    stddev=config.weight_scale)
        else:
            initializer = None

        tf.get_variable_scope().set_initializer(initializer)

        config.decode_only = args.decode is not None or args.eval or args.align  # exempt from creating gradient ops

        if config.tasks is not None:
            model = MultiTaskModel(**config)
        else:
            model = TranslationModel(**config)

    # count parameters
    utils.log('model parameters ({})'.format(len(tf.global_variables())))
    parameter_count = 0
    for var in tf.global_variables():
        utils.log('  {} {}'.format(var.name, var.get_shape()))

        # not counting parameters created by the training algorithm (e.g. Adam)
        if not var.name.startswith('gradients'):
            v = 1
            for d in var.get_shape():
                v *= d.value
            parameter_count += v
    utils.log('number of parameters: {:.2f}M'.format(parameter_count / 1e6))

    tf_config = tf.ConfigProto(log_device_placement=False,
                               allow_soft_placement=True)
    tf_config.gpu_options.allow_growth = config.allow_growth
    tf_config.gpu_options.per_process_gpu_memory_fraction = config.mem_fraction

    def average_checkpoints(main_sess, sessions):
        for var in tf.global_variables():
            avg_value = sum(sess.run(var) for sess in sessions) / len(sessions)
            main_sess.run(var.assign(avg_value))

    with tf.Session(config=tf_config) as sess:
        best_checkpoint = os.path.join(config.checkpoint_dir, 'best')

        if ((config.ensemble or config.avg_checkpoints)
                and (args.eval or args.decode is not None)
                and len(config.checkpoints) > 1):
            # create one session for each model in the ensemble
            sessions = [tf.Session() for _ in config.checkpoints]
            for sess_, checkpoint in zip(sessions, config.checkpoints):
                model.initialize(sess_, [checkpoint])

            if config.ensemble:
                sess = sessions
            else:
                sess = sessions[0]
                average_checkpoints(sess, sessions)
        elif (not config.checkpoints
              and (args.eval or args.decode is not None or args.align)
              and os.path.isfile(best_checkpoint + '.index')):
            # in decoding and evaluation mode, unless specified otherwise (by `checkpoints`),
            # try to load the best checkpoint
            model.initialize(sess, [best_checkpoint])
        else:
            # loads last checkpoint, unless `reset` is true
            model.initialize(sess, **config)

        if args.decode is not None:
            model.decode(sess, **config)
        elif args.eval:
            model.evaluate(sess, on_dev=False, **config)
        elif args.align:
            model.align(sess, **config)
        elif args.train:
            try:
                model.train(sess=sess, **config)
            except (KeyboardInterrupt, utils.FinishedTrainingException):
                utils.log('exiting...')
                model.save(sess)
                sys.exit()
Example No. 24
    def train_step(self,
                   steps_per_checkpoint,
                   model_dir,
                   steps_per_eval=None,
                   max_steps=0,
                   max_epochs=0,
                   eval_burn_in=0,
                   decay_if_no_progress=None,
                   decay_after_n_epoch=None,
                   decay_every_n_epoch=None,
                   sgd_after_n_epoch=None,
                   sgd_learning_rate=None,
                   min_learning_rate=None,
                   loss_function='xent',
                   use_baseline=True,
                   **kwargs):
        if (min_learning_rate is not None
                and self.learning_rate.eval() < min_learning_rate):
            utils.debug('learning rate is too small: stopping')
            raise utils.FinishedTrainingException
        if (0 < max_steps <= self.global_step.eval()
                or 0 < max_epochs <= self.epoch.eval()):
            raise utils.FinishedTrainingException

        start_time = time.time()

        if loss_function == 'reinforce':
            step_function = self.seq2seq_model.reinforce_step
        else:
            step_function = self.seq2seq_model.step

        res = step_function(next(self.batch_iterator),
                            update_model=True,
                            use_sgd=self.training.use_sgd,
                            update_baseline=True)

        self.training.loss += res.loss
        self.training.baseline_loss += getattr(res, 'baseline_loss', 0)

        self.training.time += time.time() - start_time
        self.training.steps += 1

        global_step = self.global_step.eval()
        epoch = self.epoch.eval()

        if decay_after_n_epoch is not None and self.batch_size * global_step >= decay_after_n_epoch * self.train_size:
            if decay_every_n_epoch is not None and (
                    self.batch_size * (global_step - self.training.last_decay)
                    >= decay_every_n_epoch * self.train_size):
                self.learning_rate_decay_op.eval()
                utils.debug('  decaying learning rate to: {:.3g}'.format(
                    self.learning_rate.eval()))
                self.training.last_decay = global_step

        if sgd_after_n_epoch is not None and epoch >= sgd_after_n_epoch:
            if not self.training.use_sgd:
                utils.debug('epoch {}, starting to use SGD'.format(epoch + 1))
                self.training.use_sgd = True
                if sgd_learning_rate is not None:
                    self.learning_rate.assign(sgd_learning_rate).eval()
                self.training.last_decay = global_step  # reset learning rate decay

        if steps_per_checkpoint and global_step % steps_per_checkpoint == 0:
            loss = self.training.loss / self.training.steps
            baseline_loss = self.training.baseline_loss / self.training.steps
            step_time = self.training.time / self.training.steps

            summary = 'step {} epoch {} learning rate {:.3g} step-time {:.3f} loss {:.3f}'.format(
                global_step, epoch + 1, self.learning_rate.eval(), step_time,
                loss)

            if self.name is not None:
                summary = '{} {}'.format(self.name, summary)
            if use_baseline and loss_function == 'reinforce':
                summary = '{} baseline-loss {:.4f}'.format(
                    summary, baseline_loss)

            utils.log(summary)

            if decay_if_no_progress and len(
                    self.training.losses) >= decay_if_no_progress:
                if loss >= max(self.training.losses[:decay_if_no_progress]):
                    self.learning_rate_decay_op.eval()

            self.training.losses.append(loss)
            self.training.loss, self.training.time, self.training.steps, self.training.baseline_loss = 0, 0, 0, 0

        if steps_per_eval and global_step % steps_per_eval == 0 and 0 <= eval_burn_in <= global_step:

            eval_dir = 'eval' if self.name is None else 'eval_{}'.format(
                self.name)
            eval_output = os.path.join(model_dir, eval_dir)

            os.makedirs(eval_output, exist_ok=True)

            # if there are several dev files, we define several output files
            output = [
                os.path.join(eval_output,
                             '{}.{}.out'.format(prefix, global_step))
                for prefix in self.dev_prefix
            ]

            kwargs_ = dict(kwargs)
            kwargs_['output'] = output
            score, *_ = self.evaluate(on_dev=True, **kwargs_)
            self.training.scores.append((global_step, score))

        if steps_per_eval and global_step % steps_per_eval == 0:
            raise utils.EvalException
        elif steps_per_checkpoint and global_step % steps_per_checkpoint == 0:
            raise utils.CheckpointException
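A minimal sketch of the decay test used in the step function above, written as a standalone helper (`should_decay` is hypothetical and not part of the original class; it only assumes the same `batch_size`/`train_size` bookkeeping):

def should_decay(global_step, last_decay_step, batch_size, train_size,
                 decay_after_n_epoch, decay_every_n_epoch):
    """Return True when the learning rate should be decayed at this step."""
    if decay_after_n_epoch is None or decay_every_n_epoch is None:
        return False
    if batch_size * global_step < decay_after_n_epoch * train_size:
        return False  # still before the first scheduled decay
    # enough examples seen since the last decay?
    return (batch_size * (global_step - last_decay_step)
            >= decay_every_n_epoch * train_size)

# e.g. with batch_size=80 and train_size=100000, decay starts after 5 epochs
# and then repeats every epoch:
# should_decay(7500, 6250, 80, 100000, 5, 1)  -> True
# should_decay(7000, 6250, 80, 100000, 5, 1)  -> False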
Example No. 25
    def train(self,
              sess,
              beam_size,
              steps_per_checkpoint,
              steps_per_eval=None,
              eval_output=None,
              max_steps=0,
              max_epochs=0,
              eval_burn_in=0,
              decay_if_no_progress=5,
              decay_after_n_epoch=None,
              decay_every_n_epoch=None,
              sgd_after_n_epoch=None,
              loss_function='xent',
              baseline_steps=0,
              reinforce_baseline=True,
              reward_function=None,
              use_edits=False,
              **kwargs):
        utils.log('reading training and development data')

        self.global_step = 0
        for model in self.models:
            model.read_data(**kwargs)
            # these counters track the training progress of each task
            model.loss, model.time, model.steps = 0, 0, 0
            model.baseline_loss = 0
            model.previous_losses = []
            global_step = model.global_step.eval(sess)
            model.epoch = model.batch_size * global_step // model.train_size
            model.last_decay = global_step

            for _ in range(global_step):  # skip batches already seen, so training resumes at the right point
                next(model.batch_iterator)

            self.global_step += global_step

        # pre-train baseline
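        # (the baseline network estimates the expected reward; training it for a few
        # steps first reduces the variance of the first REINFORCE updates)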
        if loss_function == 'reinforce' and baseline_steps > 0 and reinforce_baseline:
            utils.log('pre-training baseline')
            for model in self.models:
                baseline_loss = 0
                for step in range(1, baseline_steps + 1):
                    baseline_loss += model.baseline_step(
                        sess,
                        reward_function=reward_function,
                        use_edits=use_edits)

                    if step % steps_per_checkpoint == 0:
                        loss = baseline_loss / steps_per_checkpoint
                        baseline_loss = 0
                        utils.log('{} step {} baseline loss {:.4f}'.format(
                            model.name, step, loss))

        utils.log('starting training')
        while True:
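            # sample one task for this step, proportionally to the configured ratios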
            i = np.random.choice(len(self.models), 1, p=self.ratios)[0]
            model = self.models[i]

            start_time = time.time()
            res = model.train_step(sess,
                                   loss_function=loss_function,
                                   reward_function=reward_function,
                                   use_edits=use_edits)
            model.loss += res.loss

            if loss_function == 'reinforce':
                model.baseline_loss += res.baseline_loss

            model.time += time.time() - start_time
            model.steps += 1
            self.global_step += 1
            model_global_step = model.global_step.eval(sess)

            epoch = model.batch_size * model_global_step / model.train_size
            model.epoch = int(epoch) + 1

            if decay_after_n_epoch is not None and epoch >= decay_after_n_epoch:
                if decay_every_n_epoch is not None and (
                        model.batch_size *
                    (model_global_step - model.last_decay) >=
                        decay_every_n_epoch * model.train_size):
                    sess.run(model.learning_rate_decay_op)
                    utils.debug('  decaying learning rate to: {:.4f}'.format(
                        model.learning_rate.eval()))
                    model.last_decay = model_global_step

            if sgd_after_n_epoch is not None and epoch >= sgd_after_n_epoch:
                if not model.use_sgd:
                    utils.debug('  epoch {}, starting to use SGD'.format(
                        model.epoch))
                    model.use_sgd = True

            if steps_per_checkpoint and self.global_step % steps_per_checkpoint == 0:
                for model_ in self.models:
                    if model_.steps == 0:
                        continue

                    loss_ = model_.loss / model_.steps
                    step_time_ = model_.time / model_.steps

                    if loss_function == 'reinforce':
                        baseline_loss_ = ' baseline loss {:.4f}'.format(
                            model_.baseline_loss / model_.steps)
                        model_.baseline_loss = 0
                    else:
                        baseline_loss_ = ''

                    utils.log(
                        '{} step {} epoch {} learning rate {:.4f} step-time {:.4f}{} loss {:.4f}'
                        .format(model_.name,
                                model_.global_step.eval(sess), model_.epoch,
                                model_.learning_rate.eval(), step_time_,
                                baseline_loss_, loss_))

                    if decay_if_no_progress and len(
                            model_.previous_losses) >= decay_if_no_progress:
                        # compare against the most recent checkpoint losses (no recent progress)
                        if loss_ >= max(
                                model_.previous_losses[-decay_if_no_progress:]):
                            sess.run(model_.learning_rate_decay_op)

                    model_.previous_losses.append(loss_)
                    model_.loss, model_.time, model_.steps = 0, 0, 0
                    model_.eval_step(sess)

                self.save(sess)

            if steps_per_eval and self.global_step % steps_per_eval == 0 and 0 <= eval_burn_in <= self.global_step:
                score = 0

                for ratio, model_ in zip(self.ratios, self.models):
                    if eval_output is None:
                        output = None
                    elif len(model_.filenames.dev) > 1:
                        # if there are several dev files, we define several output files
                        # TODO: put dev_prefix into the name of the output file (also in the logging output)
                        output = [
                            '{}.{}.{}.{}'.format(eval_output, i + 1,
                                                 model_.name,
                                                 model_.global_step.eval(sess))
                            for i in range(len(model_.filenames.dev))
                        ]
                    else:
                        output = '{}.{}.{}'.format(
                            eval_output, model_.name,
                            model_.global_step.eval(sess))

                    # kwargs_ = {**kwargs, 'output': output}
                    kwargs_ = dict(kwargs)
                    kwargs_['output'] = output
                    scores_ = model_.evaluate(sess,
                                              beam_size,
                                              on_dev=True,
                                              use_edits=use_edits,
                                              **kwargs_)
                    # in case there are several dev files, only the first one counts
                    score_ = scores_[0]

                    # if there is a main task, pick best checkpoint according to its score
                    # otherwise use the average score across tasks
                    if self.main_task is None:
                        score += ratio * score_
                    elif model_.name == self.main_task:
                        score = score_

                self.manage_best_checkpoints(self.global_step, score)

            if 0 < max_steps <= self.global_step or 0 < max_epochs <= epoch:
                utils.log('finished training')
                # TODO: save models
                return
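A small sketch of how the dev score driving checkpoint selection above is aggregated (`aggregate_dev_score` is a hypothetical helper; the real code keeps a running sum inside the loop):

def aggregate_dev_score(task_scores, ratios, main_task=None):
    """task_scores: {task name: first dev score}; ratios: sampling ratios in the same order."""
    if main_task is not None:
        # if a main task is defined, its score alone decides the best checkpoint
        return task_scores[main_task]
    # otherwise, average the per-task scores weighted by the sampling ratios
    return sum(ratio * task_scores[name] for name, ratio in zip(task_scores, ratios))

# aggregate_dev_score({'mt': 30.1, 'ape': 62.4}, ratios=[0.5, 0.5])                   -> 46.25
# aggregate_dev_score({'mt': 30.1, 'ape': 62.4}, ratios=[0.5, 0.5], main_task='ape')  -> 62.4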
Example No. 26
    def evaluate(self,
                 score_functions,
                 on_dev=True,
                 output=None,
                 remove_unk=False,
                 max_dev_size=None,
                 raw_output=False,
                 fix_edits=True,
                 max_test_size=None,
                 post_process_script=None,
                 unk_replace=False,
                 **kwargs):
        """
        Decode a dev or test set, and perform evaluation against the gold standard, using the provided
        scoring functions. If `output` is defined, also save the decoding output to this file.
        When evaluating development data (`on_dev` set to True), several dev sets can be specified
        (`dev_prefix` parameter in configuration files), and a score is computed for each of them.

        :param score_functions: names of the scoring functions used to score and rank models (the first
            one is the main score, typically 'bleu')
        :param on_dev: if True, evaluate the dev corpus, otherwise evaluate the test corpus
        :param output: save the hypotheses to this file
        :param remove_unk: remove the UNK symbols from the output
        :param max_dev_size: maximum number of lines to read from dev files
        :param max_test_size: maximum number of lines to read from test files
        :param raw_output: save raw decoder output (don't do post-processing like UNK deletion or subword
            concatenation). The evaluation is still done with the post-processed output.
        :param fix_edits: when predicting edit operations, pad shorter hypotheses with KEEP symbols.
        :return: scores of each corpus to evaluate
        """
        utils.log('starting evaluation')

        if on_dev:
            filenames = self.filenames.dev
        else:
            filenames = [self.filenames.test]

        # convert `output` into a list, for zip
        if isinstance(output, str):
            output = [output]
        elif output is None:
            output = [None] * len(filenames)

        scores = []

        # evaluation on multiple corpora
        for dev_id, (filenames_, output_, prefix) in enumerate(
                zip(filenames, output, self.dev_prefix)):
            if self.ref_ext is not None:
                filenames_ = filenames_[:len(self.src_ext)] + filenames_[-1:]

            if self.dev_batches:
                dev_batches = self.dev_batches[dev_id]
                dev_loss = sum(
                    self.seq2seq_model.step(batch, update_model=False).loss *
                    len(batch) for batch in dev_batches)
                dev_loss /= sum(map(len, dev_batches))
            else:  # TODO
                dev_loss = 0

            src_lines = list(
                utils.read_lines(filenames_[:len(self.src_ext)],
                                 binary=self.binary[:len(self.src_ext)]))
            trg_lines = list(utils.read_lines([filenames_[len(self.src_ext)]]))

            assert len(trg_lines) % len(src_lines) == 0

            references = []
            ref_count = len(trg_lines) // len(src_lines)
            for i in range(len(src_lines)):
                ref = trg_lines[i * ref_count:(i + 1) * ref_count]
                ref = [
                    ref_[0].strip().replace('@@ ', '').replace('@@', '')
                    for ref_ in ref
                ]
                references.append(ref)

            if on_dev and max_dev_size:
                max_size = max_dev_size
            elif not on_dev and max_test_size:
                max_size = max_test_size
            else:
                max_size = len(src_lines)

            src_lines = src_lines[:max_size]
            references = references[:max_size]

            hypotheses = []
            output_file = None
            try:
                if output_ is not None:
                    output_file = open(output_, 'w')

                hypothesis_iter = self.decode_batch(src_lines,
                                                    self.batch_size,
                                                    remove_unk=remove_unk,
                                                    fix_edits=fix_edits,
                                                    unk_replace=unk_replace)
                if post_process_script is not None:
                    raw_hypotheses, raw = zip(*hypothesis_iter)
                    data = '\n'.join(raw_hypotheses).encode()
                    data = Popen(
                        [post_process_script], stdout=PIPE,
                        stdin=PIPE).communicate(input=data)[0].decode()
                    # leave `hypotheses` empty here: the loop below fills it (re-assigning
                    # it would make every hypothesis appear twice)
                    hypothesis_iter = zip(data.splitlines(), raw)

                for i, hypothesis in enumerate(hypothesis_iter):
                    hypothesis, raw = hypothesis
                    hypotheses.append(hypothesis)
                    if output_file is not None:
                        if raw_output:
                            hypothesis = raw
                        output_file.write(hypothesis + '\n')
                        output_file.flush()
            finally:
                if output_file is not None:
                    output_file.close()

            scores_ = []
            summary = None

            for score_function in score_functions:
                try:
                    if score_function != 'bleu':
                        references_ = [ref[0] for ref in references]
                    else:
                        references_ = references

                    if score_function == 'loss':
                        score = dev_loss
                        reversed_ = True
                    else:
                        fun = getattr(evaluation, 'corpus_' + score_function)
                        try:
                            reversed_ = fun.reversed
                        except AttributeError:
                            reversed_ = False
                        score, score_summary = fun(hypotheses, references_)
                        summary = summary or score_summary

                    scores_.append((score_function, score, reversed_))
                except Exception:
                    # a scorer may fail (e.g. on empty output); skip it and keep the others
                    pass

            score_info = [
                '{}={:.2f}'.format(key, value) for key, value, _ in scores_
            ]
            score_info.insert(0, prefix)
            if summary:
                score_info.append(summary)

            if self.name is not None:
                score_info.insert(0, self.name)

            utils.log(' '.join(map(str, score_info)))

            # main score
            _, score, reversed_ = scores_[0]
            scores.append(-score if reversed_ else score)

        return scores
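The multi-reference handling above groups a flat reference file into `ref_count` references per source sentence; a self-contained sketch of that grouping (hypothetical helper, operating on plain strings instead of `utils.read_lines` tuples):

def group_references(trg_lines, src_count):
    """Group a flat list of reference lines into one list per source sentence,
    stripping BPE continuation markers ('@@')."""
    assert len(trg_lines) % src_count == 0
    ref_count = len(trg_lines) // src_count
    return [
        [ref.strip().replace('@@ ', '').replace('@@', '')
         for ref in trg_lines[i * ref_count:(i + 1) * ref_count]]
        for i in range(src_count)
    ]

# group_references(['a b', 'a c', 'x y', 'x z'], src_count=2)
# -> [['a b', 'a c'], ['x y', 'x z']]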
Example No. 27
def load_checkpoint(sess, checkpoint_dir, filename=None, blacklist=()):
    """
    if `filename` is None, we load last checkpoint, otherwise
      we ignore `checkpoint_dir` and load the given checkpoint file.
    """
    if filename is None:
        # load last checkpoint
        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        if ckpt is not None:
            filename = ckpt.model_checkpoint_path
    else:
        checkpoint_dir = os.path.dirname(filename)

    var_file = os.path.join(checkpoint_dir, 'vars.pkl')

    def get_variable_by_name(name):
        for var in tf.global_variables():
            if var.name == name:
                return var
        return None

    if os.path.exists(var_file):
        with open(var_file, 'rb') as f:
            var_names = pickle.load(f)

        variables = {}

        for var_name in var_names:
            skip = False
            for var in tf.global_variables():
                name = var.name
                for key, value in reverse_mapping:
                    name = re.sub(key, value, name)
                if var_name == name:
                    variables[var_name] = var
                    skip = True
                    break

            if skip:
                continue

            name = var_name
            for key, value in variable_mapping:
                name = re.sub(key, value, name)

            for var in tf.global_variables():
                if var.name == name:
                    variables[var_name] = var
                    break
    else:
        variables = {var.name: var for var in tf.global_variables()}

    # remove variables from blacklist
    # variables = [var for var in variables if not any(prefix in var.name for prefix in blacklist)]
    variables = {
        name[:-2]: var
        for name, var in variables.items()
        if not any(prefix in name for prefix in blacklist)
    }

    if filename is not None:
        utils.log('reading model parameters from {}'.format(filename))
        tf.train.Saver(variables).restore(sess, filename)

        utils.debug('retrieved parameters ({})'.format(len(variables)))
        for var in sorted(variables.values(), key=lambda var: var.name):
            utils.debug('  {} {}'.format(var.name, var.get_shape()))
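What makes the renaming above work is that `tf.train.Saver` accepts a `{name in checkpoint: variable}` dictionary. A minimal sketch, with a purely illustrative mapping (the real patterns live in `variable_mapping`/`reverse_mapping`):

import re
import tensorflow as tf

def restore_with_renaming(sess, checkpoint_path, mapping):
    """mapping: list of (regex, replacement) pairs turning current variable names
    into the names stored in the checkpoint."""
    var_list = {}
    for var in tf.global_variables():
        old_name = var.name[:-2]  # drop the ':0' suffix
        for pattern, repl in mapping:
            old_name = re.sub(pattern, repl, old_name)
        var_list[old_name] = var
    tf.train.Saver(var_list).restore(sess, checkpoint_path)

# e.g. restore_with_renaming(sess, 'checkpoints/best',
#                            mapping=[(r'^attention_decoder', 'decoder')])  # illustrative pattern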
Example No. 28
def main(args=None):
    args = parser.parse_args(args)

    # read config file and default config
    with open('config/default.yaml') as f:
        default_config = utils.AttrDict(yaml.safe_load(f))

    with open(args.config) as f:
        config = utils.AttrDict(yaml.safe_load(f))

        if args.learning_rate is not None:
            args.reset_learning_rate = True

        # command-line parameters have higher precedence than config file
        for k, v in vars(args).items():
            if v is not None:
                config[k] = v

        # set default values for parameters that are not defined
        for k, v in default_config.items():
            config.setdefault(k, v)

    # enforce parameter constraints
    assert config.steps_per_eval % config.steps_per_checkpoint == 0, (
        'steps-per-eval should be a multiple of steps-per-checkpoint')
    assert args.decode is not None or args.eval or args.train or args.align, (
        'you need to specify at least one action (decode, eval, align, or train)'
    )

    if args.purge:
        utils.log('deleting previous model')
        shutil.rmtree(config.model_dir, ignore_errors=True)

    logging_level = logging.DEBUG if args.verbose else logging.INFO
    # always log to stdout in decoding and eval modes (to avoid overwriting precious train logs)
    logger = utils.create_logger(config.log_file if args.train else None)
    logger.setLevel(logging_level)

    utils.log(' '.join(sys.argv))  # print command line
    try:  # print git hash
        commit_hash = subprocess.check_output(['git', 'rev-parse',
                                               'HEAD']).decode().strip()
        utils.log('commit hash {}'.format(commit_hash))
    except Exception:
        pass  # not inside a git repository, or git is not installed

    # list of encoder and decoder parameter names (each encoder and decoder can have a different value
    # for those parameters)
    model_parameters = [
        'cell_size', 'layers', 'vocab_size', 'embedding_size',
        'attention_filters', 'attention_filter_length', 'use_lstm',
        'time_pooling', 'attention_window_size', 'dynamic', 'binary',
        'character_level', 'bidir', 'load_embeddings', 'pooling_avg',
        'swap_memory', 'parallel_iterations', 'input_layers',
        'residual_connections', 'attn_size'
    ]
    # TODO: independent model dir for each task
    task_parameters = [
        'data_dir', 'train_prefix', 'dev_prefix', 'vocab_prefix', 'ratio',
        'lm_file', 'learning_rate', 'learning_rate_decay_factor',
        'max_input_len', 'max_output_len', 'encoders', 'decoder'
    ]

    # in case no task is defined (standard mono-task settings), define a "main" task
    config.setdefault('tasks', [{
        'encoders': config.encoders,
        'decoder': config.decoder,
        'name': 'main',
        'ratio': 1.0
    }])
    config.tasks = [utils.AttrDict(task) for task in config.tasks]

    for task in config.tasks:
        for parameter in task_parameters:
            task.setdefault(parameter, config.get(parameter))

        if isinstance(task.dev_prefix, str):  # for back-compatibility with old config files
            task.dev_prefix = [task.dev_prefix]

        # convert dicts to AttrDicts for convenience
        task.encoders = [utils.AttrDict(encoder) for encoder in task.encoders]
        task.decoder = utils.AttrDict(task.decoder)

        for encoder_or_decoder in task.encoders + [task.decoder]:
            # move parameters all the way up from base level to encoder/decoder level:
            # default values for encoder/decoder parameters can be defined at the task level and base level
            # default values for tasks can be defined at the base level
            for parameter in model_parameters:
                if parameter in encoder_or_decoder:
                    continue
                elif parameter in task:
                    encoder_or_decoder[parameter] = task[parameter]
                else:
                    encoder_or_decoder[parameter] = config.get(parameter)

    # log parameters
    utils.log('program arguments')
    for k, v in sorted(config.items(), key=itemgetter(0)):
        if k == 'tasks':
            utils.log('  {:<20}\n{}'.format(k, pformat(v)))
        elif k not in model_parameters and k not in task_parameters:
            utils.log('  {:<20} {}'.format(k, pformat(v)))

    device = None
    if config.no_gpu:
        device = '/cpu:0'
    elif config.gpu_id is not None:
        device = '/gpu:{}'.format(config.gpu_id)

    utils.log('creating model')
    utils.log('using device: {}'.format(device))

    with tf.device(device):
        checkpoint_dir = os.path.join(config.model_dir, 'checkpoints')
        # All parameters except recurrent connections and attention parameters are initialized with this initializer.
        # Recurrent connections are initialized with orthogonal matrices, and the parameters of the attention model
        # with a standard deviation of 0.001.
        if config.weight_scale:
            initializer = tf.random_normal_initializer(
                stddev=config.weight_scale)
        else:
            initializer = None

        tf.get_variable_scope().set_initializer(initializer)
        decode_only = args.decode is not None or args.eval or args.align  # exempt from creating gradient ops
        model = MultiTaskModel(name='main',
                               checkpoint_dir=checkpoint_dir,
                               decode_only=decode_only,
                               **config)

    utils.log('model parameters ({})'.format(len(tf.global_variables())))
    parameter_count = 0
    for var in tf.global_variables():
        utils.log('  {} {}'.format(var.name, var.get_shape()))

        v = 1
        for d in var.get_shape():
            v *= d.value
        parameter_count += v
    utils.log('number of parameters: {}'.format(parameter_count))

    tf_config = tf.ConfigProto(log_device_placement=False,
                               allow_soft_placement=True)
    tf_config.gpu_options.allow_growth = config.allow_growth
    tf_config.gpu_options.per_process_gpu_memory_fraction = config.mem_fraction

    with tf.Session(config=tf_config) as sess:
        best_checkpoint = os.path.join(checkpoint_dir, 'best')

        if config.ensemble and (args.eval or args.decode is not None):
            # create one session for each model in the ensemble
            sess = [tf.Session() for _ in config.checkpoints]
            for sess_, checkpoint in zip(sess, config.checkpoints):
                model.initialize(sess_, [checkpoint], reset=True)
        elif (not config.checkpoints
              and (args.eval or args.decode is not None or args.align)
              and os.path.isfile(best_checkpoint + '.index')):
            # in decoding and evaluation mode, unless specified otherwise (by `checkpoints`),
            # try to load the best checkpoint
            model.initialize(sess, [best_checkpoint], reset=True)
        else:
            # loads last checkpoint, unless `reset` is true
            model.initialize(sess, **config)

        # Inspect variables:
        # tf.get_variable_scope().reuse_variables()
        # import pdb; pdb.set_trace()
        if args.decode is not None:
            model.decode(sess, **config)
        elif args.eval:
            model.evaluate(sess, on_dev=False, **config)
        elif args.align:
            model.align(sess, **config)
        elif args.train:
            eval_output = os.path.join(config.model_dir, 'eval')
            try:
                model.train(sess, eval_output=eval_output, **config)
            except KeyboardInterrupt:
                utils.log('exiting...')
                model.save(sess)
                sys.exit()
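The configuration precedence applied above (command line over config file over defaults, with `None` meaning "not set") boils down to the following sketch (`merge_config` is a hypothetical helper):

def merge_config(cli_args, file_config, default_config):
    """Command-line flags override the config file, which overrides the defaults."""
    config = dict(default_config)
    config.update(file_config)
    config.update({k: v for k, v in cli_args.items() if v is not None})
    return config

# merge_config({'batch_size': 32, 'learning_rate': None},
#              {'learning_rate': 0.001},
#              {'batch_size': 64, 'dropout': 0.0})
# -> {'batch_size': 32, 'dropout': 0.0, 'learning_rate': 0.001}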
Example No. 29
    def evaluate(self,
                 score_function,
                 on_dev=True,
                 output=None,
                 remove_unk=False,
                 max_dev_size=None,
                 raw_output=False,
                 fix_edits=True,
                 max_test_size=None,
                 post_process_script=None,
                 **kwargs):
        """
        Decode a dev or test set, and perform evaluation with respect to gold standard, using the provided
        scoring function. If `output` is defined, also save the decoding output to this file.
        When evaluating development data (`on_dev` set to True), several dev sets can be specified (`dev_prefix` parameter
        in configuration files), and a score is computed for each of them.

        :param score_function: name of the scoring function used to score and rank models (typically 'bleu_score')
        :param on_dev: if True, evaluate the dev corpus, otherwise evaluate the test corpus
        :param output: save the hypotheses to this file
        :param remove_unk: remove the UNK symbols from the output
        :param max_dev_size: maximum number of lines to read from dev files
        :param max_test_size: maximum number of lines to read from test files
        :param raw_output: save raw decoder output (don't do post-processing like UNK deletion or subword
            concatenation). The evaluation is still done with the post-processed output.
        :param fix_edits: when predicting edit operations, pad shorter hypotheses with KEEP symbols.
        :return: scores of each corpus to evaluate
        """
        utils.log('starting decoding')

        if on_dev:
            filenames = self.filenames.dev
        else:
            filenames = [self.filenames.test]

        # convert `output` into a list, for zip
        if isinstance(output, str):
            output = [output]
        elif output is None:
            output = [None] * len(filenames)

        scores = []

        for filenames_, output_, prefix in zip(
                filenames, output,
                self.dev_prefix):  # evaluation on multiple corpora
            extensions = list(self.extensions)
            if self.ref_ext is not None:
                extensions.append(self.ref_ext)

            lines = list(utils.read_lines(filenames_, binary=self.binary))

            if on_dev and max_dev_size:
                lines = lines[:max_dev_size]
            elif not on_dev and max_test_size:
                lines = lines[:max_test_size]

            hypotheses = []
            references = []

            output_file = None

            try:
                if output_ is not None:
                    output_file = open(output_, 'w')

                lines_ = list(zip(*lines))

                src_sentences = list(zip(*lines_[:len(self.src_ext)]))
                trg_sentences = list(zip(*lines_[len(self.src_ext):]))

                hypothesis_iter = self.decode_batch(lines,
                                                    self.batch_size,
                                                    remove_unk=remove_unk,
                                                    fix_edits=fix_edits)

                for i, (sources, hypothesis, reference) in enumerate(
                        zip(src_sentences, hypothesis_iter, trg_sentences)):
                    if self.ref_ext is not None and on_dev:
                        reference = reference[-1]
                    else:
                        reference = reference[0]  # single output for now

                    hypothesis, raw = hypothesis

                    hypotheses.append(hypothesis)
                    references.append(reference.strip().replace('@@ ', ''))

                    if output_file is not None:
                        if raw_output:
                            hypothesis = raw

                        output_file.write(hypothesis + '\n')
                        output_file.flush()

            finally:
                if output_file is not None:
                    output_file.close()

            if post_process_script is not None:
                data = '\n'.join(hypotheses).encode()
                data = Popen([post_process_script], stdout=PIPE,
                             stdin=PIPE).communicate(input=data)[0].decode()
                hypotheses = data.splitlines()

            # the scoring function is looked up by name in the `evaluation` module (typically 'bleu_score')
            score, score_summary = getattr(evaluation,
                                           score_function)(hypotheses,
                                                           references)

            # print scoring information
            score_info = [prefix, 'score={:.2f}'.format(score)]

            if score_summary:
                score_info.append(score_summary)

            if self.name is not None:
                score_info.insert(0, self.name)

            utils.log(' '.join(map(str, score_info)))
            scores.append(score)

        return scores
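The external post-processing used above simply pipes the hypotheses through a user-provided script, one sentence per line; a minimal sketch (the script path in the usage comment is hypothetical):

from subprocess import Popen, PIPE

def post_process(hypotheses, script):
    """Pipe hypotheses through an external script and read back one line per sentence."""
    data = '\n'.join(hypotheses).encode()
    out, _ = Popen([script], stdin=PIPE, stdout=PIPE).communicate(input=data)
    return out.decode().splitlines()

# e.g. post_process(['a sentence', 'another one'], script='./detokenize.sh')  # hypothetical script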
Example No. 30
    def decode_batch(self, sentence_tuples, batch_size, remove_unk=False, fix_edits=True, unk_replace=False,
                     align=False, reverse=False, output=None):
        utils.log("start decode batch")
        if batch_size == 1:
            batches = ([sentence_tuple] for sentence_tuple in sentence_tuples)  # lazy
        else:
            batch_count = int(math.ceil(len(sentence_tuples) / batch_size))
            batches = [sentence_tuples[i * batch_size:(i + 1) * batch_size] for i in range(batch_count)]

        def map_to_ids(sentence_tuple):
            token_ids = [
                sentence if vocab is None else
                utils.sentence_to_token_ids(sentence, vocab.vocab, character_level=self.character_level.get(ext))
                for ext, vocab, sentence in zip(self.extensions, self.vocabs, sentence_tuple)
            ]
            return token_ids

        line_id = 0
        for batch_id, batch in enumerate(batches):
            token_ids = list(map(map_to_ids, batch))
            batch_token_ids, batch_weights = self.seq2seq_model.greedy_decoding(token_ids, align=unk_replace or align)
            utils.log("batch_token_ids")
            utils.log(batch_token_ids)
            utils.log(len(batch_token_ids))
            utils.log(len(batch_token_ids[0]))
            utils.log(len(batch_token_ids[0][0]))
            utils.log(len(batch_token_ids[0][0][0]))
            batch_token_ids = zip(*batch_token_ids)

            for sentence_id, (src_tokens, trg_token_ids) in enumerate(zip(batch, batch_token_ids)):
                # trg_token_ids: one (beam_size, max_len) array of token ids per decoder output
                line_id += 1

                trg_tokens = []

                for trg_token_ids_, vocab in zip(trg_token_ids, self.trg_vocab):
                    # trg_token_ids_: (beam_size, max_len) ids for this decoder output
                    top_10_trg_tokens = []  # one token list per beam hypothesis
                    for single_trg_token_ids in trg_token_ids_:
                        # single_trg_token_ids: ids of a single beam hypothesis
                        single_trg_token_ids = list(single_trg_token_ids)
                        if utils.EOS_ID in single_trg_token_ids:
                            single_trg_token_ids = single_trg_token_ids[:single_trg_token_ids.index(utils.EOS_ID)]
                        single_trg_token_ids = [vocab.reverse[i] if i < len(vocab.reverse) else utils._UNK
                                                for i in single_trg_token_ids]
                        top_10_trg_tokens.append(single_trg_token_ids)

                    trg_tokens.append(top_10_trg_tokens)
                    # trg_tokens: per decoder output, a list of beam_size token lists

                if align:
                    weights_ = batch_weights[sentence_id].squeeze()
                    max_len_ = weights_.shape[1]
                    src_tokens_ = src_tokens[0].split()[:max_len_ - 1] + [utils._EOS]
                    src_tokens_ = [token if token in self.src_vocab[0].vocab else utils._UNK for token in src_tokens_]
                    trg_tokens_ = trg_tokens[0][0][:weights_.shape[0] - 1] + [utils._EOS]

                    weights_ = weights_[:len(trg_tokens_), :len(src_tokens_)]
                    output_file = output and '{}.{}.pdf'.format(output, line_id)
                    utils.heatmap(src_tokens_, trg_tokens_, weights_, reverse=reverse, output_file=output_file)

                if unk_replace:
                    weights = batch_weights[sentence_id]
                    src_words = src_tokens[0].split()
                    align_ids = np.argmax(weights[:, :len(src_words)], axis=1)

                    def replace(token, align_id):
                        if token == utils._UNK:
                            token = src_words[align_id]
                            if not token[0].isupper() and self.lexicon is not None and token in self.lexicon:
                                token = self.lexicon[token]
                        return token

                    for i in range(len(trg_tokens[0])):
                        trg_tokens[0][i] = [replace(token, align_id) for align_id, token in
                                            zip(align_ids, trg_tokens[0][i])]

                #########################################################################
                if self.pred_edits:
                    # first output is ops, second output is words
                    raw_hypothesis = ' '.join('_'.join(tokens) for tokens in zip(*trg_tokens))
                    src_words = src_tokens[0].split()
                    trg_tokens = utils.reverse_edits(src_words, trg_tokens, fix=fix_edits)
                    trg_tokens = [token for token in trg_tokens if token not in utils._START_VOCAB]
                    # FIXME: char-level
                else:
                    # keep only the first decoder output: one raw hypothesis per beam entry
                    trg_tokens = trg_tokens[0]
                    sep = '' if self.char_output else ' '
                    raw_hypothesis = [sep.join(single_trg_tokens) for single_trg_tokens in trg_tokens]

                if remove_unk:
                    for i in range(len(trg_tokens)):
                        trg_tokens[i] = [token for token in trg_tokens[i] if token != utils._UNK]

                if self.char_output:
                    hypothesis = [''.join(tokens) for tokens in trg_tokens]
                else:
                    # merge subword units in each hypothesis
                    hypothesis = [' '.join(tokens).replace('@@ ', '') for tokens in trg_tokens]

                yield hypothesis, raw_hypothesis
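A condensed sketch of the per-hypothesis post-processing done above (EOS truncation, id-to-token lookup with UNK fallback, subword merging); `eos_id` and `unk` are illustrative stand-ins for `utils.EOS_ID` and `utils._UNK`:

def ids_to_sentence(token_ids, vocab_reverse, char_output=False, eos_id=2, unk='<UNK>'):
    """Turn one beam hypothesis (a list of token ids) into a sentence string."""
    token_ids = list(token_ids)
    if eos_id in token_ids:
        token_ids = token_ids[:token_ids.index(eos_id)]  # truncate at the first EOS
    tokens = [vocab_reverse[i] if i < len(vocab_reverse) else unk for i in token_ids]
    sep = '' if char_output else ' '
    return sep.join(tokens).replace('@@ ', '')  # merge subword units

# ids_to_sentence([4, 5, 2], ['<PAD>', '<GO>', '<EOS>', '<UNK>', 'hello', 'world'])
# -> 'hello world'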