def __init__(self, encoders, decoders, checkpoint_dir, learning_rate, learning_rate_decay_factor,
             batch_size, keep_best=1, dev_prefix=None, name=None, ref_ext=None, pred_edits=False,
             dual_output=False, binary=None, truncate_lines=True, ensemble=False, checkpoints=None,
             beam_size=1, len_normalization=1, lexicon=None, debug=False, **kwargs):
    self.batch_size = batch_size
    self.character_level = {}
    self.binary = []
    self.debug = debug

    # resolve file extensions and per-corpus settings for each encoder and decoder
    for encoder_or_decoder in encoders + decoders:
        encoder_or_decoder.ext = encoder_or_decoder.ext or encoder_or_decoder.name
        self.character_level[encoder_or_decoder.ext] = encoder_or_decoder.character_level
        self.binary.append(encoder_or_decoder.get('binary', False))

    self.encoders, self.decoders = encoders, decoders
    self.char_output = decoders[0].character_level

    self.src_ext = [encoder.ext for encoder in encoders]
    self.trg_ext = [decoder.ext for decoder in decoders]
    self.extensions = self.src_ext + self.trg_ext
    self.ref_ext = ref_ext
    if self.ref_ext is not None:
        self.binary.append(False)

    self.pred_edits = pred_edits
    self.dual_output = dual_output
    self.dev_prefix = dev_prefix
    self.name = name

    self.max_input_len = [encoder.max_len for encoder in encoders]
    self.max_output_len = [decoder.max_len for decoder in decoders]
    self.beam_size = beam_size

    if truncate_lines:
        self.max_len = None  # we let seq2seq.get_batch handle long lines (by truncating them)
    else:  # the line reader will drop lines that are too long
        self.max_len = dict(zip(self.extensions, self.max_input_len + self.max_output_len))

    self.learning_rate = tf.Variable(learning_rate, trainable=False, name='learning_rate',
                                     dtype=tf.float32)
    self.learning_rate_decay_op = self.learning_rate.assign(self.learning_rate * learning_rate_decay_factor)

    # step counters are kept on the CPU
    with tf.device('/cpu:0'):
        self.global_step = tf.Variable(0, trainable=False, name='global_step')
        self.baseline_step = tf.Variable(0, trainable=False, name='baseline_step')

    self.filenames = utils.get_filenames(extensions=self.extensions, dev_prefix=dev_prefix, name=name,
                                         ref_ext=ref_ext, binary=self.binary, **kwargs)

    utils.debug('reading vocabularies')
    self.vocabs = None
    self.src_vocab, self.trg_vocab = None, None
    self.read_vocab()

    for encoder_or_decoder, vocab in zip(encoders + decoders, self.vocabs):
        if vocab:
            if encoder_or_decoder.vocab_size:
                # reduce vocab size
                vocab.reverse[:] = vocab.reverse[:encoder_or_decoder.vocab_size]
                for token, token_id in list(vocab.vocab.items()):
                    if token_id >= encoder_or_decoder.vocab_size:
                        del vocab.vocab[token]
            else:
                encoder_or_decoder.vocab_size = len(vocab.reverse)

    utils.debug('creating model')
    self.models = []

    if ensemble and checkpoints is not None:
        # one sub-model per checkpoint, each in its own variable scope
        for i, _ in enumerate(checkpoints, 1):
            with tf.variable_scope('model_{}'.format(i)):
                model = Seq2SeqModel(encoders, decoders, self.learning_rate, self.global_step, name=name,
                                     pred_edits=pred_edits, dual_output=dual_output,
                                     baseline_step=self.baseline_step, **kwargs)
                self.models.append(model)
        self.seq2seq_model = self.models[0]
    else:
        self.seq2seq_model = Seq2SeqModel(encoders, decoders, self.learning_rate, self.global_step, name=name,
                                          pred_edits=pred_edits, dual_output=dual_output,
                                          baseline_step=self.baseline_step, **kwargs)
        self.models.append(self.seq2seq_model)

    self.seq2seq_model.create_beam_op(self.models, len_normalization)

    self.batch_iterator = None
    self.dev_batches = None
    self.train_size = None
    self.saver = None
    self.keep_best = keep_best
    self.checkpoint_dir = checkpoint_dir
    self.epoch = None

    self.training = utils.AttrDict()  # used to keep track of training

    if lexicon:
        # the lexicon file contains one whitespace-separated (source, target) pair per line
        with open(lexicon) as lexicon_file:
            self.lexicon = dict(line.split() for line in lexicon_file)
    else:
        self.lexicon = None
def __init__(self, name, encoders, decoder, checkpoint_dir, learning_rate, learning_rate_decay_factor,
             batch_size, keep_best=1, load_embeddings=None, max_input_len=None, **kwargs):
    super(TranslationModel, self).__init__(name, checkpoint_dir, keep_best, **kwargs)

    self.batch_size = batch_size
    self.src_ext = [encoder.get('ext') or encoder.name for encoder in encoders]
    self.trg_ext = decoder.get('ext') or decoder.name
    self.extensions = self.src_ext + [self.trg_ext]
    self.max_input_len = max_input_len

    encoders_and_decoder = encoders + [decoder]
    self.binary_input = [encoder_or_decoder.binary for encoder_or_decoder in encoders_and_decoder]
    self.character_level = [encoder_or_decoder.character_level for encoder_or_decoder in encoders_and_decoder]

    self.learning_rate = tf.Variable(learning_rate, trainable=False, name='learning_rate',
                                     dtype=tf.float32)
    self.learning_rate_decay_op = self.learning_rate.assign(self.learning_rate * learning_rate_decay_factor)

    with tf.device('/cpu:0'):
        self.global_step = tf.Variable(0, trainable=False, name='global_step')

    self.filenames = utils.get_filenames(extensions=self.extensions, **kwargs)
    # TODO: check that filenames exist

    utils.debug('reading vocabularies')
    self._read_vocab()

    for encoder_or_decoder, vocab in zip(encoders + [decoder], self.vocabs):
        if encoder_or_decoder.vocab_size <= 0 and vocab is not None:
            encoder_or_decoder.vocab_size = len(vocab.reverse)

    # this adds an `embedding` attribute to each encoder and decoder
    utils.read_embeddings(self.filenames.embeddings, encoders + [decoder], load_embeddings, self.vocabs)

    # main model
    utils.debug('creating model {}'.format(name))
    self.seq2seq_model = Seq2SeqModel(encoders, decoder, self.learning_rate, self.global_step,
                                      max_input_len=max_input_len, **kwargs)

    self.batch_iterator = None
    self.dev_batches = None
    self.train_size = None
    self.use_sgd = False
def __init__(self, encoders, decoders, checkpoint_dir, learning_rate, learning_rate_decay_factor,
             batch_size, keep_best=1, dev_prefix=None, score_function='corpus_scores', name=None,
             ref_ext=None, pred_edits=False, dual_output=False, binary=None, **kwargs):
    self.batch_size = batch_size
    self.character_level = {}
    self.binary = []

    # resolve file extensions and per-corpus settings for each encoder and decoder
    for encoder_or_decoder in encoders + decoders:
        encoder_or_decoder.ext = encoder_or_decoder.ext or encoder_or_decoder.name
        self.character_level[encoder_or_decoder.ext] = encoder_or_decoder.character_level
        self.binary.append(encoder_or_decoder.get('binary', False))

    self.char_output = decoders[0].character_level

    self.src_ext = [encoder.ext for encoder in encoders]
    self.trg_ext = [decoder.ext for decoder in decoders]
    self.extensions = self.src_ext + self.trg_ext
    self.ref_ext = ref_ext
    if self.ref_ext is not None:
        self.binary.append(False)

    self.pred_edits = pred_edits
    self.dual_output = dual_output
    self.dev_prefix = dev_prefix
    self.name = name

    self.max_input_len = [encoder.max_len for encoder in encoders]
    self.max_output_len = [decoder.max_len for decoder in decoders]
    self.max_len = dict(zip(self.extensions, self.max_input_len + self.max_output_len))

    self.learning_rate = tf.Variable(learning_rate, trainable=False, name='learning_rate',
                                     dtype=tf.float32)
    self.learning_rate_decay_op = self.learning_rate.assign(self.learning_rate * learning_rate_decay_factor)

    # step counters are kept on the CPU
    with tf.device('/cpu:0'):
        self.global_step = tf.Variable(0, trainable=False, name='global_step')
        self.baseline_step = tf.Variable(0, trainable=False, name='baseline_step')

    self.filenames = utils.get_filenames(extensions=self.extensions, dev_prefix=dev_prefix, name=name,
                                         ref_ext=ref_ext, binary=self.binary, **kwargs)

    utils.debug('reading vocabularies')
    self.vocabs = None
    self.src_vocab, self.trg_vocab = None, None
    self.read_vocab()

    for encoder_or_decoder, vocab in zip(encoders + decoders, self.vocabs):
        if vocab:
            encoder_or_decoder.vocab_size = len(vocab.reverse)

    utils.debug('creating model')
    self.seq2seq_model = Seq2SeqModel(encoders, decoders, self.learning_rate, self.global_step, name=name,
                                      pred_edits=pred_edits, dual_output=dual_output,
                                      baseline_step=self.baseline_step, **kwargs)

    self.batch_iterator = None
    self.dev_batches = None
    self.train_size = None
    self.saver = None
    self.keep_best = keep_best
    self.checkpoint_dir = checkpoint_dir

    self.training = utils.AttrDict()  # used to keep track of training

    # whether lower scores are better for the chosen evaluation function
    try:
        self.reversed_scores = getattr(evaluation, score_function).reversed  # the lower the better
    except AttributeError:
        self.reversed_scores = False  # the higher the better