Example #1
  def __init__(self, model, src_file=None, trg_file=None, dev_every=0,
               batcher=bare(SrcBatcher, batch_size=32), loss_calculator=None,
               run_for_epochs=None, lr_decay=1.0, lr_decay_times=3, patience=1,
               initial_patience=None, dev_tasks=None, restart_trainer=False,
               reload_command=None, name=None, sample_train_sents=None,
               max_num_train_sents=None, max_src_len=None, max_trg_len=None,
               exp_global=Ref(Path("exp_global"))):
    """
    Args:
      exp_global:
      model: a generator.GeneratorModel object
      src_file: The file for the source data.
      trg_file: The file for the target data.
      dev_every (int): dev checkpoints every n sentences (0 for only after epoch)
      batcher: Type of batcher
      loss_calculator:
      lr_decay (float):
      lr_decay_times (int):  Early stopping after decaying learning rate a certain number of times
      patience (int): apply LR decay after dev scores haven't improved over this many checkpoints
      initial_patience (int): if given, allows adjusting patience for the first LR decay
      dev_tasks: A list of tasks to run on the development set
      restart_trainer: Restart trainer (useful for Adam) and revert weights to best dev checkpoint when applying LR decay (https://arxiv.org/pdf/1706.09733.pdf)
      reload_command: Command to change the input data after each epoch.
                           --epoch EPOCH_NUM will be appended to the command.
                           To just reload the data after each epoch set the command to 'true'.
      sample_train_sents:
      max_num_train_sents:
      max_src_len:
      max_trg_len:
      name: will be prepended to log outputs if given
    """
    self.exp_global = exp_global
    self.model_file = self.exp_global.dynet_param_collection.model_file
    self.src_file = src_file
    self.trg_file = trg_file
    self.dev_tasks = dev_tasks

    if lr_decay > 1.0 or lr_decay <= 0.0:
      raise RuntimeError("illegal lr_decay, must satisfy: 0.0 < lr_decay <= 1.0")
    self.lr_decay = lr_decay
    self.patience = patience
    self.initial_patience = initial_patience
    self.lr_decay_times = lr_decay_times
    self.restart_trainer = restart_trainer
    self.run_for_epochs = run_for_epochs

    self.early_stopping_reached = False
    # training state
    self.training_state = TrainingState()

    self.reload_command = reload_command

    self.model = model
    self.loss_calculator = loss_calculator or LossCalculator(MLELoss())

    self.sample_train_sents = sample_train_sents
    self.max_num_train_sents = max_num_train_sents
    self.max_src_len = max_src_len
    self.max_trg_len = max_trg_len

    self.batcher = batcher
    self.logger = BatchLossTracker(self, dev_every, name)
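The lr_decay, lr_decay_times and patience arguments above jointly define the learning-rate schedule and the early-stopping criterion described in the docstring. The snippet below is a self-contained simulation of that policy for illustration only; the function name, the concrete dev scores and the exact decay trigger are assumptions, not xnmt's actual checkpoint code.

def simulate_lr_schedule(dev_scores, e0=0.1, lr_decay=0.5, lr_decay_times=3, patience=1):
  """Illustrative only: mirrors the documented decay/early-stopping behaviour."""
  lr, best, bad_checkpoints, decays = e0, float("-inf"), 0, 0
  for score in dev_scores:
    if score > best:
      best, bad_checkpoints = score, 0
    else:
      bad_checkpoints += 1
      if bad_checkpoints >= patience:
        lr *= lr_decay          # apply LR decay
        decays += 1
        bad_checkpoints = 0
        if decays >= lr_decay_times:
          return lr, True       # early stopping reached
  return lr, False

# Dev scores stop improving, so the LR is halved until the decay budget is used up:
print(simulate_lr_schedule([0.20, 0.25, 0.25, 0.24, 0.24]))  # -> (0.0125, True)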
Example #2
 def __init__(self,
              tasks,
              trainer=None,
              exp_global=Ref(Path("exp_global"))):
     super().__init__(exp_global=exp_global, tasks=tasks, trainer=trainer)
     self.exp_global = exp_global
Example #3
 def shared_params(self):
     return [
         set([Path(".layers"), Path(".bridge.dec_layers")]),
         set([Path(".lstm_dim"), Path(".bridge.dec_dim")])
     ]
Example #4
 def shared_params(self):
   return [set([Path(".src_embedder.emb_dim"), Path(".encoder.input_dim")]),
           set([Path(".encoder.hidden_dim"), Path(".attender.input_dim"), Path(".decoder.input_dim")]),
           set([Path(".attender.state_dim"), Path(".decoder.lstm_dim")]),
           set([Path(".trg_embedder.emb_dim"), Path(".decoder.trg_embed_dim")])]
Example #5
 def test_eq(self):
     self.assertEqual(Path(""), Path(""))
     self.assertEqual(Path(".."), Path(".."))
     self.assertEqual(Path("one.2"), Path("one.2"))
     self.assertEqual(Path("one.2"), Path("one.2.3").parent())
     self.assertNotEqual(Path("one.2"), Path("one.2.3"))
     self.assertNotEqual(Path(""), Path("."))
Example #6
  def __init__(self, num_layers, input_dim, hidden_dim, add_to_output=False, dropout=None, exp_global=Ref(Path("exp_global"))):
    assert num_layers > 0
    self.builder_layers = []
    self.builder_layers.append(UniLSTMSeqTransducer(exp_global=exp_global, input_dim=input_dim, hidden_dim=hidden_dim, dropout=dropout))
    for _ in range(num_layers - 1):
      self.builder_layers.append(UniLSTMSeqTransducer(exp_global=exp_global, input_dim=hidden_dim, hidden_dim=hidden_dim, dropout=dropout))

    self.add_to_output = add_to_output
Example #7
 def __init__(self, tasks, task_weights=None, trainer=None, exp_global=Ref(Path("exp_global"))):
   super().__init__(exp_global=exp_global, tasks=tasks, trainer=trainer)
   self.task_weights = task_weights or [1./len(tasks)] * len(tasks)
   self.exp_global = exp_global
Example #8
 def test_add_path(self):
     self.assertEqual(str(Path("one").add_path(Path("2"))), "one.2")
     self.assertEqual(str(Path("one").add_path(Path("2.3"))), "one.2.3")
     self.assertEqual(str(Path("").add_path(Path("2.3"))), "2.3")
     self.assertEqual(str(Path("one.2").add_path(Path(""))), "one.2")
     self.assertEqual(str(Path("").add_path(Path(""))), "")
     self.assertEqual(str(Path(".").add_path(Path(""))), ".")
     self.assertEqual(str(Path(".").add_path(Path("one.two"))), ".one.two")
     self.assertEqual(str(Path(".xy").add_path(Path("one.two"))),
                      ".xy.one.two")
     with self.assertRaises(NotImplementedError):
         Path("one").add_path(Path(".2.3"))
Example #9
 def shared_params(self):
     return [set([Path(".input_dim"), Path(".modules.0.input_dim")])]
Example #10
 def test_ancestors(self):
   self.assertEqual(Path("").ancestors(), set([Path("")]))
   self.assertEqual(Path("a").ancestors(), set([Path(""),Path("a")]))
   self.assertEqual(Path("one.two.three").ancestors(), set([Path(""), Path("one"), Path("one.two"), Path("one.two.three")]))
Example #11
 def __init__(self,
              model=Ref(path=Path("model")),
              src_file=None,
              trg_file=None,
              dev_every=0,
              batcher=bare(xnmt.batcher.SrcBatcher, batch_size=32),
              loss_calculator=None,
              trainer=None,
              run_for_epochs=None,
              lr_decay=1.0,
              lr_decay_times=3,
              patience=1,
              initial_patience=None,
              dev_tasks=None,
              restart_trainer=False,
              reload_command=None,
              name=None,
              sample_train_sents=None,
              max_num_train_sents=None,
              max_src_len=None,
              max_trg_len=None,
              exp_global=Ref(Path("exp_global"))):
     """
 :param model: a generator.GeneratorModel object
 :param src_file: the source training file
 :param trg_file: the target training file
 :param dev_every (int): dev checkpoints every n sentences (0 for only after epoch)
 :param batcher: Type of batcher
 :param loss_calculator: The method for calculating the loss.
 :param trainer: Trainer object, default is SGD with learning rate 0.1
 :param run_for_epochs:
 :param lr_decay (float):
 :param lr_decay_times (int):  Early stopping after decaying learning rate a certain number of times
 :param patience (int): apply LR decay after dev scores haven't improved over this many checkpoints
 :param initial_patience (int): if given, allows adjusting patience for the first LR decay
 :param dev_tasks: A list of tasks to use during the development stage.
 :param restart_trainer: Restart trainer (useful for Adam) and revert weights to best dev checkpoint when applying LR decay (https://arxiv.org/pdf/1706.09733.pdf)
 :param reload_command: Command to change the input data after each epoch.
                        --epoch EPOCH_NUM will be appended to the command.
                        To just reload the data after each epoch set the command to 'true'.
 :param name: will be prepended to log outputs if given
 :param sample_train_sents:
 :param max_num_train_sents:
 :param max_src_len:
 :param max_trg_len:
 :param exp_global:
 """
     super().__init__(model=model,
                      src_file=src_file,
                      trg_file=trg_file,
                      dev_every=dev_every,
                      batcher=batcher,
                      loss_calculator=loss_calculator,
                      run_for_epochs=run_for_epochs,
                      lr_decay=lr_decay,
                      lr_decay_times=lr_decay_times,
                      patience=patience,
                      initial_patience=initial_patience,
                      dev_tasks=dev_tasks,
                      restart_trainer=restart_trainer,
                      reload_command=reload_command,
                      name=name,
                      sample_train_sents=sample_train_sents,
                      max_num_train_sents=max_num_train_sents,
                      max_src_len=max_src_len,
                      max_trg_len=max_trg_len,
                      exp_global=exp_global)
     self.trainer = trainer or xnmt.optimizer.SimpleSGDTrainer(
         exp_global=self.exp_global, e0=0.1)
     self.dynet_profiling = getattr(exp_global.commandline_args,
                                    "dynet_profiling", 0)
Example #12
 def __init__(self,
              dec_layers=1,
              dec_dim=None,
              exp_global=Ref(Path("exp_global"))):
     self.dec_layers = dec_layers
     self.dec_dim = dec_dim or exp_global.default_layer_dim
Example #13
File: lstm.py  Project: anhad13/xnmt
 def __init__(self,
              exp_global=Ref(Path("exp_global")),
              layers=1,
              input_dim=None,
              hidden_dim=None,
              dropout=None,
              weightnoise_std=None,
              param_init=None,
              bias_init=None):
     register_handler(self)
     self.num_layers = layers
     input_dim = input_dim or exp_global.default_layer_dim
     hidden_dim = hidden_dim or exp_global.default_layer_dim
     self.hidden_dim = hidden_dim
     self.dropout_rate = dropout or exp_global.dropout
     self.weightnoise_std = weightnoise_std or exp_global.weight_noise
     assert hidden_dim % 2 == 0
     param_init = param_init or exp_global.param_init
     bias_init = bias_init or exp_global.bias_init
     self.forward_layers = [
         UniLSTMSeqTransducer(exp_global=exp_global,
                              input_dim=input_dim,
                               hidden_dim=hidden_dim // 2,
                              dropout=dropout,
                              weightnoise_std=weightnoise_std,
                              param_init=param_init[0] if isinstance(
                                  param_init, Sequence) else param_init,
                              bias_init=bias_init[0] if isinstance(
                                  bias_init, Sequence) else bias_init)
     ]
     self.backward_layers = [
         UniLSTMSeqTransducer(exp_global=exp_global,
                              input_dim=input_dim,
                               hidden_dim=hidden_dim // 2,
                              dropout=dropout,
                              weightnoise_std=weightnoise_std,
                              param_init=param_init[0] if isinstance(
                                  param_init, Sequence) else param_init,
                              bias_init=bias_init[0] if isinstance(
                                  bias_init, Sequence) else bias_init)
     ]
     self.forward_layers += [
         UniLSTMSeqTransducer(exp_global=exp_global,
                              input_dim=hidden_dim,
                               hidden_dim=hidden_dim // 2,
                              dropout=dropout,
                              weightnoise_std=weightnoise_std,
                              param_init=param_init[i] if isinstance(
                                  param_init, Sequence) else param_init,
                              bias_init=bias_init[i] if isinstance(
                                  bias_init, Sequence) else bias_init)
         for i in range(1, layers)
     ]
     self.backward_layers += [
         UniLSTMSeqTransducer(exp_global=exp_global,
                              input_dim=hidden_dim,
                               hidden_dim=hidden_dim // 2,
                              dropout=dropout,
                              weightnoise_std=weightnoise_std,
                              param_init=param_init[i] if isinstance(
                                  param_init, Sequence) else param_init,
                              bias_init=bias_init[i] if isinstance(
                                  bias_init, Sequence) else bias_init)
         for i in range(1, layers)
     ]
Example #14
 def test_str(self):
     self.assertEqual(str(Path("one.2")), "one.2")
     self.assertEqual(str(Path("")), "")
Example #15
  def __init__(self, num_layers, input_dim, hidden_dim, add_to_output=False, dropout=None, exp_global=Ref(Path("exp_global"))):
    """
    :param num_layers: depth of the RNN (> 0)
    :param input_dim: size of the inputs
    :param hidden_dim: size of the outputs (and intermediate layer representations)
    :param model:
    :param add_to_output: whether to add a residual connection to the output layer
    """
    assert num_layers > 0
    self.builder_layers = []
    self.builder_layers.append(UniLSTMSeqTransducer(exp_global=exp_global, input_dim=input_dim, hidden_dim=hidden_dim, dropout=dropout))
    for _ in range(num_layers - 1):
      self.builder_layers.append(UniLSTMSeqTransducer(exp_global=exp_global, input_dim=hidden_dim, hidden_dim=hidden_dim, dropout=dropout))

    self.add_to_output = add_to_output
Example #16
 def test_set(self):
     s = set([Path("one.2"), Path("one.1.3"), Path("one.1.3")])
     self.assertIn(Path("one.2"), s)
     self.assertEqual(len(s), 2)
Example #17
 def __init__(self, exp_global=Ref(Path("exp_global")), e0=0.1):
     self.optimizer = dy.SimpleSGDTrainer(
         exp_global.dynet_param_collection.param_col, e0)
Example #18
 def test_get_absolute(self):
     self.assertEqual(Path(".").get_absolute(Path("1.2")), Path("1.2"))
     self.assertEqual(
         Path(".x.y").get_absolute(Path("1.2")), Path("1.2.x.y"))
     self.assertEqual(
         Path("..x.y").get_absolute(Path("1.2")), Path("1.x.y"))
     self.assertEqual(Path("...x.y").get_absolute(Path("1.2")), Path("x.y"))
     with self.assertRaises(ValueError):
         Path("....x.y").get_absolute(Path("1.2"))
Example #19
 def __init__(self, exp_global=Ref(Path("exp_global")), e0=0.01, mom=0.9):
     self.optimizer = dy.MomentumSGDTrainer(
         exp_global.dynet_param_collection.param_col, e0, mom)
Example #20
 def __init__(self, num_layers, input_dim, hidden_dim, add_to_output=False, dropout=None, exp_global=Ref(Path("exp_global"))):
   assert num_layers > 1
   assert hidden_dim % 2 == 0
   self.forward_layer = UniLSTMSeqTransducer(exp_global=exp_global, input_dim=input_dim, hidden_dim=hidden_dim//2, dropout=dropout)
   self.backward_layer = UniLSTMSeqTransducer(exp_global=exp_global, input_dim=input_dim, hidden_dim=hidden_dim//2, dropout=dropout)
   self.residual_network = ResidualRNNBuilder(exp_global=exp_global, num_layers=num_layers - 1, input_dim=hidden_dim, hidden_dim=hidden_dim,
                                              add_to_output=add_to_output, dropout=dropout)
Example #21
 def __init__(self, exp_global=Ref(Path("exp_global")), e0=0.1, eps=1e-20):
     self.optimizer = dy.AdagradTrainer(
         exp_global.dynet_param_collection.param_col, e0, eps=eps)
Example #22
 def __init__(self, model_file=None, src_file=None, trg_file=None, ref_file=None, max_src_len=None,
                 post_process="none", report_path=None, report_type="html",
                 beam=1, max_len=100, len_norm_type=None, mode="onebest", batcher=Ref(Path("train.batcher"), required=False)):
   self.model_file = model_file
   self.src_file = src_file
   self.trg_file = trg_file
   self.ref_file = ref_file
   self.max_src_len = max_src_len
   self.post_process = post_process
   self.report_path = report_path
   self.report_type = report_type
   self.beam = beam
   self.max_len = max_len
   self.len_norm_type = len_norm_type
   self.mode = mode
   self.batcher = batcher
Example #23
 def __init__(self, exp_global=Ref(Path("exp_global")), eps=1e-6, rho=0.95):
     self.optimizer = dy.AdadeltaTrainer(
         exp_global.dynet_param_collection.param_col, eps, rho)
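Examples 17, 19, 21 and 23 all follow the same one-line pattern: pull the DyNet parameter collection out of exp_global and hand it to one of DyNet's built-in trainers along with the hyperparameters. A hypothetical Adam variant in the same style could look as follows; the dy.AdamTrainer call uses DyNet's documented arguments, but this particular wrapper class and its name are only a sketch, not necessarily what xnmt ships.

import dynet as dy

class AdamTrainerWrapper(object):
  # Hypothetical wrapper following the same pattern as the SGD/Momentum/
  # Adagrad/Adadelta examples above (illustrative, not xnmt's own class).
  def __init__(self, exp_global=Ref(Path("exp_global")),
               alpha=0.001, beta_1=0.9, beta_2=0.999, eps=1e-8):
    self.optimizer = dy.AdamTrainer(exp_global.dynet_param_collection.param_col,
                                    alpha, beta_1, beta_2, eps)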
Example #24
    def __init__(self,
                 exp_global=Ref(Path("exp_global")),
                 layers=1,
                 input_dim=None,
                 lstm_dim=None,
                 mlp_hidden_dim=None,
                 trg_embed_dim=None,
                 dropout=None,
                 rnn_spec="lstm",
                 residual_to_output=False,
                 input_feeding=True,
                 param_init_lstm=None,
                 param_init_context=None,
                 bias_init_context=None,
                 param_init_output=None,
                 bias_init_output=None,
                 bridge=bare(CopyBridge),
                 label_smoothing=0.0,
                 vocab_projector=None,
                 vocab_size=None,
                 vocab=None,
                 trg_reader=Ref(path=Path("model.trg_reader"),
                                required=False)):
        register_handler(self)
        self.param_col = exp_global.dynet_param_collection.param_col
        # Define dim
        lstm_dim = lstm_dim or exp_global.default_layer_dim
        self.mlp_hidden_dim = mlp_hidden_dim = mlp_hidden_dim or exp_global.default_layer_dim
        trg_embed_dim = trg_embed_dim or exp_global.default_layer_dim
        input_dim = input_dim or exp_global.default_layer_dim
        self.input_dim = input_dim
        self.label_smoothing = label_smoothing
        # Input feeding
        self.input_feeding = input_feeding
        self.lstm_dim = lstm_dim
        lstm_input = trg_embed_dim
        if input_feeding:
            lstm_input += input_dim
        # Bridge
        self.lstm_layers = layers
        self.bridge = bridge

        # LSTM
        self.fwd_lstm = RnnDecoder.rnn_from_spec(
            spec=rnn_spec,
            num_layers=layers,
            input_dim=lstm_input,
            hidden_dim=lstm_dim,
            model=self.param_col,
            residual_to_output=residual_to_output)
        param_init_lstm = param_init_lstm or exp_global.param_init
        if not isinstance(param_init_lstm, GlorotInitializer):
            raise NotImplementedError(
                "For the decoder LSTM, only Glorot initialization is currently supported"
            )
        if getattr(param_init_lstm, "gain", 1.0) != 1.0:
            for l in range(layers):
                for i in [0, 1]:
                    self.fwd_lstm.param_collection().parameters_list()[
                        3 * l + i].scale(param_init_lstm.gain)

        # MLP
        self.context_projector = xnmt.linear.Linear(
            input_dim=input_dim + lstm_dim,
            output_dim=mlp_hidden_dim,
            model=self.param_col,
            param_init=param_init_context or exp_global.param_init,
            bias_init=bias_init_context or exp_global.bias_init)
        self.vocab_size = self.choose_vocab_size(vocab_size, vocab, trg_reader)
        self.vocab_projector = vocab_projector or xnmt.linear.Linear(
            input_dim=self.mlp_hidden_dim,
            output_dim=self.vocab_size,
            model=self.param_col,
            param_init=param_init_output or exp_global.param_init,
            bias_init=bias_init_output or exp_global.bias_init)
        # Dropout
        self.dropout = dropout or exp_global.dropout
Example #25
 def __init__(self, exp_global=Ref(Path("exp_global")),
              ## COMPONENTS
              embed_encoder=None, segment_composer=None, final_transducer=None,
              ## OPTIONS
              length_prior=3.3,
              length_prior_alpha=None, # GeometricSequence
              epsilon_greedy=None,     # GeometricSequence
              reinforce_scale=None,    # GeometricSequence
              confidence_penalty=None, # SegmentationConfidencePenalty
              # For segmentation warmup (Always use the poisson prior)
              segmentation_warmup=0,
              ## FLAGS
              learn_delete       = False,
              use_baseline       = True,
              z_normalization    = True,
              learn_segmentation = True,
              compose_char       = False,
              log_reward         = True,
              debug=False,
              print_sample=False):
   register_handler(self)
   model = exp_global.dynet_param_collection.param_col
   # Sanity check
   assert embed_encoder is not None
   assert segment_composer is not None
   assert final_transducer is not None
    # The Embed Encoder transduces the embedding vectors into a sequence of vectors
    self.embed_encoder = embed_encoder
    if not hasattr(embed_encoder, "hidden_dim"):
      embed_encoder_dim = exp_global.default_layer_dim
    else:
      embed_encoder_dim = embed_encoder.hidden_dim
    # The segment composer produces word embeddings from a sequence of character embeddings
   self.segment_composer = segment_composer
   # The final transducer
   self.final_transducer = final_transducer
   # Decision layer of segmentation
   self.segment_transform = linear.Linear(input_dim  = embed_encoder_dim,
                                          output_dim = 3 if learn_delete else 2,
                                          model=model)
   # The baseline linear regression model
   self.baseline = linear.Linear(input_dim = embed_encoder_dim,
                                 output_dim = 1,
                                 model = model)
   # Flags
   self.use_baseline = use_baseline
   self.learn_segmentation = learn_segmentation
   self.learn_delete = learn_delete
   self.z_normalization = z_normalization
   self.debug = debug
   self.compose_char = compose_char
   self.print_sample = print_sample
   self.log_reward = log_reward
   # Fixed Parameters
   self.length_prior = length_prior
   self.segmentation_warmup = segmentation_warmup
   # Variable Parameters
   self.length_prior_alpha = length_prior_alpha
   self.lmbd = reinforce_scale
   self.eps = epsilon_greedy
   self.confidence_penalty = confidence_penalty
   # States of the object
   self.train = False