Example #1
 def __init__(self,
              model_file:str=settings.DEFAULT_MOD_PATH,
              log_file:str=settings.DEFAULT_LOG_PATH,
              dropout:float = 0.3,
              weight_noise:float = 0.0,
              default_layer_dim:int = 512,
              param_init:ParamInitializer=bare(GlorotInitializer),
              bias_init:ParamInitializer=bare(ZeroInitializer),
              save_num_checkpoints:int=1,
              eval_only:bool = False,
              commandline_args = None,
              dynet_param_collection:Optional['ParamCollection'] = None,
              placeholders:Dict[str,str]={}):
   self.model_file = model_file
   self.log_file = log_file
   self.dropout = dropout
   self.weight_noise = weight_noise
   self.default_layer_dim = default_layer_dim
   self.param_init = param_init
   self.bias_init = bias_init
   self.model_file = None  # note: overrides the model_file assignment above; the argument is still passed to PersistentParamCollection below
   self.eval_only = eval_only
   self.dynet_param_collection = dynet_param_collection or PersistentParamCollection(model_file, save_num_checkpoints)
   self.commandline_args = commandline_args
   self.placeholders = placeholders
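The signature in Example #1 matches xnmt's ExpGlobal object (the class name is not shown in the snippet, but Example #2 below takes exp_global=bare(ExpGlobal)). A minimal, hypothetical construction sketch, with illustrative keyword values only:

  exp_global = ExpGlobal(dropout=0.5,             # overrides the 0.3 default
                         default_layer_dim=256,   # fallback dimension picked up by other components
                         save_num_checkpoints=3)  # forwarded to PersistentParamCollection

All other arguments keep their defaults, including model_file and log_file from settings.DEFAULT_MOD_PATH and settings.DEFAULT_LOG_PATH.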
Example #2
    def __init__(self,
                 exp_global=bare(ExpGlobal),
                 load=None,
                 overwrite=None,
                 preproc=None,
                 model=None,
                 train=None,
                 evaluate=None,
                 random_search_report=None):
        """
    This is called after all other components have been initialized, so we can safely load DyNet weights here. 
    """
        self.exp_global = exp_global
        self.load = load
        self.overwrite = overwrite
        self.preproc = preproc
        self.model = model
        self.train = train
        self.evaluate = evaluate
        if load:
            exp_global.dynet_param_collection.load_from_data_file(
                f"{load}.data")
            logger.info(f"> populated DyNet weights from {load}.data")

        if random_search_report:
            logger.info(
                f"> instantiated random parameter search: {random_search_report}"
            )
Example #3
  def __init__(self, model=Ref(path=Path("model")), src_file=None, trg_file=None,
               dev_every=0, batcher=bare(xnmt.batcher.SrcBatcher, batch_size=32),
               loss_calculator=None, trainer=None, run_for_epochs=None,
               lr_decay=1.0, lr_decay_times=3, patience=1, initial_patience=None,
               dev_tasks=None, restart_trainer:bool=False, reload_command=None,
               name=None, sample_train_sents=None, max_num_train_sents=None,
               max_src_len=None, max_trg_len=None,
               exp_global=Ref(Path("exp_global"))):

    super().__init__(model=model,
                     src_file=src_file,
                     trg_file=trg_file,
                     dev_every=dev_every,
                     batcher=batcher,
                     loss_calculator=loss_calculator,
                     run_for_epochs=run_for_epochs,
                     lr_decay=lr_decay,
                     lr_decay_times=lr_decay_times,
                     patience=patience,
                     initial_patience=initial_patience,
                     dev_tasks=dev_tasks,
                     restart_trainer=restart_trainer,
                     reload_command=reload_command,
                     name=name,
                     sample_train_sents=sample_train_sents,
                     max_num_train_sents=max_num_train_sents,
                     max_src_len=max_src_len,
                     max_trg_len=max_trg_len,
                     exp_global=exp_global)
    self.trainer = trainer or xnmt.optimizer.SimpleSGDTrainer(exp_global=self.exp_global, e0=0.1)
    self.dynet_profiling = getattr(exp_global.commandline_args, "dynet_profiling", 0)
Example #4
    def __init__(self,
                 exp_global=bare(ExpGlobal),
                 load: Optional[str] = None,
                 overwrite: Optional[str] = None,
                 preproc: PreprocRunner = None,
                 model: Optional[GeneratorModel] = None,
                 train: TrainingRegimen = None,
                 evaluate: Optional[List[EvalTask]] = None,
                 random_search_report: Optional[dict] = None) -> None:
        """
    This is called after all other components have been initialized, so we can safely load DyNet weights here. 
    """
        self.exp_global = exp_global
        self.load = load
        self.overwrite = overwrite
        self.preproc = preproc
        self.model = model
        self.train = train
        self.evaluate = evaluate
        if load:
            exp_global.dynet_param_collection.load_from_data_file(
                f"{load}.data")
            logger.info(f"> populated DyNet weights from {load}.data")

        if random_search_report:
            logger.info(
                f"> instantiated random parameter search: {random_search_report}"
            )
Example #5
    def __init__(self,
                 exp_global=Ref(Path("exp_global")),
                 layers=1,
                 input_dim=None,
                 lstm_dim=None,
                 mlp_hidden_dim=None,
                 trg_embed_dim=None,
                 dropout=None,
                 rnn_spec="lstm",
                 residual_to_output=False,
                 input_feeding=True,
                 bridge=bare(CopyBridge),
                 label_smoothing=0.0,
                 vocab_projector=None,
                 vocab_size=None,
                 vocab=None,
                 trg_reader=Ref(path=Path("model.trg_reader"),
                                required=False)):
        register_handler(self)
        self.param_col = exp_global.dynet_param_collection.param_col
        # Define dim
        lstm_dim = lstm_dim or exp_global.default_layer_dim
        self.mlp_hidden_dim = mlp_hidden_dim = mlp_hidden_dim or exp_global.default_layer_dim
        trg_embed_dim = trg_embed_dim or exp_global.default_layer_dim
        input_dim = input_dim or exp_global.default_layer_dim
        self.input_dim = input_dim
        self.label_smoothing = label_smoothing
        # Input feeding
        self.input_feeding = input_feeding
        self.lstm_dim = lstm_dim
        lstm_input = trg_embed_dim
        if input_feeding:
            lstm_input += input_dim
        # Bridge
        self.lstm_layers = layers
        self.bridge = bridge

        # LSTM
        self.fwd_lstm = RnnDecoder.rnn_from_spec(
            spec=rnn_spec,
            num_layers=layers,
            input_dim=lstm_input,
            hidden_dim=lstm_dim,
            model=self.param_col,
            residual_to_output=residual_to_output)
        # MLP
        self.context_projector = xnmt.linear.Linear(input_dim=input_dim +
                                                    lstm_dim,
                                                    output_dim=mlp_hidden_dim,
                                                    model=self.param_col)
        self.vocab_size = self.choose_vocab_size(vocab_size, vocab, trg_reader)
        self.vocab_projector = vocab_projector or xnmt.linear.Linear(
            input_dim=self.mlp_hidden_dim,
            output_dim=self.vocab_size,
            model=self.param_col)

        # Dropout
        self.dropout = dropout or exp_global.dropout
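Examples #5 and #11 (below) resolve unspecified dimensions and dropout with the same fallback idiom, deferring to the values stored on exp_global. Note that "or" treats 0 (and 0.0) the same as None, so an explicit zero also falls back to the global default:

  lstm_dim = lstm_dim or exp_global.default_layer_dim  # None (or 0) -> use the global default
  dropout = dropout or exp_global.dropout               # same pattern for the dropout rate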
Example #6
  def __init__(self, src_reader, trg_reader, src_embedder=bare(SimpleWordEmbedder),
               encoder=bare(BiLSTMSeqTransducer), attender=bare(MlpAttender),
               trg_embedder=bare(SimpleWordEmbedder), decoder=bare(MlpSoftmaxDecoder),
               inference=bare(SimpleInference), calc_global_fertility=False, calc_attention_entropy=False):
    '''Constructor.

    :param src_reader: A reader for the source side.
    :param src_embedder: A word embedder for the input language
    :param encoder: An encoder to generate encoded inputs
    :param attender: An attention module
    :param trg_reader: A reader for the target side.
    :param trg_embedder: A word embedder for the output language
    :param decoder: A decoder
    :param inference: The default inference strategy used for this model
    '''
    register_handler(self)
    self.src_reader = src_reader
    self.trg_reader = trg_reader
    self.src_embedder = src_embedder
    self.encoder = encoder
    self.attender = attender
    self.trg_embedder = trg_embedder
    self.decoder = decoder
    self.calc_global_fertility = calc_global_fertility
    self.calc_attention_entropy = calc_attention_entropy
    self.inference = inference
Example #7
  def __init__(self, exp_global=bare(ExpGlobal), load=None, overwrite=None, preproc=None,
               model=None, train=None, evaluate=None, random_search_report=None):
    self.exp_global = exp_global
    self.load = load
    self.overwrite = overwrite
    self.preproc = preproc
    self.model = model
    self.train = train
    self.evaluate = evaluate
    if load:
      exp_global.dynet_param_collection.load_from_data_file(f"{load}.data")
      logger.info(f"> populated DyNet weights from {load}.data")

    if random_search_report:
      logger.info(f"> instantiated random parameter search: {random_search_report}")
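Examples #2, #4, and #7 share the same experiment constructor: when a load path is given, DyNet weights are read from that path with a .data suffix appended, and an info message is logged. A hypothetical call, assuming the enclosing class is xnmt's Experiment (the snippets do not show the class name) and an illustrative path:

  # Populates DyNet weights from "pretrained/my_exp.data".
  exp = Experiment(exp_global=my_exp_global, load="pretrained/my_exp")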
Example #8
 def __init__(self,
              model_file=settings.DEFAULT_MOD_PATH,
              log_file=settings.DEFAULT_LOG_PATH,
              dropout=0.3,
              weight_noise=0.0,
              default_layer_dim=512,
              param_init=bare(GlorotInitializer),
              bias_init=bare(ZeroInitializer),
              save_num_checkpoints=1,
              eval_only=False,
              commandline_args=None,
              dynet_param_collection=None):
     self.model_file = model_file
     self.log_file = log_file
     self.dropout = dropout
     self.weight_noise = weight_noise
     self.default_layer_dim = default_layer_dim
     self.param_init = param_init
     self.bias_init = bias_init
     self.model_file = None  # note: overrides the model_file assignment above; the argument is still passed to PersistentParamCollection below
     self.eval_only = eval_only
     self.dynet_param_collection = dynet_param_collection or PersistentParamCollection(
         model_file, save_num_checkpoints)
     self.commandline_args = commandline_args
Example #9
 def __init__(self,
              src_reader,
              trg_reader,
              src_embedder=bare(SimpleWordEmbedder),
              encoder=bare(BiLSTMSeqTransducer),
              attender=bare(MlpAttender),
              trg_embedder=bare(SimpleWordEmbedder),
              decoder=bare(MlpSoftmaxDecoder),
              inference=bare(SimpleInference),
              calc_global_fertility=False,
              calc_attention_entropy=False):
     register_handler(self)
     self.src_reader = src_reader
     self.trg_reader = trg_reader
     self.src_embedder = src_embedder
     self.encoder = encoder
     self.attender = attender
     self.trg_embedder = trg_embedder
     self.decoder = decoder
     self.calc_global_fertility = calc_global_fertility
     self.calc_attention_entropy = calc_attention_entropy
     self.inference = inference
Example #10
    def __init__(self, beam_size, max_len=100, len_norm=bare(NoNormalization)):
        self.beam_size = beam_size
        self.max_len = max_len
        self.len_norm = len_norm

        self.entrs = []
Example #11
    def __init__(self,
                 exp_global=Ref(Path("exp_global")),
                 layers=1,
                 input_dim=None,
                 lstm_dim=None,
                 mlp_hidden_dim=None,
                 trg_embed_dim=None,
                 dropout=None,
                 rnn_spec="lstm",
                 residual_to_output=False,
                 input_feeding=True,
                 param_init_lstm=None,
                 param_init_context=None,
                 bias_init_context=None,
                 param_init_output=None,
                 bias_init_output=None,
                 bridge=bare(CopyBridge),
                 label_smoothing=0.0,
                 vocab_projector=None,
                 vocab_size=None,
                 vocab=None,
                 trg_reader=Ref(path=Path("model.trg_reader"),
                                required=False)):
        register_handler(self)
        self.param_col = exp_global.dynet_param_collection.param_col
        # Define dim
        lstm_dim = lstm_dim or exp_global.default_layer_dim
        self.mlp_hidden_dim = mlp_hidden_dim = mlp_hidden_dim or exp_global.default_layer_dim
        trg_embed_dim = trg_embed_dim or exp_global.default_layer_dim
        input_dim = input_dim or exp_global.default_layer_dim
        self.input_dim = input_dim
        self.label_smoothing = label_smoothing
        # Input feeding
        self.input_feeding = input_feeding
        self.lstm_dim = lstm_dim
        lstm_input = trg_embed_dim
        if input_feeding:
            lstm_input += input_dim
        # Bridge
        self.lstm_layers = layers
        self.bridge = bridge

        # LSTM
        self.fwd_lstm = RnnDecoder.rnn_from_spec(
            spec=rnn_spec,
            num_layers=layers,
            input_dim=lstm_input,
            hidden_dim=lstm_dim,
            model=self.param_col,
            residual_to_output=residual_to_output)
        param_init_lstm = param_init_lstm or exp_global.param_init
        if not isinstance(param_init_lstm, GlorotInitializer):
            raise NotImplementedError(
                "For the decoder LSTM, only Glorot initialization is currently supported"
            )
        if getattr(param_init_lstm, "gain", 1.0) != 1.0:
            for l in range(layers):
                for i in [0, 1]:
                    self.fwd_lstm.param_collection().parameters_list()[
                        3 * l + i].scale(param_init_lstm.gain)

        # MLP
        self.context_projector = xnmt.linear.Linear(
            input_dim=input_dim + lstm_dim,
            output_dim=mlp_hidden_dim,
            model=self.param_col,
            param_init=param_init_context or exp_global.param_init,
            bias_init=bias_init_context or exp_global.bias_init)
        self.vocab_size = self.choose_vocab_size(vocab_size, vocab, trg_reader)
        self.vocab_projector = vocab_projector or xnmt.linear.Linear(
            input_dim=self.mlp_hidden_dim,
            output_dim=self.vocab_size,
            model=self.param_col,
            param_init=param_init_output or exp_global.param_init,
            bias_init=bias_init_output or exp_global.bias_init)
        # Dropout
        self.dropout = dropout or exp_global.dropout
Example #12
  def __init__(self, model, src_file=None, trg_file=None, dev_every=0,
               batcher=bare(SrcBatcher, batch_size=32), loss_calculator=None,
               run_for_epochs=None, lr_decay=1.0, lr_decay_times=3, patience=1,
               initial_patience=None, dev_tasks=None, restart_trainer=False,
               reload_command=None, name=None, sample_train_sents=None,
               max_num_train_sents=None, max_src_len=None, max_trg_len=None,
               exp_global=Ref(Path("exp_global"))):
    """
    Args:
      exp_global:
      model: a generator.GeneratorModel object
      src_file: The file for the source data.
      trg_file: The file for the target data.
      dev_every (int): dev checkpoints every n sentences (0 for only after epoch)
      batcher: Type of batcher
      loss_calculator:
      lr_decay (float):
      lr_decay_times (int):  Early stopping after decaying learning rate a certain number of times
      patience (int): apply LR decay after dev scores haven't improved over this many checkpoints
      initial_patience (int): if given, allows adjusting patience for the first LR decay
      dev_tasks: A list of tasks to run on the development set
      restart_trainer: Restart trainer (useful for Adam) and revert weights to best dev checkpoint when applying LR decay (https://arxiv.org/pdf/1706.09733.pdf)
      reload_command: Command to change the input data after each epoch.
                           --epoch EPOCH_NUM will be appended to the command.
                           To just reload the data after each epoch set the command to 'true'.
      sample_train_sents:
      max_num_train_sents:
      max_src_len:
      max_trg_len:
      name: will be prepended to log outputs if given
    """
    self.exp_global = exp_global
    self.model_file = self.exp_global.dynet_param_collection.model_file
    self.src_file = src_file
    self.trg_file = trg_file
    self.dev_tasks = dev_tasks

    if lr_decay > 1.0 or lr_decay <= 0.0:
      raise RuntimeError("illegal lr_decay, must satisfy: 0.0 < lr_decay <= 1.0")
    self.lr_decay = lr_decay
    self.patience = patience
    self.initial_patience = initial_patience
    self.lr_decay_times = lr_decay_times
    self.restart_trainer = restart_trainer
    self.run_for_epochs = run_for_epochs

    self.early_stopping_reached = False
    # training state
    self.training_state = TrainingState()

    self.reload_command = reload_command

    self.model = model
    self.loss_calculator = loss_calculator or LossCalculator(MLELoss())

    self.sample_train_sents = sample_train_sents
    self.max_num_train_sents = max_num_train_sents
    self.max_src_len = max_src_len
    self.max_trg_len = max_trg_len

    self.batcher = batcher
    self.logger = BatchLossTracker(self, dev_every, name)
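Example #12's constructor validates that 0.0 < lr_decay <= 1.0 and drives decay from the patience counters described in its docstring. A hypothetical configuration sketch; the class name and argument values are illustrative and not taken from the snippet:

  regimen = TrainingRegimen(model=my_model,
                            src_file="train.src", trg_file="train.trg",
                            run_for_epochs=20,
                            lr_decay=0.5,           # decay factor applied to the learning rate
                            lr_decay_times=3,       # stop after decaying this many times
                            patience=2,             # decay after 2 checkpoints without dev improvement
                            restart_trainer=True)   # revert to the best checkpoint when decaying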
Example #13
 def __init__(self,
              model=Ref(path=Path("model")),
              src_file=None,
              trg_file=None,
              dev_every=0,
              batcher=bare(xnmt.batcher.SrcBatcher, batch_size=32),
              loss_calculator=None,
              trainer=None,
              run_for_epochs=None,
              lr_decay=1.0,
              lr_decay_times=3,
              patience=1,
              initial_patience=None,
              dev_tasks=None,
              restart_trainer=False,
              reload_command=None,
              name=None,
              sample_train_sents=None,
              max_num_train_sents=None,
              max_src_len=None,
              max_trg_len=None,
              exp_global=Ref(Path("exp_global"))):
     """
 :param model: a generator.GeneratorModel object
 :param src_file: the source training file
 :param trg_file: the target training file
 :param dev_every (int): dev checkpoints every n sentences (0 for only after epoch)
 :param batcher: Type of batcher
 :param loss_calculator: The method for calculating the loss.
 :param trainer: Trainer object, default is SGD with learning rate 0.1
 :param run_for_epochs:
 :param lr_decay (float):
 :param lr_decay_times (int):  Early stopping after decaying learning rate a certain number of times
 :param patience (int): apply LR decay after dev scores haven't improved over this many checkpoints
 :param initial_patience (int): if given, allows adjusting patience for the first LR decay
 :param dev_tasks: A list of tasks to use during the development stage.
 :param restart_trainer: Restart trainer (useful for Adam) and revert weights to best dev checkpoint when applying LR decay (https://arxiv.org/pdf/1706.09733.pdf)
 :param reload_command: Command to change the input data after each epoch.
                        --epoch EPOCH_NUM will be appended to the command.
                        To just reload the data after each epoch set the command to 'true'.
 :param name: will be prepended to log outputs if given
 :param sample_train_sents:
 :param max_num_train_sents:
 :param max_src_len:
 :param max_trg_len:
 :param exp_global:
 """
     super().__init__(model=model,
                      src_file=src_file,
                      trg_file=trg_file,
                      dev_every=dev_every,
                      batcher=batcher,
                      loss_calculator=loss_calculator,
                      run_for_epochs=run_for_epochs,
                      lr_decay=lr_decay,
                      lr_decay_times=lr_decay_times,
                      patience=patience,
                      initial_patience=initial_patience,
                      dev_tasks=dev_tasks,
                      restart_trainer=restart_trainer,
                      reload_command=reload_command,
                      name=name,
                      sample_train_sents=sample_train_sents,
                      max_num_train_sents=max_num_train_sents,
                      max_src_len=max_src_len,
                      max_trg_len=max_trg_len,
                      exp_global=exp_global)
     self.trainer = trainer or xnmt.optimizer.SimpleSGDTrainer(
         exp_global=self.exp_global, e0=0.1)
     self.dynet_profiling = getattr(exp_global.commandline_args,
                                    "dynet_profiling", 0)