Example 1
 def __init__(
     self,
     input_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
     embedder: embedders.Embedder = bare(embedders.LookupEmbedder),
     input_feeding: bool = True,
     bridge: bridges.Bridge = bare(bridges.CopyBridge),
     rnn: recurrent.UniLSTMSeqTransducer = bare(
         recurrent.UniLSTMSeqTransducer),
     transform: transforms.Transform = bare(transforms.AuxNonLinear),
     scorer: scorers.Scorer = bare(scorers.Softmax),
     truncate_dec_batches: bool = Ref("exp_global.truncate_dec_batches",
                                      default=False)
 ) -> None:
     self.param_col = param_collections.ParamManager.my_params(self)
     self.input_dim = input_dim
     self.embedder = embedder
     self.truncate_dec_batches = truncate_dec_batches
     self.bridge = bridge
     self.rnn = rnn
     self.transform = transform
     self.scorer = scorer
     # Input feeding
     self.input_feeding = input_feeding
     rnn_input_dim = embedder.emb_dim
     if input_feeding:
         rnn_input_dim += input_dim
     assert rnn_input_dim == rnn.total_input_dim, "Wrong input dimension in RNN layer: {} != {}".format(
         rnn_input_dim, rnn.total_input_dim)
Example 2
    def __init__(
        self,
        layers: numbers.Integral,
        input_dim: numbers.Integral,
        hidden_dim: numbers.Integral,
        param_init: param_initializers.ParamInitializer = Ref(
            "exp_global.param_init",
            default=bare(param_initializers.GlorotInitializer)),
        bias_init: param_initializers.ParamInitializer = Ref(
            "exp_global.bias_init",
            default=bare(param_initializers.ZeroInitializer))
    ) -> None:
        if layers != 1:
            raise RuntimeError(
                "CustomLSTMSeqTransducer supports only exactly one layer")
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        model = param_collections.ParamManager.my_params(self)

        # [i; f; o; g]
        self.p_Wx = model.add_parameters(dim=(hidden_dim * 4, input_dim),
                                         init=param_init.initializer(
                                             (hidden_dim * 4, input_dim)))
        self.p_Wh = model.add_parameters(dim=(hidden_dim * 4, hidden_dim),
                                         init=param_init.initializer(
                                             (hidden_dim * 4, hidden_dim)))
        self.p_b = model.add_parameters(dim=(hidden_dim * 4, ),
                                        init=bias_init.initializer(
                                            (hidden_dim * 4, )))
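The comment [i; f; o; g] above indicates that the input, forget, output and candidate gates are stacked row-wise into single 4*hidden_dim weight matrices. A minimal NumPy-only sketch of one LSTM step using such stacked parameters (illustrative helper names, not the transducer's actual transduce code):

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def lstm_step(Wx, Wh, b, x_t, h_prev, c_prev):
    # Wx: (4*hidden_dim, input_dim), Wh: (4*hidden_dim, hidden_dim), b: (4*hidden_dim,)
    gates = Wx @ x_t + Wh @ h_prev + b
    i, f, o, g = np.split(gates, 4)  # same [i; f; o; g] ordering as the parameters above
    c_t = sigmoid(f) * c_prev + sigmoid(i) * np.tanh(g)
    h_t = sigmoid(o) * np.tanh(c_t)
    return h_t, c_t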
Example 3
 def __init__(
     self,
     layers: numbers.Integral = 1,
     input_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
     hidden_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
     dropout: numbers.Real = Ref("exp_global.dropout", default=0.0),
     forward_layers: Optional[Sequence[LatticeLSTMTransducer]] = None,
     backward_layers: Optional[Sequence[LatticeLSTMTransducer]] = None
 ) -> None:
     self.num_layers = layers
     self.hidden_dim = hidden_dim
     self.dropout_rate = dropout
     assert hidden_dim % 2 == 0
     self.forward_layers = self.add_serializable_component(
         "forward_layers", forward_layers,
         lambda: self._make_dir_layers(input_dim=input_dim,
                                       hidden_dim=hidden_dim,
                                       dropout=dropout,
                                       layers=layers))
     self.backward_layers = self.add_serializable_component(
         "backward_layers", backward_layers,
         lambda: self._make_dir_layers(input_dim=input_dim,
                                       hidden_dim=hidden_dim,
                                       dropout=dropout,
                                       layers=layers))
Example 4
 def __init__(
         self,
         input_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
         hidden_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
         output_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
         bias: bool = True,
         activation: str = 'tanh',
         hidden_layers: numbers.Integral = 1,
         param_init: param_initializers.ParamInitializer = Ref(
             "exp_global.param_init",
             default=bare(param_initializers.GlorotInitializer)),
         bias_init: param_initializers.ParamInitializer = Ref(
             "exp_global.bias_init",
             default=bare(param_initializers.ZeroInitializer)),
         layers: Optional[Sequence[Transform]] = None) -> None:
     self.layers = self.add_serializable_component(
         "layers", layers,
         lambda: MLP._create_layers(num_layers=hidden_layers,
                                    input_dim=input_dim,
                                    hidden_dim=hidden_dim,
                                    output_dim=output_dim,
                                    bias=bias,
                                    activation=activation,
                                    param_init=param_init,
                                    bias_init=bias_init))
Example 5
 def __init__(
     self,
     input_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
     state_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
     hidden_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
     param_init: param_initializers.ParamInitializer = Ref(
         "exp_global.param_init",
         default=bare(param_initializers.GlorotInitializer)),
     bias_init: param_initializers.ParamInitializer = Ref(
         "exp_global.bias_init",
         default=bare(param_initializers.ZeroInitializer))
 ) -> None:
     self.input_dim = input_dim
     self.state_dim = state_dim
     self.hidden_dim = hidden_dim
     my_params = param_collections.ParamManager.my_params(self)
     self.linear_context = my_params.add_parameters(
         (hidden_dim, input_dim),
         init=param_init[0].initializer((hidden_dim, input_dim)))
     self.linear_query = my_params.add_parameters(
         (hidden_dim, state_dim),
         init=param_init[1].initializer((hidden_dim, state_dim)))
     self.bias_context = my_params.add_parameters(
         (hidden_dim, ), init=bias_init.initializer((hidden_dim, )))
     self.pU = my_params.add_parameters((1, hidden_dim),
                                        init=param_init[2].initializer(
                                            (1, hidden_dim)))
     self.curr_sent = None
     self.attention_vecs = None
     self.WI = None
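Given the parameter shapes above (linear_context: hidden_dim x input_dim, linear_query: hidden_dim x state_dim, bias_context: hidden_dim, pU: 1 x hidden_dim), the scores plausibly follow the usual additive (MLP) attention form, U . tanh(W_ctx h_j + W_q s + b). A NumPy sketch under that assumption (hypothetical function, not the class's actual implementation):

import numpy as np

def mlp_attention_weights(W_ctx, W_q, b, U, H, s):
    # H: (input_dim, seq_len) encoder states, s: (state_dim,) decoder state
    hidden = np.tanh(W_ctx @ H + (W_q @ s + b)[:, None])  # (hidden_dim, seq_len)
    scores = (U @ hidden).ravel()                         # (seq_len,)
    e = np.exp(scores - scores.max())
    return e / e.sum()                                    # normalized attention weights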
Example 6
    def __init__(
            self,
            layers: numbers.Integral = 1,
            input_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
            hidden_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
            downsampling_method: str = "concat",
            reduce_factor: Union[numbers.Integral,
                                 Sequence[numbers.Integral]] = 2,
            dropout: float = Ref("exp_global.dropout", default=0.0),
            builder_layers: Any = None):
        self.dropout = dropout
        assert layers > 0
        assert hidden_dim % 2 == 0
        assert type(reduce_factor) == int or (type(reduce_factor) == list and
                                              len(reduce_factor) == layers - 1)
        assert downsampling_method in ["concat", "skip"]

        self.downsampling_method = downsampling_method
        self.reduce_factor = reduce_factor
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.builder_layers = self.add_serializable_component(
            "builder_layers", builder_layers, lambda: self.make_builder_layers(
                input_dim, hidden_dim, layers, dropout, downsampling_method,
                reduce_factor))
Example 7
 def __init__(
     self,
     emb_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
     weight_noise: numbers.Real = Ref("exp_global.weight_noise",
                                      default=0.0),
     word_dropout: numbers.Real = 0.0,
     fix_norm: Optional[numbers.Real] = None,
     param_init: param_initializers.ParamInitializer = Ref(
         "exp_global.param_init",
         default=bare(param_initializers.GlorotInitializer)),
     vocab_size: Optional[numbers.Integral] = None,
     vocab: Optional[vocabs.Vocab] = None,
     yaml_path=None,
     src_reader: Optional[input_readers.InputReader] = Ref(
         "model.src_reader", default=None),
     trg_reader: Optional[input_readers.InputReader] = Ref(
         "model.trg_reader", default=None)
 ) -> None:
     #print(f"embedder received param_init: {param_init}")
     self.emb_dim = emb_dim
     self.weight_noise = weight_noise
     self.word_dropout = word_dropout
     self.fix_norm = fix_norm
     self.word_id_mask = None
     self.train = False
     param_collection = param_collections.ParamManager.my_params(self)
     self.vocab_size = self.choose_vocab_size(vocab_size, vocab, yaml_path,
                                              src_reader, trg_reader)
     self.save_processed_arg("vocab_size", self.vocab_size)
     self.embeddings = param_collection.add_lookup_parameters(
         (self.vocab_size, self.emb_dim),
         init=param_init.initializer((self.vocab_size, self.emb_dim),
                                     is_lookup=True))
Example 8
 def __init__(self,
              max_pos: numbers.Integral,
              op: str = 'sum',
              emb_type: str = 'param',
              input_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
              dropout: numbers.Real = Ref("exp_global.dropout", default=0.0),
              param_init: param_initializers.ParamInitializer = Ref("exp_global.param_init", default=bare(param_initializers.GlorotInitializer))) \
         -> None:
     """
 max_pos: largest embedded position
 op: how to combine positional encodings with the original encodings, can be "sum" or "concat"
 type: what type of embddings to use, "param"=parameterized (others, such as the trigonometric embeddings are todo)
 input_dim: embedding size
 dropout: apply dropout to output of this transducer
 param_init: how to initialize embedding matrix
 """
     self.max_pos = max_pos
     self.input_dim = input_dim
     self.dropout = dropout
     self.op = op
     self.emb_type = emb_type
     my_params = param_collections.ParamManager.my_params(self)
     self.embeddings = nn.Embedding(self.max_pos,
                                    self.input_dim).to(xnmt.device)
     my_params.append(self.embeddings)
     my_params.init_params(param_init)
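The op argument determines how the looked-up position embeddings are combined with the incoming representations: element-wise addition keeps the dimensionality, while concatenation grows the feature axis. A small illustrative sketch of that combination step (NumPy, hypothetical helper, not the transducer's actual transduce code):

import numpy as np

def combine_positional(token_reprs, pos_embs, op="sum"):
    # token_reprs, pos_embs: (seq_len, dim) arrays
    if op == "sum":
        return token_reprs + pos_embs  # same dimensionality as the input
    if op == "concat":
        return np.concatenate([token_reprs, pos_embs], axis=-1)  # feature dim doubles
    raise ValueError(f"unknown op: {op}")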
Example 9
 def __init__(self,
              layers=1,
              input_dim=Ref("exp_global.default_layer_dim"),
              hidden_dim=Ref("exp_global.default_layer_dim"),
              downsampling_method="concat",
              reduce_factor=2,
              dropout=Ref("exp_global.dropout", default=0.0)):
     self.dropout = dropout
     assert layers > 0
     assert hidden_dim % 2 == 0
     assert type(reduce_factor) == int or (type(reduce_factor) == list and
                                           len(reduce_factor) == layers - 1)
     assert downsampling_method in ["concat", "skip"]
     self.builder_layers = []
     self.downsampling_method = downsampling_method
     self.reduce_factor = reduce_factor
     self.input_dim = input_dim
      f = UniLSTMSeqTransducer(input_dim=input_dim,
                               hidden_dim=hidden_dim // 2,
                               dropout=dropout)
      b = UniLSTMSeqTransducer(input_dim=input_dim,
                               hidden_dim=hidden_dim // 2,
                               dropout=dropout)
     self.builder_layers.append((f, b))
     for _ in range(layers - 1):
         layer_input_dim = hidden_dim if downsampling_method == "skip" else hidden_dim * reduce_factor
          f = UniLSTMSeqTransducer(input_dim=layer_input_dim,
                                   hidden_dim=hidden_dim // 2,
                                   dropout=dropout)
          b = UniLSTMSeqTransducer(input_dim=layer_input_dim,
                                   hidden_dim=hidden_dim // 2,
                                   dropout=dropout)
         self.builder_layers.append((f, b))
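The layer_input_dim arithmetic reflects the two downsampling modes: "skip" keeps every reduce_factor-th output, so the next layer still sees hidden_dim features, while "concat" joins reduce_factor consecutive outputs, so the next layer sees hidden_dim * reduce_factor features. A minimal sketch of the "concat" case, assuming the sequence length has been padded to a multiple of reduce_factor (illustrative only):

import numpy as np

def downsample_concat(outputs, reduce_factor):
    # outputs: (seq_len, hidden_dim), with seq_len divisible by reduce_factor
    seq_len, hidden_dim = outputs.shape
    # consecutive rows are concatenated into one row of size hidden_dim * reduce_factor
    return outputs.reshape(seq_len // reduce_factor, hidden_dim * reduce_factor)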
Example 10
 def __init__(
     self,
     input_dim: int = Ref("exp_global.default_layer_dim"),
     trg_embed_dim: int = Ref("exp_global.default_layer_dim"),
     input_feeding: bool = True,
     bridge: Bridge = bare(CopyBridge),
     rnn: UniLSTMSeqTransducer = bare(UniLSTMSeqTransducer),
     transform: Transform = bare(AuxNonLinear),
     scorer: Scorer = bare(Softmax),
     truncate_dec_batches: bool = Ref("exp_global.truncate_dec_batches",
                                      default=False)
 ) -> None:
     self.param_col = ParamManager.my_params(self)
     self.input_dim = input_dim
     self.truncate_dec_batches = truncate_dec_batches
     self.bridge = bridge
     self.rnn = rnn
     self.transform = transform
     self.scorer = scorer
     # Input feeding
     self.input_feeding = input_feeding
     rnn_input_dim = trg_embed_dim
     if input_feeding:
         rnn_input_dim += input_dim
     assert rnn_input_dim == rnn.input_dim, "Wrong input dimension in RNN layer: {} != {}".format(
         rnn_input_dim, rnn.input_dim)
Example 11
  def __init__(self,
               filename,
               emb_dim=Ref("exp_global.default_layer_dim"),
               weight_noise=Ref("exp_global.weight_noise", default=0.0),
               word_dropout=0.0,
               fix_norm = None,
               vocab = None,
               yaml_path = None,
               src_reader = Ref("model.src_reader", default=None),
               trg_reader = Ref("model.trg_reader", default=None)):
    self.emb_dim = emb_dim
    self.weight_noise = weight_noise
    self.word_dropout = word_dropout
    self.word_id_mask = None
    self.train = False
    self.fix_norm = fix_norm
    self.pretrained_filename = filename
    param_collection = ParamManager.my_params(self)
    self.vocab = self.choose_vocab(vocab, yaml_path, src_reader, trg_reader)
    self.vocab_size = len(self.vocab)
    self.save_processed_arg("vocab", self.vocab)
    with open(self.pretrained_filename, encoding='utf-8') as embeddings_file:
      total_embs, in_vocab, missing, initial_embeddings = self._read_fasttext_embeddings(self.vocab, embeddings_file)
    self.embeddings = param_collection.lookup_parameters_from_numpy(initial_embeddings)

    logger.info(f"{in_vocab} vocabulary matches out of {total_embs} total embeddings; "
                f"{missing} vocabulary words without a pretrained embedding out of {self.vocab_size}")
Example 12
 def __init__(
     self,
     emb_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
     weight_noise: numbers.Real = Ref("exp_global.weight_noise",
                                      default=0.0),
     word_dropout: numbers.Real = 0.0,
     fix_norm: Optional[numbers.Real] = None,
     param_init: param_initializers.ParamInitializer = Ref(
         "exp_global.param_init",
         default=bare(param_initializers.GlorotInitializer)),
     vocab_size: Optional[numbers.Integral] = None,
     vocab: Optional[vocabs.Vocab] = None,
     yaml_path: Path = Path(),
     src_reader: Optional[input_readers.InputReader] = Ref(
         "model.src_reader", default=None),
     trg_reader: Optional[input_readers.InputReader] = Ref(
         "model.trg_reader", default=None)
 ) -> None:
     self.emb_dim = emb_dim
     self.weight_noise = weight_noise
     self.word_dropout = word_dropout
     self.fix_norm = fix_norm
     self.word_id_mask = None
     self.train = False
     self.vocab_size = self.choose_vocab_size(vocab_size, vocab, yaml_path,
                                              src_reader, trg_reader)
     self.save_processed_arg("vocab_size", self.vocab_size)
     self.embeddings = nn.Embedding(self.vocab_size,
                                    self.emb_dim).to(xnmt.device)
     my_params = param_collections.ParamManager.my_params(self)
     my_params.append(self.embeddings)
     my_params.init_params(param_init)
Example 13
    def __init__(
        self,
        policy_network=None,
        baseline=None,
        z_normalization=True,
        conf_penalty=None,
        weight=1.0,
        input_dim=Ref("exp_global.default_layer_dim"),
        output_dim=2,
        param_init=Ref("exp_global.param_init",
                       default=bare(param_initializers.GlorotInitializer)),
        bias_init=Ref("exp_global.bias_init",
                      default=bare(param_initializers.ZeroInitializer))):
        self.input_dim = input_dim
        self.policy_network = self.add_serializable_component(
            "policy_network", policy_network,
            lambda: transforms.Linear(input_dim=self.input_dim,
                                      output_dim=output_dim,
                                      param_init=param_init,
                                      bias_init=bias_init))
        self.baseline = self.add_serializable_component(
            "baseline", baseline,
            lambda: transforms.Linear(input_dim=self.input_dim,
                                      output_dim=1,
                                      param_init=param_init,
                                      bias_init=bias_init))

        self.confidence_penalty = self.add_serializable_component(
            "conf_penalty", conf_penalty,
            lambda: conf_penalty) if conf_penalty is not None else None
        self.weight = weight
        self.z_normalization = z_normalization
Example 14
 def __init__(self,
              word_vocab=None,
              embedding=None,
              ngram_size=4,
              vocab_size=32000,
              cache_id_pool=None,
              cache_word_table=None,
              char_vocab=Ref(Path("model.src_reader.vocab")),
              hidden_dim=Ref("exp_global.default_layer_dim"),
              param_init=Ref("exp_global.param_init", default=bare(GlorotInitializer)),
              bias_init=Ref("exp_global.bias_init", default=bare(ZeroInitializer))):
   super().__init__(word_vocab, vocab_size, cache_id_pool, cache_word_table)
   # Attributes
   if word_vocab is None:
     self.dict_entry = vocab_size+1
   else:
     self.dict_entry = len(word_vocab)
   self.char_vocab = char_vocab
   self.param_init = param_init
   self.bias_init = bias_init
   self.hidden_dim = hidden_dim
   self.word_vect = None
   # Word Embedding
   self.ngram_size = ngram_size
   self.embedding = self.add_serializable_component("embedding", embedding,
                                                     lambda: Linear(input_dim=self.dict_entry,
                                                                    output_dim=hidden_dim,
                                                                    param_init=param_init,
                                                                    bias_init=bias_init))
Example 15
 def __init__(
     self,
     tasks: Sequence[train_tasks.TrainingTask],
     task_weights: Optional[Sequence[numbers.Real]] = None,
     trainer: optimizers.XnmtOptimizer = bare(optimizers.SimpleSGDTrainer,
                                              e0=0.1),
     dev_zero: bool = False,
     loss_comb_method: str = Ref("exp_global.loss_comb_method",
                                 default="sum"),
     update_every_within: numbers.Integral = 1,
     update_every_across: numbers.Integral = 1,
     commandline_args=Ref("exp_global.commandline_args", default=None)
 ) -> None:
     super().__init__(tasks=tasks,
                      trainer=trainer,
                      dev_zero=dev_zero,
                      update_every=update_every_across,
                      commandline_args=commandline_args)
     if update_every_within != 1 and update_every_across != 1:
         raise ValueError(
             "update_every_within and update_every_across cannot be mixed.")
     self.update_every_within = update_every_within
     self.task_weights = task_weights or [1. / len(tasks)] * len(tasks)
     if len(self.task_weights) != len(self.tasks):
         raise ValueError(
             f"number of tasks must match number of task weights; "
             f"found: {len(self.task_weights)} != {len(self.tasks)}")
     self.train_loss_trackers = {
         task: loss_trackers.TrainLossTracker(task)
         for task in tasks
     }
     self.loss_comb_method = loss_comb_method
Example 16
 def __init__(self,
              input_dim=Ref("exp_global.default_layer_dim"),
              state_dim=Ref("exp_global.default_layer_dim"),
              hidden_dim=Ref("exp_global.default_layer_dim"),
              param_init=Ref("exp_global.param_init",
                             default=bare(GlorotInitializer)),
              bias_init=Ref("exp_global.bias_init",
                            default=bare(ZeroInitializer))):
     self.input_dim = input_dim
     self.state_dim = state_dim
     self.hidden_dim = hidden_dim
     param_collection = ParamManager.my_params(self)
     self.pW = param_collection.add_parameters((hidden_dim, input_dim),
                                               init=param_init.initializer(
                                                   (hidden_dim, input_dim)))
     self.pV = param_collection.add_parameters((hidden_dim, state_dim),
                                               init=param_init.initializer(
                                                   (hidden_dim, state_dim)))
     self.pb = param_collection.add_parameters((hidden_dim, ),
                                               init=bias_init.initializer(
                                                   (hidden_dim, )))
     self.pU = param_collection.add_parameters((1, hidden_dim),
                                               init=param_init.initializer(
                                                   (1, hidden_dim)))
     self.curr_sent = None
Example 17
    def __init__(
        self,
        model: models.ConditionedModel = Ref("model"),
        src_file: Union[None, str, Sequence[str]] = None,
        trg_file: Optional[str] = None,
        dev_every: numbers.Integral = 0,
        dev_zero: bool = False,
        batcher: batchers.Batcher = bare(batchers.SrcBatcher, batch_size=32),
        loss_calculator: loss_calculators.LossCalculator = bare(
            loss_calculators.MLELoss),
        trainer: optimizers.XnmtOptimizer = bare(optimizers.SimpleSGDTrainer,
                                                 e0=0.1),
        run_for_epochs: Optional[numbers.Integral] = None,
        lr_decay: numbers.Real = 1.0,
        lr_decay_times: numbers.Integral = 3,
        patience: numbers.Integral = 1,
        initial_patience: Optional[numbers.Integral] = None,
        dev_tasks: Sequence[eval_tasks.EvalTask] = None,
        dev_combinator: Optional[str] = None,
        restart_trainer: bool = False,
        reload_command: Optional[str] = None,
        name: str = "{EXP}",
        sample_train_sents: Optional[numbers.Integral] = None,
        max_num_train_sents: Optional[numbers.Integral] = None,
        max_src_len: Optional[numbers.Integral] = None,
        max_trg_len: Optional[numbers.Integral] = None,
        loss_comb_method: str = Ref("exp_global.loss_comb_method",
                                    default="sum"),
        update_every: numbers.Integral = 1,
        commandline_args: dict = Ref("exp_global.commandline_args", default={})
    ) -> None:

        super().__init__(model=model,
                         src_file=src_file,
                         trg_file=trg_file,
                         dev_every=dev_every,
                         batcher=batcher,
                         loss_calculator=loss_calculator,
                         run_for_epochs=run_for_epochs,
                         lr_decay=lr_decay,
                         lr_decay_times=lr_decay_times,
                         patience=patience,
                         initial_patience=initial_patience,
                         dev_tasks=dev_tasks,
                         dev_combinator=dev_combinator,
                         restart_trainer=restart_trainer,
                         reload_command=reload_command,
                         name=name,
                         sample_train_sents=sample_train_sents,
                         max_num_train_sents=max_num_train_sents,
                         max_src_len=max_src_len,
                         max_trg_len=max_trg_len)
        self.dev_zero = dev_zero
        self.trainer = trainer or optimizers.SimpleSGDTrainer(e0=0.1)
        self.dynet_profiling = commandline_args.get(
            "dynet_profiling", 0) if commandline_args else 0
        self.train_loss_tracker = loss_trackers.TrainLossTracker(self)
        self.loss_comb_method = loss_comb_method
        self.update_every = update_every
        self.num_updates_skipped = 0
Example 18
 def __init__(
         self,
         layers,
         input_dim=Ref("exp_global.default_layer_dim"),
         hidden_dim=Ref("exp_global.default_layer_dim"),
         dropout=Ref("exp_global.dropout", default=0.0),
         stride=1,
         filter_width=2,
         param_init=Ref("exp_global.param_init",
                        default=bare(param_initializers.GlorotInitializer)),
         bias_init=Ref("exp_global.bias_init",
                       default=bare(param_initializers.ZeroInitializer)),
         forward_layers=None,
         backward_layers=None):
     self.num_layers = layers
     self.hidden_dim = hidden_dim
     assert hidden_dim % 2 == 0
     self.forward_layers = self.add_serializable_component(
         "forward_layers", forward_layers,
         lambda: self.init_layers(input_dim, hidden_dim, dropout, stride,
                                  filter_width, param_init, bias_init))
     self.backward_layers = self.add_serializable_component(
         "backward_layers", backward_layers,
         lambda: self.init_layers(input_dim, hidden_dim, dropout, stride,
                                  filter_width, param_init, bias_init))
Example 19
    def __init__(self,
                 input_dim: numbers.Integral = Ref(
                     "exp_global.default_layer_dim"),
                 vocab_size: Optional[numbers.Integral] = None,
                 vocab: Optional[vocabs.Vocab] = None,
                 trg_reader: Optional[input_readers.InputReader] = Ref(
                     "model.trg_reader", default=None),
                 label_smoothing: numbers.Real = 0.0,
                 param_init: param_initializers.ParamInitializer = Ref(
                     "exp_global.param_init",
                     default=bare(param_initializers.GlorotInitializer)),
                 bias_init: param_initializers.ParamInitializer = Ref(
                     "exp_global.bias_init",
                     default=bare(param_initializers.ZeroInitializer)),
                 output_projector: transforms.Linear = None) -> None:
        self.param_col = param_collections.ParamManager.my_params(self)
        self.input_dim = input_dim
        self.output_dim = self._choose_vocab_size(vocab_size, vocab,
                                                  trg_reader)
        self.label_smoothing = label_smoothing

        self.output_projector = self.add_serializable_component(
            "output_projector", output_projector, lambda: output_projector or
            transforms.Linear(input_dim=self.input_dim,
                              output_dim=self.output_dim,
                              param_init=param_init,
                              bias_init=bias_init))
Example 20
    def __init__(
        self,
        input_dim=Ref("exp_global.default_layer_dim"),
        hidden_dim=Ref("exp_global.default_layer_dim"),
        dropout=Ref("exp_global.dropout", default=0.0),
        filter_width=2,
        stride=1,
        param_init=Ref("exp_global.param_init",
                       default=bare(param_initializers.GlorotInitializer)),
        bias_init=Ref("exp_global.bias_init",
                      default=bare(param_initializers.ZeroInitializer))):
        model = param_collections.ParamManager.my_params(self)
        self.hidden_dim = hidden_dim
        self.dropout = dropout
        self.input_dim = input_dim
        self.stride = stride

        dim_f = (filter_width, 1, input_dim, hidden_dim * 3)
        self.p_f = model.add_parameters(dim=dim_f,
                                        init=param_init.initializer(
                                            dim_f, num_shared=3))  # f, o, z
        dim_b = (hidden_dim * 3, )
        self.p_b = model.add_parameters(dim=dim_b,
                                        init=bias_init.initializer(
                                            dim_b, num_shared=3))
Example 21
    def __init__(self,
                 layers,
                 input_dim,
                 hidden_dim,
                 param_init=Ref("exp_global.param_init",
                                default=bare(GlorotInitializer)),
                 bias_init=Ref("exp_global.bias_init",
                               default=bare(ZeroInitializer))):
        if layers != 1:
            raise RuntimeError(
                "CustomLSTMSeqTransducer supports only exactly one layer")
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        model = ParamManager.my_params(self)

        # [i; f; o; g]
        self.p_Wx = model.add_parameters(dim=(hidden_dim * 4, input_dim),
                                         init=param_init.initializer(
                                             (hidden_dim * 4, input_dim)))
        self.p_Wh = model.add_parameters(dim=(hidden_dim * 4, hidden_dim),
                                         init=param_init.initializer(
                                             (hidden_dim * 4, hidden_dim)))
        self.p_b = model.add_parameters(dim=(hidden_dim * 4, ),
                                        init=bias_init.initializer(
                                            (hidden_dim * 4, )))
Example 22
    def __init__(self,
                 input_dim=Ref("exp_global.default_layer_dim"),
                 trg_embed_dim=Ref("exp_global.default_layer_dim"),
                 input_feeding=True,
                 rnn_layer=bare(UniLSTMSeqTransducer),
                 mlp_layer=bare(MLP),
                 bridge=bare(CopyBridge),
                 label_smoothing=0.0):
        self.param_col = ParamManager.my_params(self)
        self.input_dim = input_dim
        self.label_smoothing = label_smoothing
        # Input feeding
        self.input_feeding = input_feeding
        rnn_input_dim = trg_embed_dim
        if input_feeding:
            rnn_input_dim += input_dim
        assert rnn_input_dim == rnn_layer.input_dim, "Wrong input dimension in RNN layer"
        # Bridge
        self.bridge = bridge

        # LSTM
        self.rnn_layer = rnn_layer

        # MLP
        self.mlp_layer = mlp_layer
Example 23
    def __init__(
        self,
        input_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
        state_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
        hidden_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
        param_init: param_initializers.ParamInitializer = Ref(
            "exp_global.param_init",
            default=bare(param_initializers.GlorotInitializer)),
        bias_init: param_initializers.ParamInitializer = Ref(
            "exp_global.bias_init",
            default=bare(param_initializers.ZeroInitializer))
    ) -> None:
        self.input_dim = input_dim
        self.state_dim = state_dim
        self.hidden_dim = hidden_dim
        my_params = param_collections.ParamManager.my_params(self)
        self.linear_context = nn.Linear(input_dim, hidden_dim,
                                        bias=True).to(xnmt.device)
        self.linear_query = nn.Linear(state_dim, hidden_dim,
                                      bias=False).to(xnmt.device)
        self.pU = nn.Linear(hidden_dim, 1, bias=False).to(xnmt.device)
        my_params.append(self.linear_context)
        my_params.append(self.linear_query)
        my_params.append(self.pU)
        my_params.init_params(param_init, bias_init)

        self.curr_sent = None
        self.attention_vecs = None
        self.WI = None
Example 24
 def __init__(self,
              src_file: Union[str, Sequence[str]],
              ref_file: Optional[str] = None,
              model: 'model_base.GeneratorModel' = Ref("model"),
              batcher: Batcher = Ref("train.batcher",
                                     default=bare(xnmt.batchers.SrcBatcher,
                                                  batch_size=32)),
              loss_calculator: LossCalculator = bare(MLELoss),
              max_src_len: Optional[int] = None,
              max_trg_len: Optional[int] = None,
              max_num_sents: Optional[int] = None,
              loss_comb_method: str = Ref("exp_global.loss_comb_method",
                                          default="sum"),
              desc: Any = None):
     self.model = model
     self.loss_calculator = loss_calculator
     self.src_file = src_file
     self.ref_file = ref_file
     self.batcher = batcher
     self.src_data = None
     self.max_src_len = max_src_len
     self.max_trg_len = max_trg_len
     self.max_num_sents = max_num_sents
     self.loss_comb_method = loss_comb_method
     self.desc = desc
Example 25
 def __init__(self,
              input_dim: int = Ref("exp_global.default_layer_dim"),
              hidden_dim: int = Ref("exp_global.default_layer_dim"),
              downsample_by: int = 1,
              param_init=Ref("exp_global.param_init",
                             default=bare(
                                 param_initializers.GlorotInitializer)),
              projection=None,
              batch_norm=None,
              nonlinearity=None):
     self.projection = self.add_serializable_component(
         "projection", projection,
         lambda: base.TransformSeqTransducer(modelparts_transforms.Linear(
             input_dim=input_dim * downsample_by,
             output_dim=hidden_dim,
             bias=False,
             param_init=param_init),
                                             downsample_by=downsample_by))
     self.batch_norm = self.add_serializable_component(
         "batch_norm", batch_norm,
         lambda: norms.BatchNorm(hidden_dim=hidden_dim, num_dim=2))
     self.nonlinearity = self.add_serializable_component(
         "nonlinearity", nonlinearity, lambda: base.TransformSeqTransducer(
             modelparts_transforms.Cwise("rectify")))
     self.modules = [self.projection, self.batch_norm, self.nonlinearity]
Example 26
  def __init__(self, model=Ref("model"), src_file=None, trg_file=None, dev_every=0, dev_zero=False,
               batcher=bare(xnmt.batcher.SrcBatcher, batch_size=32), loss_calculator=None, trainer=None,
               run_for_epochs=None, lr_decay=1.0, lr_decay_times=3, patience=1, initial_patience=None, dev_tasks=None,
               restart_trainer: bool = False, reload_command=None, name="{EXP}", sample_train_sents=None,
               max_num_train_sents=None, max_src_len=None, max_trg_len=None,
               commandline_args=Ref("exp_global.commandline_args", default=None)):

    super().__init__(model=model,
                     src_file=src_file,
                     trg_file=trg_file,
                     dev_every=dev_every,
                     batcher=batcher,
                     loss_calculator=loss_calculator,
                     run_for_epochs=run_for_epochs,
                     lr_decay=lr_decay,
                     lr_decay_times=lr_decay_times,
                     patience=patience,
                     initial_patience=initial_patience,
                     dev_tasks=dev_tasks,
                     restart_trainer=restart_trainer,
                     reload_command=reload_command,
                     name=name,
                     sample_train_sents=sample_train_sents,
                     max_num_train_sents=max_num_train_sents,
                     max_src_len=max_src_len,
                     max_trg_len=max_trg_len)
    self.dev_zero = dev_zero
    self.trainer = trainer or xnmt.optimizer.SimpleSGDTrainer(e0=0.1)
    self.dynet_profiling = getattr(commandline_args, "dynet_profiling", 0) if commandline_args else 0
Example 27
 def __init__(self,
              input_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
              state_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
              hidden_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
              param_init: param_initializers.ParamInitializer = Ref("exp_global.param_init", default=bare(param_initializers.GlorotInitializer)),
              bias_init: param_initializers.ParamInitializer = Ref("exp_global.bias_init", default=bare(param_initializers.ZeroInitializer)))\
         -> None:
     self.input_dim = input_dim
     self.state_dim = state_dim
     self.hidden_dim = hidden_dim
     param_collection = param_collections.ParamManager.my_params(self)
     self.pW = param_collection.add_parameters((hidden_dim, input_dim),
                                               init=param_init.initializer(
                                                   (hidden_dim, input_dim)))
     self.pV = param_collection.add_parameters((hidden_dim, state_dim),
                                               init=param_init.initializer(
                                                   (hidden_dim, state_dim)))
     self.pb = param_collection.add_parameters((hidden_dim, ),
                                               init=bias_init.initializer(
                                                   (hidden_dim, )))
     self.pU = param_collection.add_parameters((1, hidden_dim),
                                               init=param_init.initializer(
                                                   (1, hidden_dim)))
     self.pL = param_collection.add_parameters((100, 1, 1, hidden_dim),
                                               init=param_init.initializer(
                                                   (100, 1, 1, hidden_dim)))
     self.curr_sent = None
Example 28
 def __init__(
     self,
     tasks: Sequence[train_tasks.TrainingTask],
     trainer: optimizers.XnmtOptimizer = bare(optimizers.SimpleSGDTrainer,
                                              e0=0.1),
     dev_zero: bool = False,
     per_task_backward: bool = True,
     loss_comb_method: str = Ref("exp_global.loss_comb_method",
                                 default="sum"),
     update_every: numbers.Integral = 1,
     n_task_steps: Optional[Sequence[numbers.Integral]] = None,
     commandline_args: dict = Ref("exp_global.commandline_args",
                                  default=None)
 ) -> None:
     super().__init__(tasks=tasks,
                      trainer=trainer,
                      dev_zero=dev_zero,
                      update_every=update_every,
                      commandline_args=commandline_args)
     self.train_loss_trackers = {
         task: loss_trackers.TrainLossTracker(task)
         for task in tasks
     }
     self.per_task_backward = per_task_backward
     self.loss_comb_method = loss_comb_method
     self.n_task_steps = n_task_steps or [1] * len(tasks)
     if len(self.n_task_steps) != len(tasks):
         raise ValueError(
             f"number of tasks and steps per task do not match: {len(tasks)} != {len(self.n_task_steps)}"
         )
Example 29
 def __init__(self,
              input_dim=512,
              layers=1,
              hidden_dim=Ref("exp_global.default_layer_dim"),
              residual_to_output=False,
              dropout=0.0,
              bidirectional=True,
              builder=None,
              yaml_path=None,
              decoder_input_dim=Ref("exp_global.default_layer_dim",
                                    default=None),
              decoder_input_feeding=True):
     self._final_states = None
     if yaml_path is not None and "decoder" in yaml_path:
         bidirectional = False
         if decoder_input_feeding:
             input_dim += decoder_input_dim
     if bidirectional:
         self.builder = self.add_serializable_component(
             "builder", builder,
             lambda: ResidualBiRNNBuilder(num_layers=layers,
                                          input_dim=input_dim,
                                          hidden_dim=hidden_dim,
                                          add_to_output=residual_to_output,
                                          dropout=dropout))
     else:
         self.builder = self.add_serializable_component(
             "builder", builder,
             lambda: ResidualRNNBuilder(num_layers=layers,
                                        input_dim=input_dim,
                                        hidden_dim=hidden_dim,
                                        add_to_output=residual_to_output,
                                        dropout=dropout))
Example 30
    def __init__(
            self,
            input_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
            dropout: numbers.Real = Ref("exp_global.dropout", default=0.0),
            param_init: param_initializers.ParamInitializer = Ref(
                "exp_global.param_init",
                default=bare(param_initializers.GlorotInitializer)),
            bias_init: param_initializers.ParamInitializer = Ref(
                "exp_global.bias_init",
                default=bare(param_initializers.ZeroInitializer)),
            num_heads: numbers.Integral = 8):
        assert (input_dim % num_heads == 0)

        self.dropout = dropout

        param_collection = param_collections.ParamManager.my_params(self)

        self.input_dim = input_dim
        self.num_heads = num_heads
        self.head_dim = input_dim // num_heads

        self.pWq, self.pWk, self.pWv, self.pWo = [
            param_collection.add_parameters(dim=(input_dim, input_dim),
                                            init=param_init.initializer(
                                                (input_dim, input_dim)))
            for _ in range(4)
        ]
        self.pbq, self.pbk, self.pbv, self.pbo = [
            param_collection.add_parameters(dim=(1, input_dim),
                                            init=bias_init.initializer((
                                                1,
                                                input_dim,
                                            ))) for _ in range(4)
        ]