Example #1
  def __init__(self,
               input_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
               output_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
               bias: bool = True,
               activation: str = 'tanh',
               param_init: param_initializers.ParamInitializer = Ref("exp_global.param_init", default=bare(param_initializers.GlorotInitializer)),
               bias_init: param_initializers.ParamInitializer = Ref("exp_global.bias_init", default=bare(param_initializers.ZeroInitializer))) -> None:
    self.bias = bias
    self.output_dim = output_dim
    self.input_dim = input_dim
    if activation == 'tanh':
      self.activation = dy.tanh
    elif activation == 'relu':
      self.activation = dy.rectify
    elif activation == 'sigmoid':
      self.activation = dy.sigmoid
    elif activation == 'elu':
      self.activation = dy.elu
    elif activation == 'selu':
      self.activation = dy.selu
    elif activation == 'asinh':
      self.activation = dy.asinh
    elif activation == 'identity':
      def identity(x):
        return x
      self.activation = identity
    else:
      raise ValueError('Unknown activation %s' % activation)

    model = param_collections.ParamManager.my_params(self)
    self.W1 = model.add_parameters((self.output_dim, self.input_dim), init=param_init.initializer((self.output_dim, self.input_dim)))
    if self.bias:
      self.b1 = model.add_parameters((self.output_dim,), init=bias_init.initializer((self.output_dim,)))
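Usage sketch (not part of the original example): how a layer built like this would typically be applied in DyNet. The transform method name and the explicit dy.parameter promotion are assumptions; recent DyNet versions also accept Parameters directly as Expressions.

  import dynet as dy

  def transform(self, x):
    # promote the stored Parameters to Expressions in the current computation graph
    W = dy.parameter(self.W1)
    if self.bias:
      b = dy.parameter(self.b1)
      return self.activation(dy.affine_transform([b, W, x]))  # activation(W * x + b)
    return self.activation(W * x)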
Example #2
 def __init__(
     self,
     input_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
     state_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
     hidden_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
     param_init: param_initializers.ParamInitializer = Ref(
         "exp_global.param_init",
         default=bare(param_initializers.GlorotInitializer)),
     bias_init: param_initializers.ParamInitializer = Ref(
         "exp_global.bias_init",
         default=bare(param_initializers.ZeroInitializer)),
     truncate_dec_batches: bool = Ref("exp_global.truncate_dec_batches",
                                      default=False)
 ) -> None:
     self.input_dim = input_dim
     self.state_dim = state_dim
     self.hidden_dim = hidden_dim
     self.truncate_dec_batches = truncate_dec_batches
     param_collection = param_collections.ParamManager.my_params(self)
     self.pW = param_collection.add_parameters((hidden_dim, input_dim),
                                               init=param_init.initializer(
                                                   (hidden_dim, input_dim)))
     self.pV = param_collection.add_parameters((hidden_dim, state_dim),
                                               init=param_init.initializer(
                                                   (hidden_dim, state_dim)))
     self.pb = param_collection.add_parameters((hidden_dim, ),
                                               init=bias_init.initializer(
                                                   (hidden_dim, )))
     self.pU = param_collection.add_parameters((1, hidden_dim),
                                               init=param_init.initializer(
                                                   (1, hidden_dim)))
     self.curr_sent = None
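A hedged sketch of the MLP attention score these matrices support, e_i = U * tanh(W * h_i + V * s + b). The method name and the assumption that encoder states are stored column-wise in one matrix are mine, not the example's.

  import dynet as dy

  def attention_scores(self, enc_states, dec_state):
    # enc_states: (input_dim, seq_len) matrix, dec_state: (state_dim,) vector
    W, V, b, U = (dy.parameter(p) for p in (self.pW, self.pV, self.pb, self.pU))
    hidden = dy.tanh(dy.colwise_add(W * enc_states, V * dec_state + b))  # (hidden_dim, seq_len)
    return dy.transpose(U * hidden)  # one unnormalized score per input position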
Example #3
  def __init__(self,
               input_dim: int,
               chn_dim: int = 3,
               num_filters: int = 32,
               param_init: param_initializers.ParamInitializer = Ref("exp_global.param_init", default=bare(param_initializers.GlorotInitializer)),
               bias_init: param_initializers.ParamInitializer = Ref("exp_global.bias_init", default=bare(param_initializers.ZeroInitializer))):
    model = param_collections.ParamManager.my_params(self)
    if input_dim % chn_dim != 0:
      raise RuntimeError("input_dim must be divisible by chn_dim")
    self.input_dim = input_dim

    self.chn_dim = chn_dim
    self.freq_dim = input_dim // chn_dim
    self.num_filters = num_filters
    self.filter_size_time = 1
    self.filter_size_freq = 3

    self.params = {}
    for direction in ["fwd", "bwd"]:
      dim_x = (self.filter_size_time, self.filter_size_freq,
               self.chn_dim, self.num_filters * 4)
      self.params["x2all_" + direction] = \
        model.add_parameters(dim=dim_x,
                             init=param_init.initializer(dim_x, num_shared=4))
      dim_h = (self.filter_size_time, self.filter_size_freq,
               self.num_filters, self.num_filters * 4)
      self.params["h2all_" + direction] = \
        model.add_parameters(dim=dim_h,
                             init=param_init.initializer(dim_h, num_shared=4))
      dim_b = (self.num_filters * 4,)
      self.params["b_" + direction] = \
        model.add_parameters(dim=dim_b, init=bias_init.initializer(dim_b, num_shared=4))
Example #4
 def __init__(self,
              emb_dim=Ref("exp_global.default_layer_dim"),
              weight_noise=Ref("exp_global.weight_noise", default=0.0),
              word_dropout=0.0,
              fix_norm=None,
              param_init: param_initializers.ParamInitializer = Ref(
                  "exp_global.param_init",
                  default=bare(param_initializers.GlorotInitializer)),
              bias_init: param_initializers.ParamInitializer = Ref(
                  "exp_global.bias_init",
                  default=bare(param_initializers.ZeroInitializer)),
              vocab_size=None,
              vocab=None,
              yaml_path=None,
              src_reader=Ref("model.src_reader", default=None),
              trg_reader=Ref("model.trg_reader", default=None)):
     self.fix_norm = fix_norm
     self.weight_noise = weight_noise
     self.word_dropout = word_dropout
     self.emb_dim = emb_dim
     param_collection = param_collections.ParamManager.my_params(self)
     self.vocab_size = self.choose_vocab_size(vocab_size, vocab, yaml_path,
                                              src_reader, trg_reader)
     self.save_processed_arg("vocab_size", self.vocab_size)
     self.embeddings = param_collection.add_parameters(
         (self.vocab_size, self.emb_dim),
         init=param_init.initializer((self.vocab_size, self.emb_dim),
                                     is_lookup=True))
     self.bias = param_collection.add_parameters((self.vocab_size, ),
                                                 init=bias_init.initializer(
                                                     (self.vocab_size, )))
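A hedged sketch of using the (vocab_size, emb_dim) matrix above as a tied output projection, logits = E * h + b; the calc_logits name is an assumption.

  import dynet as dy

  def calc_logits(self, h):
    # h: (emb_dim,) decoder state; returns one unnormalized score per vocabulary item
    E = dy.parameter(self.embeddings)  # (vocab_size, emb_dim)
    b = dy.parameter(self.bias)        # (vocab_size,)
    return dy.affine_transform([b, E, h])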
Example #5
    def __init__(
        self,
        layers: numbers.Integral,
        input_dim: numbers.Integral,
        hidden_dim: numbers.Integral,
        param_init: param_initializers.ParamInitializer = Ref(
            "exp_global.param_init",
            default=bare(param_initializers.GlorotInitializer)),
        bias_init: param_initializers.ParamInitializer = Ref(
            "exp_global.bias_init",
            default=bare(param_initializers.ZeroInitializer))
    ) -> None:
        if layers != 1:
            raise RuntimeError(
                "CustomLSTMSeqTransducer supports only exactly one layer")
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        model = param_collections.ParamManager.my_params(self)

        # [i; f; o; g]
        self.p_Wx = model.add_parameters(dim=(hidden_dim * 4, input_dim),
                                         init=param_init.initializer(
                                             (hidden_dim * 4, input_dim)))
        self.p_Wh = model.add_parameters(dim=(hidden_dim * 4, hidden_dim),
                                         init=param_init.initializer(
                                             (hidden_dim * 4, hidden_dim)))
        self.p_b = model.add_parameters(dim=(hidden_dim * 4, ),
                                        init=bias_init.initializer(
                                            (hidden_dim * 4, )))
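A hedged sketch of a single recurrence step using the stacked [i; f; o; g] block declared above; the step signature and the dy.pick_range slicing are assumptions, not the transducer's confirmed code path.

    import dynet as dy

    def step(self, x_t, h_prev, c_prev):
        Wx, Wh, b = (dy.parameter(p) for p in (self.p_Wx, self.p_Wh, self.p_b))
        gates = dy.affine_transform([b, Wx, x_t, Wh, h_prev])  # (4 * hidden_dim,)
        i, f, o, g = [dy.pick_range(gates, k * self.hidden_dim, (k + 1) * self.hidden_dim)
                      for k in range(4)]
        c_t = dy.cmult(dy.logistic(f), c_prev) + dy.cmult(dy.logistic(i), dy.tanh(g))
        h_t = dy.cmult(dy.logistic(o), dy.tanh(c_t))
        return h_t, c_t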
Example #6
    def __init__(
            self,
            input_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
            dropout: numbers.Real = Ref("exp_global.dropout", default=0.0),
            param_init: param_initializers.ParamInitializer = Ref(
                "exp_global.param_init",
                default=bare(param_initializers.GlorotInitializer)),
            bias_init: param_initializers.ParamInitializer = Ref(
                "exp_global.bias_init",
                default=bare(param_initializers.ZeroInitializer)),
            num_heads: numbers.Integral = 8):
        assert (input_dim % num_heads == 0)

        self.dropout = dropout

        param_collection = param_collections.ParamManager.my_params(self)

        self.input_dim = input_dim
        self.num_heads = num_heads
        self.head_dim = input_dim // num_heads

        self.pWq, self.pWk, self.pWv, self.pWo = [
            param_collection.add_parameters(dim=(input_dim, input_dim),
                                            init=param_init.initializer(
                                                (input_dim, input_dim)))
            for _ in range(4)
        ]
        self.pbq, self.pbk, self.pbv, self.pbo = [
            param_collection.add_parameters(dim=(1, input_dim),
                                            init=bias_init.initializer((
                                                1,
                                                input_dim,
                                            ))) for _ in range(4)
        ]
Example #7
    def __init__(
        self,
        input_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
        output_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
        bias: bool = True,
        activation: str = 'tanh',
        param_init: param_initializers.ParamInitializer = Ref(
            "exp_global.param_init",
            default=bare(param_initializers.GlorotInitializer)),
        bias_init: param_initializers.ParamInitializer = Ref(
            "exp_global.bias_init",
            default=bare(param_initializers.ZeroInitializer))
    ) -> None:
        self.bias = bias
        self.output_dim = output_dim
        self.input_dim = input_dim
        self.activation = tt.activation_by_name(activation)

        my_params = param_collections.ParamManager.my_params(self)
        self.W1 = my_params.add_parameters(
            (self.output_dim, self.input_dim),
            init=param_init.initializer((self.output_dim, self.input_dim)))
        if self.bias:
            self.b1 = my_params.add_parameters((self.output_dim, ),
                                               init=bias_init.initializer(
                                                   (self.output_dim, )))
Example #8
    def __init__(self,
                 emb_dim: int = Ref("exp_global.default_layer_dim"),
                 vocab_size: Optional[int] = None,
                 vocab: Optional[vocabs.Vocab] = None,
                 yaml_path: Path = Path(''),
                 src_reader: Optional[input_readers.InputReader] = Ref(
                     "model.src_reader", default=None),
                 trg_reader: Optional[input_readers.InputReader] = Ref(
                     "model.trg_reader", default=None),
                 is_dense: bool = False,
                 param_init: pinit.ParamInitializer = Ref(
                     "exp_global.param_init",
                     default=bare(pinit.GlorotInitializer)),
                 bias_init: pinit.ParamInitializer = Ref(
                     "exp_global.bias_init",
                     default=bare(pinit.ZeroInitializer)),
                 init_fastext: Optional[str] = None,
                 weight_noise: float = Ref("exp_global.weight_noise",
                                           default=0.0),
                 fix_norm: Optional[float] = None):
        super().__init__(emb_dim=emb_dim,
                         weight_noise=weight_noise,
                         fix_norm=fix_norm)
        # Embedding Parameters
        pcol = param_collections.ParamManager.my_params(self)
        self.vocab_size = self.choose_vocab_size(vocab_size, vocab, yaml_path,
                                                 src_reader, trg_reader)
        emb_mtr_dim = (self.vocab_size, self.emb_dim)

        if init_fastext is not None:
            logger.info("Setting Dense to False because of init_fastext")
            is_dense = False

        if not is_dense:
            if init_fastext is not None:
                self.embeddings = pcol.lookup_parameters_from_numpy(
                    self._read_fasttext_embeddings(vocab, init_fastext))
            else:
                self.embeddings = pcol.add_lookup_parameters(
                    emb_mtr_dim,
                    init=param_init.initializer(emb_mtr_dim, is_lookup=True))
        else:
            self.embeddings = pcol.add_parameters(emb_mtr_dim,
                                                  init=param_init.initializer(
                                                      emb_mtr_dim,
                                                      is_lookup=True))
            self.bias = pcol.add_parameters((self.vocab_size, ),
                                            init=bias_init.initializer(
                                                (self.vocab_size, )))

        # Model States
        self.is_dense = is_dense
        self.train = False
        self.save_processed_arg("vocab_size", self.vocab_size)
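A hedged sketch of the _read_fasttext_embeddings helper referenced above; the real implementation may differ. It assumes the standard fastText .vec text format ("word v1 v2 ...") preceded by a header line, that words missing from the file keep a zero row, and that the Vocab object exposes w2i and convert.

    import numpy as np

    def _read_fasttext_embeddings(self, vocab, path):
        emb = np.zeros((self.vocab_size, self.emb_dim))
        with open(path, encoding="utf-8") as f:
            next(f)  # skip the "num_words dim" header line
            for line in f:
                word, *values = line.rstrip().split(" ")
                if word in vocab.w2i:
                    emb[vocab.convert(word)] = np.asarray(values, dtype=float)
        return emb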
Example #9
 def __init__(
     self,
     input_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
     state_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
     hidden_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
     param_init: param_initializers.ParamInitializer = Ref(
         "exp_global.param_init",
         default=bare(param_initializers.GlorotInitializer)),
     bias_init: param_initializers.ParamInitializer = Ref(
         "exp_global.bias_init",
         default=bare(param_initializers.ZeroInitializer))
 ) -> None:
     self.input_dim = input_dim
     self.state_dim = state_dim
     self.hidden_dim = hidden_dim
     my_params = param_collections.ParamManager.my_params(self)
      # note: param_init is indexed per weight matrix below, so this constructor
      # effectively expects a sequence of three initializers rather than the single
      # initializer suggested by the type annotation and its default
      self.linear_context = my_params.add_parameters(
          (hidden_dim, input_dim),
          init=param_init[0].initializer((hidden_dim, input_dim)))
     self.linear_query = my_params.add_parameters(
         (hidden_dim, state_dim),
         init=param_init[1].initializer((hidden_dim, state_dim)))
     self.bias_context = my_params.add_parameters(
         (hidden_dim, ), init=bias_init.initializer((hidden_dim, )))
     self.pU = my_params.add_parameters((1, hidden_dim),
                                        init=param_init[2].initializer(
                                            (1, hidden_dim)))
     self.curr_sent = None
     self.attention_vecs = None
     self.WI = None
Example #10
 def __init__(
     self,
     emb_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
     weight_noise: numbers.Real = Ref("exp_global.weight_noise",
                                      default=0.0),
     word_dropout: numbers.Real = 0.0,
     fix_norm: Optional[numbers.Real] = None,
     param_init: param_initializers.ParamInitializer = Ref(
         "exp_global.param_init",
         default=bare(param_initializers.GlorotInitializer)),
     vocab_size: Optional[numbers.Integral] = None,
     vocab: Optional[vocabs.Vocab] = None,
     yaml_path=None,
     src_reader: Optional[input_readers.InputReader] = Ref(
         "model.src_reader", default=None),
     trg_reader: Optional[input_readers.InputReader] = Ref(
         "model.trg_reader", default=None)
 ) -> None:
     self.emb_dim = emb_dim
     self.weight_noise = weight_noise
     self.word_dropout = word_dropout
     self.fix_norm = fix_norm
     self.word_id_mask = None
     self.train = False
     param_collection = param_collections.ParamManager.my_params(self)
     self.vocab_size = self.choose_vocab_size(vocab_size, vocab, yaml_path,
                                              src_reader, trg_reader)
     self.save_processed_arg("vocab_size", self.vocab_size)
     self.embeddings = param_collection.add_lookup_parameters(
         (self.vocab_size, self.emb_dim),
         init=param_init.initializer((self.vocab_size, self.emb_dim),
                                     is_lookup=True))
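A hedged sketch of embedding a single word id with the lookup table above, applying the weight-noise and fix_norm options; the embed signature and the exact order of these steps are assumptions.

    import dynet as dy

    def embed(self, word_id):
        emb = dy.lookup(self.embeddings, word_id)
        if self.train and self.weight_noise > 0.0:
            emb = dy.noise(emb, self.weight_noise)  # additive Gaussian noise during training
        if self.fix_norm is not None:
            emb = dy.cdiv(emb, dy.l2_norm(emb)) * self.fix_norm  # rescale to a fixed L2 norm
        return emb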
Example #11
 def __init__(self,
              max_pos: numbers.Integral,
              op: str = 'sum',
              emb_type: str = 'param',
              input_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
              dropout: numbers.Real = Ref("exp_global.dropout", default=0.0),
              param_init: param_initializers.ParamInitializer = Ref("exp_global.param_init", default=bare(param_initializers.GlorotInitializer))) \
         -> None:
     """
 max_pos: largest embedded position
 op: how to combine positional encodings with the original encodings, can be "sum" or "concat"
 type: what type of embddings to use, "param"=parameterized (others, such as the trigonometric embeddings are todo)
 input_dim: embedding size
 dropout: apply dropout to output of this transducer
 param_init: how to initialize embedding matrix
 """
     self.max_pos = max_pos
     self.input_dim = input_dim
     self.dropout = dropout
     self.op = op
     self.emb_type = emb_type
     dim = (self.input_dim, max_pos)
     my_params = param_collections.ParamManager.my_params(self)
     self.embedder = my_params.add_parameters(dim,
                                              init=param_init.initializer(
                                                  dim, is_lookup=True))
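A hedged sketch of combining the positional matrix above with an input sequence of length seq_len (columns of the matrix correspond to positions); the method name and the column-major layout of the inputs are assumptions.

    import dynet as dy

    def combine_positions(self, embeddings, seq_len):
        # embeddings: (input_dim, seq_len)
        pos = dy.select_cols(dy.parameter(self.embedder), list(range(seq_len)))
        if self.op == 'sum':
            return embeddings + pos
        elif self.op == 'concat':
            return dy.concatenate([embeddings, pos])
        raise ValueError('Unknown op %s' % self.op)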
Example #12
 def __init__(self,
              char_vocab: Optional[vocabs.CharVocab] = Ref(
                  "model.src_reader.char_vocab", default=None),
              vocab_size: Optional[int] = None,
              emb_dim: int = Ref("exp_global.default_layer_dim"),
              weight_noise: float = Ref("exp_global.weight_noise",
                                        default=0.0),
              param_init: pinit.ParamInitializer = Ref(
                  "exp_global.param_init",
                  default=bare(pinit.GlorotInitializer)),
              bias_init: pinit.ParamInitializer = Ref(
                  "exp_global.bias_init",
                  default=bare(pinit.ZeroInitializer)),
              composer: seq_composer.SequenceComposer = bare(
                  seq_composer.SumComposer),
              fix_norm: Optional[float] = None):
     super().__init__(emb_dim=emb_dim,
                      weight_noise=weight_noise,
                      fix_norm=fix_norm)
     self.composer = composer
     # Embedding Parameters
     pcol = param_collections.ParamManager.my_params(self)
     self.vocab_size = self.choose_vocab_size(vocab_size, char_vocab, '',
                                              None, None)
     self.embeddings = pcol.add_lookup_parameters(
         (self.vocab_size, self.emb_dim),
         init=param_init.initializer((self.vocab_size, self.emb_dim),
                                     is_lookup=True))
     # Model States
     self.train = False
     self.save_processed_arg("vocab_size", self.vocab_size)
Example #13
    def __init__(self,
                 weight_noise: float = Ref("exp_global.weight_noise",
                                           default=0.0),
                 layers: int = 1,
                 input_dim: int = 120,
                 chn_dim: int = 3,
                 num_filters: int = 32,
                 stride: Tuple[int, int] = (2, 2),
                 batch_norm: bool = False,
                 nonlinearity: str = "rectify",
                 pre_activation: bool = False,
                 output_tensor: bool = False,
                 transpose: bool = True,
                 param_init: param_initializers.ParamInitializer = Ref(
                     "exp_global.param_init",
                     default=bare(param_initializers.GlorotInitializer))):
        assert layers > 0
        if input_dim % chn_dim != 0:
            raise ValueError(
                f"StridedConvEncoder requires input_dim mod chn_dim == 0, got: {input_dim} and {chn_dim}"
            )

        param_col = param_collections.ParamManager.my_params(self)
        self.layers = layers
        self.chn_dim = chn_dim
        self.freq_dim = input_dim // chn_dim
        self.num_filters = num_filters
        self.filter_size_time = 3
        self.filter_size_freq = 3
        self.stride = stride
        self.output_transposed_tensor = output_tensor
        self.nonlinearity = nonlinearity
        self.pre_activation = pre_activation

        self.use_bn = batch_norm
        self.train = True
        self.transpose = transpose
        self.weight_noise = regularizers.WeightNoise(weight_noise)

        self.bn_layers = []
        self.filters_layers = []
        for layer_i in range(layers):
            filter_dim = (self.filter_size_time, self.filter_size_freq,
                          self.chn_dim if layer_i == 0 else self.num_filters,
                          self.num_filters)
            filters = param_col.add_parameters(
                dim=filter_dim,
                init=param_init[layer_i].initializer(filter_dim) if isinstance(
                    param_init,
                    Sequence) else param_init.initializer(filter_dim))
            if self.use_bn:
                self.bn_layers.append(
                    norms.BatchNorm(param_col,
                                    (self.chn_dim if self.pre_activation else
                                     self.num_filters), 3))
            self.filters_layers.append(filters)
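A hedged sketch of applying one of the strided convolution layers created above; the (time, freq, channels) tensor layout is inferred from the filter dimensions, and batch norm and weight noise are omitted here for brevity.

    import dynet as dy

    def apply_one_layer(self, x, layer_i):
        # x: (time, freq, channels) input tensor, possibly batched
        filt = dy.parameter(self.filters_layers[layer_i])
        out = dy.conv2d(x, filt, stride=list(self.stride), is_valid=True)
        return dy.rectify(out) if self.nonlinearity == "rectify" else out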
Example #14
 def __init__(self,
              input_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
              state_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
              param_init: param_initializers.ParamInitializer = Ref("exp_global.param_init", default=bare(param_initializers.GlorotInitializer)),
              truncate_dec_batches: bool = Ref("exp_global.truncate_dec_batches", default=False)) -> None:
   if truncate_dec_batches: raise NotImplementedError("truncate_dec_batches not yet implemented for BilinearAttender")
   self.input_dim = input_dim
   self.state_dim = state_dim
   param_collection = param_collections.ParamManager.my_params(self)
   self.pWa = param_collection.add_parameters((input_dim, state_dim), init=param_init.initializer((input_dim, state_dim)))
   self.curr_sent = None
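A hedged sketch of the bilinear score this Wa parameterizes, score_i = h_i^T * Wa * s; the method name and the column-wise encoder matrix are assumptions.

  import dynet as dy

  def attention_scores(self, enc_states, dec_state):
    # enc_states: (input_dim, seq_len), dec_state: (state_dim,)
    Wa = dy.parameter(self.pWa)
    return dy.transpose(enc_states) * (Wa * dec_state)  # (seq_len,) unnormalized scores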
Example #15
 def __init__(
     self,
     input_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
     state_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
     param_init: param_initializers.ParamInitializer = Ref(
         "exp_global.param_init",
         default=bare(param_initializers.GlorotInitializer))
 ) -> None:
     self.input_dim = input_dim
     self.state_dim = state_dim
     my_params = param_collections.ParamManager.my_params(self)
     self.pWa = my_params.add_parameters((input_dim, state_dim),
                                         init=param_init.initializer(
                                             (input_dim, state_dim)))
     self.curr_sent = None
Example #16
 def __init__(self,
              hidden_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
              output_len: numbers.Integral = 32,
              pos_enc_max: Optional[numbers.Integral] = None,
              param_init: param_initializers.ParamInitializer = Ref("exp_global.param_init", default=bare(param_initializers.GlorotInitializer))) \
         -> None:
   subcol = param_collections.ParamManager.my_params(self)
   self.output_len = output_len
   self.W = subcol.add_parameters(dim=(hidden_dim, output_len),
                                  init=param_init.initializer((hidden_dim, output_len)))
   self.pos_enc_max = pos_enc_max
   if self.pos_enc_max:
     self.pos_enc = np.zeros((self.pos_enc_max, self.output_len))
     for k in range(self.output_len):
       for s in range(self.pos_enc_max):
         self.pos_enc[s, k] = (1.0 - k / self.output_len) * (
                 1.0 - s / self.pos_enc_max) + k / self.output_len * s / self.pos_enc_max
Example #17
 def __init__(self,
              max_pos: numbers.Integral,
              emb_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
              param_init: param_initializers.ParamInitializer = Ref("exp_global.param_init",
                                                                    default=bare(param_initializers.GlorotInitializer))) \
         -> None:
   """
   max_pos: largest embedded position
   emb_dim: embedding size
   param_init: how to initialize embedding matrix
   """
   self.max_pos = max_pos
   self.emb_dim = emb_dim
   param_collection = param_collections.ParamManager.my_params(self)
   dim = (self.emb_dim, max_pos)
   self.embeddings = param_collection.add_parameters(dim, init=param_init.initializer(dim, is_lookup=True))
Example #18
 def __init__(self,
              max_pos: int,
              op: str = 'sum',
              emb_type: str = 'param',
              input_dim: int = Ref("exp_global.default_layer_dim"),
              param_init: ParamInitializer = Ref("exp_global.param_init", default=bare(GlorotInitializer))):
   """
   max_pos: largest embedded position
   op: how to combine positional encodings with the original encodings, can be "sum" or "concat"
    emb_type: what type of embeddings to use, "param" = parameterized (others, such as trigonometric embeddings, are TODO)
   input_dim: embedding size
   param_init: how to initialize embedding matrix
   """
   self.max_pos = max_pos
   self.input_dim = input_dim
   self.op = op
   self.emb_type = emb_type
   dim = (self.input_dim, max_pos)
   param_collection = ParamManager.my_params(self)
   self.embedder = param_collection.add_parameters(dim, init=param_init.initializer(dim, is_lookup=True))
Example #19
 def __init__(
     self,
     emb_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
     weight_noise: numbers.Real = Ref("exp_global.weight_noise",
                                      default=0.0),
     word_dropout: numbers.Real = 0.0,
     fix_norm: Optional[numbers.Real] = None,
     param_init: param_initializers.ParamInitializer = Ref(
         "exp_global.param_init",
         default=bare(param_initializers.GlorotInitializer)),
     vocab_size: Optional[numbers.Integral] = None,
     vocab: Optional[vocabs.Vocab] = None,
     yaml_path: Path = Path(),
     src_reader: Optional[input_readers.InputReader] = Ref(
         "model.src_reader", default=None),
     trg_reader: Optional[input_readers.InputReader] = Ref(
         "model.trg_reader", default=None)
 ) -> None:
     self.emb_dim = emb_dim
     self.weight_noise = weight_noise
     self.word_dropout = word_dropout
     self.fix_norm = fix_norm
     self.word_id_mask = None
     self.train = False
     my_params = param_collections.ParamManager.my_params(self)
     self.vocab_size = self.choose_vocab_size(vocab_size, vocab, yaml_path,
                                              src_reader, trg_reader)
     self.save_processed_arg("vocab_size", self.vocab_size)
     if isinstance(param_init, param_initializers.NumpyInitializer):
         if param_init.array.shape != (self.vocab_size, self.emb_dim):
             raise ValueError(
                 f"Expected numpy array of shape {(self.vocab_size, self.emb_dim)}, got {param_init.array.shape}"
             )
         self.embeddings = my_params.lookup_parameters_from_numpy(
             param_init.array, name=self.xnmt_subcol_name)
     else:
         self.embeddings = my_params.add_lookup_parameters(
             (self.vocab_size, self.emb_dim),
             init=param_init.initializer((self.vocab_size, self.emb_dim),
                                         is_lookup=True))
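A hedged construction sketch for the NumpyInitializer branch above: loading a pre-trained matrix and passing it in as param_init. The file name and the SimpleWordEmbedder class name are placeholders, and it is assumed that NumpyInitializer wraps the array it later exposes as .array.

    import numpy as np
    from xnmt import param_initializers

    pretrained = np.load("pretrained_embeddings.npy")  # expected shape: (vocab_size, emb_dim)
    embedder = SimpleWordEmbedder(
        emb_dim=pretrained.shape[1],
        vocab_size=pretrained.shape[0],
        param_init=param_initializers.NumpyInitializer(pretrained))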