Code example #1
File: policy_gradient.py Project: msperber/misc
    def __init__(
        self,
        policy_network=None,
        baseline=None,
        z_normalization=True,
        conf_penalty=None,
        weight=1.0,
        input_dim=Ref("exp_global.default_layer_dim"),
        output_dim=2,
        param_init=Ref("exp_global.param_init",
                       default=bare(param_initializers.GlorotInitializer)),
        bias_init=Ref("exp_global.bias_init",
                      default=bare(param_initializers.ZeroInitializer))):
        self.input_dim = input_dim
        self.policy_network = self.add_serializable_component(
            "policy_network", policy_network,
            lambda: transforms.Linear(input_dim=self.input_dim,
                                      output_dim=output_dim,
                                      param_init=param_init,
                                      bias_init=bias_init))
        self.baseline = self.add_serializable_component(
            "baseline", baseline,
            lambda: transforms.Linear(input_dim=self.input_dim,
                                      output_dim=1,
                                      param_init=param_init,
                                      bias_init=bias_init))

        self.confidence_penalty = self.add_serializable_component(
            "conf_penalty", conf_penalty,
            lambda: conf_penalty) if conf_penalty is not None else None
        self.weight = weight
        self.z_normalization = z_normalization
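
All of these snippets rely on the same xnmt idiom: add_serializable_component(name, passed, factory) keeps a caller-supplied component if one was given and otherwise invokes the zero-argument factory to build a default, registering the result so it round-trips through YAML configs. A minimal standalone sketch of the default-or-factory part (the serialization bookkeeping of the real method is omitted; the Linear stub is illustrative, not xnmt's class):

    class Linear:
        def __init__(self, input_dim, output_dim):
            self.input_dim, self.output_dim = input_dim, output_dim

    def add_serializable_component(name, passed, factory):
        # Keep an explicitly configured component; otherwise build the default.
        # The real xnmt method also records the result under `name` for YAML
        # serialization; that bookkeeping is omitted here.
        return passed if passed is not None else factory()

    default = add_serializable_component(
        "policy_network", None, lambda: Linear(input_dim=512, output_dim=2))
    custom = add_serializable_component(
        "policy_network", Linear(1024, 2), lambda: Linear(512, 2))
    assert default.input_dim == 512 and custom.input_dim == 1024
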
Code example #2
File: scorers.py Project: rezahaffari/xnmt
    def __init__(
        self,
        input_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
        vocab_size: Optional[numbers.Integral] = None,
        vocab: Optional[vocabs.Vocab] = None,
        trg_reader: Optional[input_readers.InputReader] = Ref(
            "model.trg_reader", default=None),
        attender=Ref("model.attender"),
        label_smoothing: numbers.Real = 0.0,
        param_init: param_initializers.ParamInitializer = Ref(
            "exp_global.param_init",
            default=bare(param_initializers.GlorotInitializer)),
        bias_init: param_initializers.ParamInitializer = Ref(
            "exp_global.bias_init",
            default=bare(param_initializers.ZeroInitializer)),
        output_projector: transforms.Linear = None,
        lexicon_file=None,
        lexicon_alpha=0.001,
        lexicon_type='bias',
        coef_predictor: transforms.Linear = None,
        src_vocab=Ref("model.src_reader.vocab", default=None)
    ) -> None:
        self.param_col = param_collections.ParamManager.my_params(self)
        self.input_dim = input_dim
        self.output_dim = self._choose_vocab_size(vocab_size, vocab,
                                                  trg_reader)
        self.label_smoothing = label_smoothing

        self.output_projector = self.add_serializable_component(
            "output_projector", output_projector, lambda: output_projector or
            transforms.Linear(input_dim=self.input_dim,
                              output_dim=self.output_dim,
                              param_init=param_init,
                              bias_init=bias_init))
        self.coef_predictor = self.add_serializable_component(
            "coef_predictor", coef_predictor, lambda: coef_predictor or
            transforms.Linear(input_dim=self.input_dim,
                              output_dim=1,
                              param_init=param_init,
                              bias_init=bias_init))
        self.lexicon_file = lexicon_file
        self.lexicon_type = lexicon_type
        self.lexicon_alpha = lexicon_alpha

        assert lexicon_type in [
            "bias", "linear"
        ], "Lexicon type can be either 'bias' or 'linear' only!"
        # Reference to other parts of the model
        self.src_vocab = src_vocab
        self.trg_vocab = vocab if vocab is not None else trg_reader.vocab
        self.attender = attender
        # Sparse data structure to store external lexicon probabilities
        self.lexicon = None
        # State of the softmax
        self.lexicon_prob = None
        self.coeff = None
        self.dict_prob = None
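
_choose_vocab_size is not shown in this excerpt (it also appears in code example #4); judging from the call site, it resolves the output dimension from whichever of vocab_size, vocab, or trg_reader is available. A hypothetical sketch of that precedence (the actual xnmt implementation may differ in details):

    def choose_vocab_size(vocab_size, vocab, trg_reader):
        # Assumed precedence: explicit size > vocab object > reader's vocab.
        if vocab_size is not None:
            return vocab_size
        if vocab is not None:
            return len(vocab)
        if trg_reader is not None and getattr(trg_reader, "vocab", None) is not None:
            return len(trg_reader.vocab)
        raise ValueError("need one of vocab_size, vocab, or trg_reader.vocab")
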
Code example #3
 def __init__(self,
              input_dim: int = Ref("exp_global.default_layer_dim"),
              hidden_dim: int = Ref("exp_global.default_layer_dim"),
              downsample_by: int = 1,
              param_init=Ref("exp_global.param_init",
                             default=bare(
                                 param_initializers.GlorotInitializer)),
              projection=None,
              batch_norm=None,
              nonlinearity=None):
     self.projection = self.add_serializable_component(
         "projection", projection,
         lambda: base.TransformSeqTransducer(
             modelparts_transforms.Linear(input_dim=input_dim * downsample_by,
                                          output_dim=hidden_dim,
                                          bias=False,
                                          param_init=param_init),
             downsample_by=downsample_by))
     self.batch_norm = self.add_serializable_component(
         "batch_norm", batch_norm,
         lambda: norms.BatchNorm(hidden_dim=hidden_dim, num_dim=2))
     self.nonlinearity = self.add_serializable_component(
         "nonlinearity", nonlinearity, lambda: base.TransformSeqTransducer(
             modelparts_transforms.Cwise("rectify")))
     self.modules = [self.projection, self.batch_norm, self.nonlinearity]
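
Note the input_dim * downsample_by in code example #3: assuming downsampling works by concatenating downsample_by consecutive frames (which is what the Linear's input width suggests), the sequence shortens while the feature dimension grows. A toy check of that reshape:

    import numpy as np

    seq_len, input_dim, downsample_by = 6, 4, 2
    x = np.arange(seq_len * input_dim, dtype=float).reshape(seq_len, input_dim)
    # Concatenate every `downsample_by` consecutive frames into one.
    y = x.reshape(seq_len // downsample_by, input_dim * downsample_by)
    assert y.shape == (3, 8)
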
Code example #4
    def __init__(self,
                 input_dim: numbers.Integral = Ref(
                     "exp_global.default_layer_dim"),
                 vocab_size: Optional[numbers.Integral] = None,
                 vocab: Optional[vocabs.Vocab] = None,
                 trg_reader: Optional[input_readers.InputReader] = Ref(
                     "model.trg_reader", default=None),
                 label_smoothing: numbers.Real = 0.0,
                 param_init: param_initializers.ParamInitializer = Ref(
                     "exp_global.param_init",
                     default=bare(param_initializers.GlorotInitializer)),
                 bias_init: param_initializers.ParamInitializer = Ref(
                     "exp_global.bias_init",
                     default=bare(param_initializers.ZeroInitializer)),
                 output_projector: transforms.Linear = None) -> None:
        self.param_col = param_collections.ParamManager.my_params(self)
        self.input_dim = input_dim
        self.output_dim = self._choose_vocab_size(vocab_size, vocab,
                                                  trg_reader)
        self.label_smoothing = label_smoothing

        self.output_projector = self.add_serializable_component(
            "output_projector", output_projector, lambda: output_projector or
            transforms.Linear(input_dim=self.input_dim,
                              output_dim=self.output_dim,
                              param_init=param_init,
                              bias_init=bias_init))
Code example #5
File: semi_disc.py Project: seeledu/xnmt-devel
    def __init__(self,
                 transducer: transducers.SeqTransducer,
                 input_dim: int = Ref("exp_global.default_layer_dim"),
                 softmax_dim: int = Ref("exp_global.default_layer_dim"),
                 layer_dim: int = Ref("exp_global.default_layer_dim"),
                 linear_layer: transforms.Linear = None,
                 vocab: Optional[vocabs.Vocab] = None,
                 scale: float = 1.0,
                 mode: str = "entropy",
                 param_init: param_initializers.ParamInitializer = Ref(
                     "exp_global.param_init",
                     default=bare(param_initializers.GlorotInitializer)),
                 bias_init: param_initializers.ParamInitializer = Ref(
                     "exp_global.bias_init",
                     default=bare(param_initializers.ZeroInitializer))):
        self.transducer = transducer
        self.input_dim = input_dim
        if vocab:
            softmax_dim = len(vocab)
        self.softmax_dim = softmax_dim
        self.layer_dim = layer_dim
        self.scale = scale
        self.mode = mode

        self.linear_layer = self.add_serializable_component(
            "linear_layer", linear_layer,
            lambda: transforms.Linear(input_dim=self.softmax_dim,
                                      output_dim=self.layer_dim,
                                      bias=False,
                                      param_init=param_init,
                                      bias_init=bias_init))
Code example #6
 def __init__(self,
              input_dim: int,
              hidden_dim: int,
              nonlinearity: str = "rectify",
              linear_transforms: typing.Optional[typing.Sequence[
                  transforms.Linear]] = None,
              layer_norm: typing.Optional[norms.LayerNorm] = None) -> None:
     w_12 = self.add_serializable_component(
         "linear_transforms", linear_transforms, lambda: [
             transforms.Linear(input_dim, hidden_dim),
             transforms.Linear(hidden_dim, input_dim)
         ])
     self.w_1 = w_12[0]
     self.w_2 = w_12[1]
     self.layer_norm = self.add_serializable_component(
         "layer_norm", layer_norm, lambda: norms.LayerNorm(input_dim))
     self.nonlinearity = getattr(dy, nonlinearity)
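
The last line of code example #6 resolves the activation by name: nonlinearity="rectify" becomes dy.rectify via attribute lookup on the DyNet module. The same name-to-function dispatch, sketched on the standard library so it runs without DyNet:

    import math

    def resolve_by_name(module, name):
        # Look the function up by name, failing fast on typos instead of
        # deferring the AttributeError to the first forward pass.
        fn = getattr(module, name, None)
        if fn is None:
            raise ValueError(f"{module.__name__} has no function {name!r}")
        return fn

    tanh = resolve_by_name(math, "tanh")
    assert tanh(0.0) == 0.0
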
Code example #7
File: loss_calculators.py Project: ustcmike/xnmt
 def __init__(self,
              baseline: Optional[Serializable] = None,
              evaluation_metric: metrics.SentenceLevelEvaluator = bare(
                  metrics.FastBLEUEvaluator),
              search_strategy: search_strategies.SearchStrategy = bare(
                  search_strategies.SamplingSearch),
              inv_eval: bool = True,
              decoder_hidden_dim: numbers.Integral = Ref(
                  "exp_global.default_layer_dim")) -> None:
   self.inv_eval = inv_eval
   self.search_strategy = search_strategy
   self.evaluation_metric = evaluation_metric
   self.baseline = self.add_serializable_component(
       "baseline", baseline,
       lambda: transforms.Linear(input_dim=decoder_hidden_dim, output_dim=1))
Code example #8
File: bow.py Project: seeledu/xnmt-devel
 def __init__(self,
              src_reader: input_readers.InputReader,
              trg_reader: input_readers.InputReader,
              src_embedder: embedders.Embedder = bare(embedders.SimpleWordEmbedder),
              encoder: transducers.SeqTransducer = bare(recurrent.BiLSTMSeqTransducer),
              inference=bare(inferences.IndependentOutputInference),
              hidden_dim: int = Ref("exp_global.default_layer_dim"),
              output_layer: Optional[transforms.Linear] = None,
              generate_per_step: bool = False,
              mode: str = "avg_mlp"):
   super().__init__(src_reader=src_reader, trg_reader=trg_reader)
   self.src_embedder = src_embedder
   self.encoder = encoder
   self.output_layer = self.add_serializable_component(
       "output_layer", output_layer,
       lambda: transforms.Linear(input_dim=hidden_dim,
                                 output_dim=len(trg_reader.vocab)))
   self.inference = inference
   self.mode = mode
   self.generate_per_step = generate_per_step
Code example #9
File: bridges.py Project: gmwe/xnmt
 def __init__(self,
              dec_layers: int = 1,
              enc_dim: int = Ref("exp_global.default_layer_dim"),
              dec_dim: int = Ref("exp_global.default_layer_dim"),
              param_init: param_initializers.ParamInitializer = Ref(
                  "exp_global.param_init",
                  default=bare(param_initializers.GlorotInitializer)),
              bias_init: param_initializers.ParamInitializer = Ref(
                  "exp_global.bias_init",
                  default=bare(param_initializers.ZeroInitializer)),
              projector=None):
     self.dec_layers = dec_layers
     self.enc_dim = enc_dim
     self.dec_dim = dec_dim
     self.projector = self.add_serializable_component(
         "projector", projector,
         lambda: transforms.Linear(input_dim=self.enc_dim,
                                   output_dim=self.dec_dim,
                                   param_init=param_init,
                                   bias_init=bias_init))
Code example #10
File: semi_disc.py Project: seeledu/xnmt-devel
    def __init__(
        self,
        input_dim=Ref("exp_global.default_layer_dim"),
        softmax_dim=Ref("exp_global.default_layer_dim"),
        output_dim=Ref("exp_global.default_layer_dim"),
        dropout=Ref("exp_global.dropout", default=0.0),
        residual=False,
        linear_layer=None,
        vocab=None,
        gumbel=False,
        param_init=Ref("exp_global.param_init",
                       default=bare(param_initializers.GlorotInitializer)),
        bias_init=Ref("exp_global.bias_init",
                      default=bare(param_initializers.ZeroInitializer))):
        param_col = param_collections.ParamManager.my_params(self)
        self.input_dim = input_dim
        if vocab:
            softmax_dim = len(vocab)
        self.softmax_dim = softmax_dim
        self.output_dim = output_dim
        self.dropout_rate = dropout
        self.residual = residual
        self.gumbel = gumbel
        if self.residual: assert self.input_dim == self.output_dim

        self.linear_layer = self.add_serializable_component(
            "linear_layer", linear_layer,
            lambda: transforms.Linear(input_dim=self.softmax_dim,
                                      output_dim=self.output_dim,
                                      bias=False,
                                      param_init=param_init,
                                      bias_init=bias_init))

        # self.p_W = param_col.add_parameters(dim=(softmax_dim, input_dim), init=param_init.initializer((softmax_dim, input_dim)))
        # self.p_b = param_col.add_parameters(dim=(softmax_dim), init=bias_init.initializer((softmax_dim,)))
        self.p_E = param_col.add_parameters(dim=(output_dim, softmax_dim),
                                            init=param_init.initializer(
                                                (output_dim, softmax_dim)))
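
p_E above is a raw DyNet parameter of shape (output_dim, softmax_dim), initialized through the configured param_init. Assuming GlorotInitializer means uniform Glorot scaling, the initializer call plausibly reduces to something like the following NumPy sketch (illustrative, not xnmt's code):

    import numpy as np

    def glorot_uniform(shape, rng=np.random.default_rng(0)):
        # Uniform Glorot: limit = sqrt(6 / (fan_in + fan_out)).
        fan_out, fan_in = shape
        limit = np.sqrt(6.0 / (fan_in + fan_out))
        return rng.uniform(-limit, limit, size=shape)

    p_E = glorot_uniform((64, 1000))  # (output_dim, softmax_dim), small sizes
    assert p_E.shape == (64, 1000)
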
Code example #11
    def __init__(self,
                 head_count: int,
                 model_dim: int,
                 downsample_factor: int = 1,
                 input_dim: int = None,
                 ignore_masks: bool = False,
                 plot_attention: typing.Optional[str] = None,
                 diag_gauss_mask: typing.Union[bool, numbers.Real] = False,
                 square_mask_std: bool = True,
                 cross_pos_encoding_type: typing.Optional[str] = None,
                 kq_pos_encoding_type: typing.Optional[str] = None,
                 kq_pos_encoding_size: int = 40,
                 max_len: int = 1500,
                 param_init: xnmt.param_initializers.ParamInitializer =
                     xnmt.param_initializers.GlorotInitializer(),
                 bias_init: xnmt.param_initializers.ParamInitializer =
                     xnmt.param_initializers.ZeroInitializer(),
                 linear_kvq=None,
                 kq_positional_embedder=None,
                 layer_norm=None,
                 res_shortcut=None,
                 desc: typing.Any = None) -> None:
        if input_dim is None: input_dim = model_dim
        self.input_dim = input_dim
        assert model_dim % head_count == 0
        self.dim_per_head = model_dim // head_count
        self.model_dim = model_dim
        self.head_count = head_count
        assert downsample_factor >= 1
        self.downsample_factor = downsample_factor
        self.plot_attention = plot_attention
        self.plot_attention_counter = 0
        self.desc = desc

        self.ignore_masks = ignore_masks
        self.diag_gauss_mask = diag_gauss_mask
        self.square_mask_std = square_mask_std

        self.kq_pos_encoding_type = kq_pos_encoding_type
        self.kq_pos_encoding_size = kq_pos_encoding_size
        self.max_len = max_len

        subcol = param_collections.ParamManager.my_params(self)

        if self.kq_pos_encoding_type is None:
            self.linear_kvq = self.add_serializable_component(
                "linear_kvq", linear_kvq,
                lambda: transforms.Linear(input_dim * downsample_factor,
                                          head_count * self.dim_per_head * 3,
                                          param_init=param_init,
                                          bias_init=bias_init))
        else:
            self.linear_kq, self.linear_v = self.add_serializable_component(
                "linear_kvq", linear_kvq,
                lambda: [
                    transforms.Linear(input_dim * downsample_factor +
                                      self.kq_pos_encoding_size,
                                      head_count * self.dim_per_head * 2,
                                      param_init=param_init,
                                      bias_init=bias_init),
                    transforms.Linear(input_dim * downsample_factor,
                                      head_count * self.dim_per_head,
                                      param_init=param_init,
                                      bias_init=bias_init)
                ])
            assert self.kq_pos_encoding_type == "embedding"
            self.kq_positional_embedder = self.add_serializable_component(
                "kq_positional_embedder", kq_positional_embedder, lambda:
                embedders.PositionEmbedder(max_pos=self.max_len,
                                           emb_dim=self.kq_pos_encoding_size,
                                           param_init=param_init))

        if self.diag_gauss_mask:
            if self.diag_gauss_mask == "rand":
                rand_init = np.exp(
                    (np.random.random(size=(self.head_count, ))) *
                    math.log(1000))
                self.diag_gauss_mask_sigma = subcol.add_parameters(
                    dim=(1, 1, self.head_count),
                    init=dy.NumpyInitializer(rand_init))
            else:
                self.diag_gauss_mask_sigma = subcol.add_parameters(
                    dim=(1, 1, self.head_count),
                    init=dy.ConstInitializer(self.diag_gauss_mask))

        self.layer_norm = self.add_serializable_component(
            "layer_norm", layer_norm, lambda: norms.LayerNorm(model_dim))

        if model_dim != input_dim * downsample_factor:
            self.res_shortcut = self.add_serializable_component(
                "res_shortcut", res_shortcut,
                lambda: transforms.Linear(input_dim * downsample_factor,
                                          model_dim,
                                          param_init=param_init,
                                          bias_init=bias_init))
        self.cross_pos_encoding_type = cross_pos_encoding_type
        if cross_pos_encoding_type == "embedding":
            self.cross_pos_emb_p1 = subcol.add_parameters(
                dim=(self.max_len, self.dim_per_head, self.head_count),
                init=dy.NormalInitializer(mean=1.0, var=0.001))
            self.cross_pos_emb_p2 = subcol.add_parameters(
                dim=(self.max_len, self.dim_per_head, self.head_count),
                init=dy.NormalInitializer(mean=1.0, var=0.001))
        elif cross_pos_encoding_type is not None:
            raise NotImplementedError()
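
The head arithmetic in code example #11 deserves a worked check: model_dim must be divisible by head_count, dim_per_head = model_dim // head_count, and linear_kvq maps the (possibly downsampled, hence concatenated) input to head_count * dim_per_head * 3 units, i.e. a single fused projection that is later split into keys, values, and queries. With illustrative numbers:

    # Worked shape check for the fused K/V/Q projection (numbers illustrative).
    model_dim, head_count, input_dim, downsample_factor = 512, 8, 512, 2

    assert model_dim % head_count == 0
    dim_per_head = model_dim // head_count        # 64 units per head
    in_features = input_dim * downsample_factor   # 1024: two frames concatenated
    out_features = head_count * dim_per_head * 3  # 1536: K, V, and Q fused

    assert out_features == 3 * model_dim
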
Code example #12
 def __init__(
     self,
     trg_embedder: embedders.DenseWordEmbedder,
     src_reader: input_readers.InputReader = None,
     trg_reader: input_readers.InputReader = None,
     src_embedder=bare(embedders.SimpleWordEmbedder),
     encoder=bare(recurrent.BiLSTMSeqTransducer),
     attender=bare(attenders.MlpAttender),
     dec_lstm=bare(recurrent.UniLSTMSeqTransducer),
     bridge: bridges.Bridge = bare(bridges.CopyBridge),
     transform: transforms.Transform = bare(transforms.AuxNonLinear),
     scorer: scorers.Scorer = bare(scorers.Softmax),
     inference=bare(inferences.IndependentOutputInference),
     max_dec_len: int = 350,
     mode: Optional[str] = None,
     mode_translate: Optional[str] = None,
     mode_transduce: Optional[str] = None,
     unfold_until: str = "eos",
     transducer_loss: bool = False,
     split_regularizer: Union[bool, numbers.Real] = False,
     split_dual: Union[bool, Sequence[numbers.Real]] = False,
     dropout_dec_state: float = 0.0,
     split_dual_proj: Optional[transforms.Linear] = None,
     split_context_transform: Optional[transforms.Transform] = None,
     sampling_prob: numbers.Number = 0.0,
     compute_report: bool = Ref("exp_global.compute_report",
                                default=False)):
     super().__init__(src_reader=src_reader, trg_reader=trg_reader)
     assert mode is None or (mode_translate is None and mode_transduce is None), \
       f"illegal combination: mode={mode}, mode_translate={mode_translate}, mode_transduce={mode_transduce}"
     assert mode or mode_translate or mode_transduce
     if mode_translate or mode_transduce:
         assert mode_translate and mode_transduce
     assert mode_translate != "split"
     self.src_embedder = src_embedder
     self.trg_embedder = trg_embedder
     self.encoder = encoder
     self.attender = attender
     self.dec_lstm = dec_lstm
     self.bridge = bridge
     self.transform = transform
     self.scorer = scorer
     self.inference = inference
     self.max_dec_len = max_dec_len
     self.mode_translate = mode_translate or mode
     self.mode_transduce = mode_transduce or mode
     if transducer_loss:
         assert self.mode_transduce in ["teacher", "split"], \
           f"mode_transduce='{self.mode_transduce}' not supported with transducer_loss option"
     self.unfold_until = unfold_until
     self.transducer_loss = transducer_loss
     if split_regularizer: assert self.mode_transduce == "split"
     self.split_regularizer = split_regularizer
     self.dropout_dec_state = dropout_dec_state
     self.split_dual = [0.0, 0.0] if split_dual is True else split_dual
     self.split_context_transform = split_context_transform
     if self.split_dual:
         assert len(self.split_dual) == 2 and max(
             self.split_dual) <= 1.0 and min(self.split_dual) >= 0.0
         self.split_dual_proj = self.add_serializable_component(
             "split_dual_proj", split_dual_proj, lambda: transforms.Linear(
                 input_dim=self.dec_lstm.input_dim * 2,
                 output_dim=self.dec_lstm.input_dim))
     self.sampling_prob = sampling_prob
     self.compute_report = compute_report