def __init__(self, dy_model, input_dim, output_dim):
    """Create the bias-free ``Linear`` layer and remember the output size.

    Args:
        dy_model: Forwarded as the third positional argument of ``Linear``
            (presumably the DyNet parameter collection -- confirm).
        input_dim: Forwarded as the first positional argument of ``Linear``.
        output_dim: Forwarded as the second positional argument of
            ``Linear`` and also stored as ``self.output_dim``.
    """
    # Two separate initializer objects are created on purpose: one is handed
    # over for the weights, the other for the bias slot (the bias itself is
    # switched off via ``bias=False``).
    weight_initializer = LeCunUniformInitializer()
    bias_initializer = LeCunUniformInitializer()
    self.L = Linear(
        input_dim,
        output_dim,
        dy_model,
        bias=False,
        param_init=weight_initializer,
        bias_init=bias_initializer,
    )
    self.output_dim = output_dim
def __init__(
    self,
    policy_network=None,
    baseline=None,
    z_normalization=True,
    conf_penalty=None,
    sample=1,
    weight=1.0,
    use_baseline=True,
    input_dim=Ref("exp_global.default_layer_dim"),
    output_dim=2,
    param_init=Ref("exp_global.param_init", default=bare(GlorotInitializer)),
    bias_init=Ref("exp_global.bias_init", default=bare(ZeroInitializer)),
):
    """Set up the policy network, optional baseline and confidence penalty.

    Args:
        policy_network: Pre-built policy network; when absent, the factory
            below builds a ``Linear`` from ``input_dim`` to ``output_dim``
            (presumably only invoked when this is ``None`` -- confirm against
            ``add_serializable_component``).
        baseline: Pre-built baseline; the fallback factory builds a
            ``Linear`` from ``input_dim`` to 1. Ignored when ``use_baseline``
            is false.
        z_normalization: Stored as ``self.z_normalization``.
        conf_penalty: Optional confidence penalty component; when ``None``
            ``self.confidence_penalty`` is ``None`` too.
        sample: Stored as ``self.sample``.
        weight: Stored as ``self.weight``.
        use_baseline: When false, ``self.baseline`` is set to ``None``.
        input_dim: Input size of both default ``Linear`` layers.
        output_dim: Output size of the default policy network.
        param_init: Weight initializer for the default ``Linear`` layers.
        bias_init: Bias initializer for the default ``Linear`` layers.
    """
    self.input_dim = input_dim

    def make_policy_network():
        # Default policy network: input_dim -> output_dim.
        return Linear(
            input_dim=self.input_dim,
            output_dim=output_dim,
            param_init=param_init,
            bias_init=bias_init,
        )

    def make_baseline():
        # Default baseline: input_dim -> a single scalar.
        return Linear(
            input_dim=self.input_dim,
            output_dim=1,
            param_init=param_init,
            bias_init=bias_init,
        )

    self.policy_network = self.add_serializable_component(
        "policy_network", policy_network, make_policy_network
    )

    if not use_baseline:
        self.baseline = None
    else:
        self.baseline = self.add_serializable_component(
            "baseline", baseline, make_baseline
        )

    if conf_penalty is None:
        self.confidence_penalty = None
    else:
        # The factory simply hands back the object that was passed in.
        self.confidence_penalty = self.add_serializable_component(
            "conf_penalty", conf_penalty, lambda: conf_penalty
        )

    self.z_normalization = z_normalization
    self.sample = sample
    self.weight = weight
def __init__(
    self,
    dec_layers=1,
    enc_dim=Ref("exp_global.default_layer_dim"),
    dec_dim=Ref("exp_global.default_layer_dim"),
    param_init=Ref("exp_global.param_init", default=bare(GlorotInitializer)),
    bias_init=Ref("exp_global.bias_init", default=bare(ZeroInitializer)),
    projector=None,
):
    """Record the layer configuration and register the projector component.

    Args:
        dec_layers: Stored as ``self.dec_layers``.
        enc_dim: Stored as ``self.enc_dim``; input size of the default
            projector.
        dec_dim: Stored as ``self.dec_dim``; output size of the default
            projector.
        param_init: Weight initializer for the default projector.
        bias_init: Bias initializer for the default projector.
        projector: Pre-built projector; the fallback factory builds a
            ``Linear`` from ``enc_dim`` to ``dec_dim``.
    """
    self.dec_layers, self.enc_dim, self.dec_dim = dec_layers, enc_dim, dec_dim

    def make_projector():
        # Default projection: enc_dim -> dec_dim.
        return Linear(
            input_dim=self.enc_dim,
            output_dim=self.dec_dim,
            param_init=param_init,
            bias_init=bias_init,
        )

    self.projector = self.add_serializable_component(
        "projector", projector, make_projector
    )
def __init__(
    self,
    evaluation_metric=None,
    sample_length=50,
    use_baseline=False,
    inv_eval=True,
    decoder_hidden_dim=Ref("exp_global.default_layer_dim"),
    baseline=None,
):
    """Configure the evaluation metric and the optional baseline.

    Args:
        evaluation_metric: Metric object to use; when ``None`` a
            ``FastBLEUEvaluator(ngram=4, smooth=1)`` is created.
        sample_length: Accepted for interface compatibility; this
            initializer does not read it.
        use_baseline: Stored as ``self.use_baseline``; controls whether a
            baseline component is registered.
        inv_eval: Stored as ``self.inv_eval``.
        decoder_hidden_dim: Input size of the default baseline ``Linear``.
        baseline: Pre-built baseline; the fallback factory builds a
            ``Linear`` from ``decoder_hidden_dim`` to 1.
    """
    self.use_baseline = use_baseline
    self.inv_eval = inv_eval

    if evaluation_metric is None:
        self.evaluation_metric = xnmt.evaluator.FastBLEUEvaluator(ngram=4, smooth=1)
    else:
        self.evaluation_metric = evaluation_metric

    if self.use_baseline:
        self.baseline = self.add_serializable_component(
            "baseline",
            baseline,
            lambda: Linear(input_dim=decoder_hidden_dim, output_dim=1),
        )
    else:
        # Always define the attribute so that later reads of ``self.baseline``
        # cannot raise AttributeError when the baseline is disabled.
        self.baseline = None
def __init__(
    self,
    word_vocab=None,
    ngram_size=4,
    src_vocab=Ref(Path("model.src_reader.vocab")),
    hidden_dim=Ref("exp_global.default_layer_dim"),
    word_ngram=None,
    vocab_size=None,
):
    """Prepare the vocabulary and register the n-gram projection component.

    Args:
        word_vocab: Existing vocabulary. When given it is frozen, its unknown
            token is set to ``word_vocab.UNK_STR`` and its length becomes the
            projection input size. When ``None`` a fresh ``Vocab()`` is
            created and ``vocab_size`` is used as the input size instead.
        ngram_size: Stored as ``self.ngram_size``.
        src_vocab: Stored as ``self.src_vocab``.
        hidden_dim: Output size of the default ``Linear`` projection.
        word_ngram: Pre-built projection; the fallback factory builds a
            ``Linear`` from the entry count to ``hidden_dim``.
        vocab_size: Entry count used only when ``word_vocab`` is ``None``.
    """
    super().__init__()

    if word_vocab is not None:
        word_vocab.freeze()
        word_vocab.set_unk(word_vocab.UNK_STR)
        entry_count = len(word_vocab)
    else:
        word_vocab = Vocab()
        entry_count = vocab_size

    self.dict_entry = entry_count
    self.src_vocab = src_vocab
    self.word_vocab = word_vocab
    self.ngram_size = ngram_size

    def make_word_ngram():
        # Default projection: number of vocabulary entries -> hidden_dim.
        return Linear(input_dim=entry_count, output_dim=hidden_dim)

    self.word_ngram = self.add_serializable_component(
        "word_ngram", word_ngram, make_word_ngram
    )
def __init__(
    self,
    input_dim: int = Ref("exp_global.default_layer_dim"),
    vocab_size: Optional[int] = None,
    vocab: Optional[vocab.Vocab] = None,
    trg_reader: Optional[input_reader.InputReader] = Ref("model.trg_reader", default=None),
    label_smoothing: float = 0.0,
    param_init: ParamInitializer = Ref("exp_global.param_init", default=bare(GlorotInitializer)),
    bias_init: ParamInitializer = Ref("exp_global.bias_init", default=bare(ZeroInitializer)),
    output_projector: Linear = None,
) -> None:
    """Resolve the output vocabulary size and register the output projector.

    Args:
        input_dim: Stored as ``self.input_dim``; input size of the default
            projector.
        vocab_size: Forwarded to ``self._choose_vocab_size``.
        vocab: Forwarded to ``self._choose_vocab_size``.
        trg_reader: Forwarded to ``self._choose_vocab_size``.
        label_smoothing: Stored as ``self.label_smoothing``.
        param_init: Weight initializer for the default projector.
        bias_init: Bias initializer for the default projector.
        output_projector: Pre-built projector; the fallback factory returns
            it when truthy and otherwise builds a ``Linear`` from
            ``input_dim`` to the resolved output size.
    """
    self.param_col = ParamManager.my_params(self)
    self.input_dim = input_dim
    self.output_dim = self._choose_vocab_size(vocab_size, vocab, trg_reader)
    self.label_smoothing = label_smoothing

    def make_output_projector():
        # Prefer the projector that was handed in; build one only as fallback.
        if output_projector:
            return output_projector
        return Linear(
            input_dim=self.input_dim,
            output_dim=self.output_dim,
            param_init=param_init,
            bias_init=bias_init,
        )

    self.output_projector = self.add_serializable_component(
        "output_projector", output_projector, make_output_projector
    )
def setUp(self):
    """Build a segmenting translator and one packed batch for the tests.

    Seeds ``numpy.random`` and ``random`` with 2, clears xnmt events and
    initialises the parameter collection, then assembles a
    ``DefaultTranslator`` whose encoder is a ``SegmentingSeqTransducer``
    (all layer sizes are 64). Finally it reads the example corpora, packs
    them with a ``TrgBatcher`` of batch size 3 and renews the DyNet
    computation graph.
    """
    # Seeding
    numpy.random.seed(2)
    random.seed(2)
    layer_dim = 64
    xnmt.events.clear()
    ParamManager.init_param_col()

    # Components of the segmenting encoder. Construction order is kept
    # exactly as before so parameters are allocated in the same sequence.
    self.segment_encoder_bilstm = BiLSTMSeqTransducer(
        input_dim=layer_dim, hidden_dim=layer_dim
    )
    self.segment_composer = SumComposer()
    self.src_reader = CharFromWordTextReader()
    self.trg_reader = PlainTextReader()
    self.loss_calculator = AutoRegressiveMLELoss()

    baseline_layer = Linear(input_dim=layer_dim, output_dim=1)
    policy_layer = Linear(input_dim=layer_dim, output_dim=2)
    self.poisson_prior = PoissonPrior(mu=3.3)
    self.eps_greedy = EpsilonGreedy(eps_prob=0.0, prior=self.poisson_prior)
    self.conf_penalty = ConfidencePenalty()
    self.policy_gradient = PolicyGradient(
        input_dim=layer_dim,
        output_dim=2,
        baseline=baseline_layer,
        policy_network=policy_layer,
        z_normalization=True,
        conf_penalty=self.conf_penalty,
        sample=5,
    )
    self.length_prior = PoissonLengthPrior(lmbd=3.3, weight=1)

    final_transducer = BiLSTMSeqTransducer(
        input_dim=layer_dim, hidden_dim=layer_dim
    )
    self.segmenting_encoder = SegmentingSeqTransducer(
        embed_encoder=self.segment_encoder_bilstm,
        segment_composer=self.segment_composer,
        final_transducer=final_transducer,
        policy_learning=self.policy_gradient,
        eps_greedy=self.eps_greedy,
        length_prior=self.length_prior,
    )

    # Translator parts, created in the same order in which the original
    # nested call evaluated them.
    src_embedder = SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100)
    attender = MlpAttender(
        input_dim=layer_dim, state_dim=layer_dim, hidden_dim=layer_dim
    )
    trg_embedder = SimpleWordEmbedder(emb_dim=layer_dim, vocab_size=100)
    decoder_rnn = UniLSTMSeqTransducer(
        input_dim=layer_dim,
        hidden_dim=layer_dim,
        decoder_input_dim=layer_dim,
        yaml_path="decoder",
    )
    decoder_transform = AuxNonLinear(
        input_dim=layer_dim, output_dim=layer_dim, aux_input_dim=layer_dim
    )
    decoder_scorer = Softmax(vocab_size=100, input_dim=layer_dim)
    decoder_bridge = CopyBridge(dec_dim=layer_dim, dec_layers=1)
    decoder = AutoRegressiveDecoder(
        input_dim=layer_dim,
        rnn=decoder_rnn,
        transform=decoder_transform,
        scorer=decoder_scorer,
        trg_embed_dim=layer_dim,
        bridge=decoder_bridge,
    )
    self.model = DefaultTranslator(
        src_reader=self.src_reader,
        trg_reader=self.trg_reader,
        src_embedder=src_embedder,
        encoder=self.segmenting_encoder,
        attender=attender,
        trg_embedder=trg_embedder,
        decoder=decoder,
    )
    self.model.set_train(True)

    self.layer_dim = layer_dim
    # Example corpora: source sentences first, then targets.
    self.src_data = list(self.model.src_reader.read_sents("examples/data/head.ja"))
    self.trg_data = list(self.model.trg_reader.read_sents("examples/data/head.en"))
    batcher = xnmt.batcher.TrgBatcher(
        batch_size=3, src_pad_token=1, trg_pad_token=2
    )
    self.src, self.trg = batcher.pack(self.src_data, self.trg_data)
    dy.renew_cg(immediate_compute=True, check_validity=True)