def __init__(self,
             src_reader: input_readers.InputReader,
             trg_reader: input_readers.InputReader,
             src_embedder: embedders.Embedder = bare(embedders.SimpleWordEmbedder),
             encoder: recurrent.UniLSTMSeqTransducer = bare(recurrent.UniLSTMSeqTransducer),
             attender: attenders.Attender = bare(attenders.MlpAttender),
             decoder: decoders.Decoder = bare(decoders.AutoRegressiveDecoder),
             inference: inferences.AutoRegressiveInference = bare(inferences.AutoRegressiveInference),
             truncate_dec_batches: bool = False,
             policy_learning=None,
             freeze_decoder_param=False,
             max_generation=100) -> None:
  super().__init__(src_reader=src_reader,
                   trg_reader=trg_reader,
                   encoder=encoder,
                   attender=attender,
                   src_embedder=src_embedder,
                   decoder=decoder,
                   inference=inference,
                   truncate_dec_batches=truncate_dec_batches)
  self.policy_learning = policy_learning
  self.actions = []
  self.outputs = []
  self.freeze_decoder_param = freeze_decoder_param
  self.max_generation = max_generation
def __init__( self, input_dim: numbers.Integral = Ref("exp_global.default_layer_dim"), state_dim: numbers.Integral = Ref("exp_global.default_layer_dim"), hidden_dim: numbers.Integral = Ref("exp_global.default_layer_dim"), param_init: param_initializers.ParamInitializer = Ref( "exp_global.param_init", default=bare(param_initializers.GlorotInitializer)), bias_init: param_initializers.ParamInitializer = Ref( "exp_global.bias_init", default=bare(param_initializers.ZeroInitializer)) ) -> None: self.input_dim = input_dim self.state_dim = state_dim self.hidden_dim = hidden_dim my_params = param_collections.ParamManager.my_params(self) self.linear_context = nn.Linear(input_dim, hidden_dim, bias=True).to(xnmt.device) self.linear_query = nn.Linear(state_dim, hidden_dim, bias=False).to(xnmt.device) self.pU = nn.Linear(hidden_dim, 1, bias=False).to(xnmt.device) my_params.append(self.linear_context) my_params.append(self.linear_query) my_params.append(self.pU) my_params.init_params(param_init, bias_init) self.curr_sent = None self.attention_vecs = None self.WI = None
def __init__(self,
             layers: numbers.Integral,
             input_dim: numbers.Integral,
             hidden_dim: numbers.Integral,
             param_init: param_initializers.ParamInitializer = Ref("exp_global.param_init",
                                                                   default=bare(param_initializers.GlorotInitializer)),
             bias_init: param_initializers.ParamInitializer = Ref("exp_global.bias_init",
                                                                  default=bare(param_initializers.ZeroInitializer))) -> None:
  if layers != 1:
    raise RuntimeError("CustomLSTMSeqTransducer supports only exactly one layer")
  self.input_dim = input_dim
  self.hidden_dim = hidden_dim
  model = param_collections.ParamManager.my_params(self)
  # [i; f; o; g]
  self.p_Wx = model.add_parameters(dim=(hidden_dim * 4, input_dim),
                                   init=param_init.initializer((hidden_dim * 4, input_dim)))
  self.p_Wh = model.add_parameters(dim=(hidden_dim * 4, hidden_dim),
                                   init=param_init.initializer((hidden_dim * 4, hidden_dim)))
  self.p_b = model.add_parameters(dim=(hidden_dim * 4,),
                                  init=bias_init.initializer((hidden_dim * 4,)))
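# Hedged sketch (not part of xnmt): how packed [i; f; o; g] parameters like p_Wx/p_Wh/p_b above
# are typically consumed in a single LSTM step. The function name and argument names here are
# illustrative only; the real transducer performs the equivalent computation with DyNet ops.
import numpy as np

def lstm_step(Wx, Wh, b, x_t, h_prev, c_prev):
  """One LSTM step with gates packed row-wise in the order [i; f; o; g]."""
  sigmoid = lambda v: 1.0 / (1.0 + np.exp(-v))
  gates = Wx @ x_t + Wh @ h_prev + b           # shape: (4 * hidden_dim,)
  i, f, o, g = np.split(gates, 4)              # same gate order as the comment in the code above
  c_t = sigmoid(f) * c_prev + sigmoid(i) * np.tanh(g)
  h_t = sigmoid(o) * np.tanh(c_t)
  return h_t, c_t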
def __init__(self,
             model: models.ConditionedModel = Ref("model"),
             src_file: Union[None, str, Sequence[str]] = None,
             trg_file: Optional[str] = None,
             dev_every: numbers.Integral = 0,
             dev_zero: bool = False,
             batcher: batchers.Batcher = bare(batchers.SrcBatcher, batch_size=32),
             loss_calculator: loss_calculators.LossCalculator = bare(loss_calculators.MLELoss),
             trainer: optimizers.XnmtOptimizer = bare(optimizers.SimpleSGDTrainer, e0=0.1),
             run_for_epochs: Optional[numbers.Integral] = None,
             lr_decay: numbers.Real = 1.0,
             lr_decay_times: numbers.Integral = 3,
             patience: numbers.Integral = 1,
             initial_patience: Optional[numbers.Integral] = None,
             dev_tasks: Sequence[eval_tasks.EvalTask] = None,
             dev_combinator: Optional[str] = None,
             restart_trainer: bool = False,
             reload_command: Optional[str] = None,
             name: str = "{EXP}",
             sample_train_sents: Optional[numbers.Integral] = None,
             max_num_train_sents: Optional[numbers.Integral] = None,
             max_src_len: Optional[numbers.Integral] = None,
             max_trg_len: Optional[numbers.Integral] = None,
             loss_comb_method: str = Ref("exp_global.loss_comb_method", default="sum"),
             update_every: numbers.Integral = 1,
             commandline_args: dict = Ref("exp_global.commandline_args", default={})) -> None:
  super().__init__(model=model,
                   src_file=src_file,
                   trg_file=trg_file,
                   dev_every=dev_every,
                   batcher=batcher,
                   loss_calculator=loss_calculator,
                   run_for_epochs=run_for_epochs,
                   lr_decay=lr_decay,
                   lr_decay_times=lr_decay_times,
                   patience=patience,
                   initial_patience=initial_patience,
                   dev_tasks=dev_tasks,
                   dev_combinator=dev_combinator,
                   restart_trainer=restart_trainer,
                   reload_command=reload_command,
                   name=name,
                   sample_train_sents=sample_train_sents,
                   max_num_train_sents=max_num_train_sents,
                   max_src_len=max_src_len,
                   max_trg_len=max_trg_len)
  self.dev_zero = dev_zero
  self.trainer = trainer or optimizers.SimpleSGDTrainer(e0=0.1)
  self.dynet_profiling = commandline_args.get("dynet_profiling", 0) if commandline_args else 0
  self.train_loss_tracker = loss_trackers.TrainLossTracker(self)
  self.loss_comb_method = loss_comb_method
  self.update_every = update_every
  self.num_updates_skipped = 0
def __init__(self,
             layers,
             input_dim,
             hidden_dim,
             param_init=Ref("exp_global.param_init", default=bare(GlorotInitializer)),
             bias_init=Ref("exp_global.bias_init", default=bare(ZeroInitializer))):
  if layers != 1:
    raise RuntimeError("CustomLSTMSeqTransducer supports only exactly one layer")
  self.input_dim = input_dim
  self.hidden_dim = hidden_dim
  model = ParamManager.my_params(self)
  # [i; f; o; g]
  self.p_Wx = model.add_parameters(dim=(hidden_dim * 4, input_dim),
                                   init=param_init.initializer((hidden_dim * 4, input_dim)))
  self.p_Wh = model.add_parameters(dim=(hidden_dim * 4, hidden_dim),
                                   init=param_init.initializer((hidden_dim * 4, hidden_dim)))
  self.p_b = model.add_parameters(dim=(hidden_dim * 4,),
                                  init=bias_init.initializer((hidden_dim * 4,)))
def __init__(self,
             layers,
             input_dim=Ref("exp_global.default_layer_dim"),
             hidden_dim=Ref("exp_global.default_layer_dim"),
             dropout=Ref("exp_global.dropout", default=0.0),
             stride=1,
             filter_width=2,
             param_init=Ref("exp_global.param_init", default=bare(param_initializers.GlorotInitializer)),
             bias_init=Ref("exp_global.bias_init", default=bare(param_initializers.ZeroInitializer)),
             forward_layers=None,
             backward_layers=None):
  self.num_layers = layers
  self.hidden_dim = hidden_dim
  assert hidden_dim % 2 == 0
  self.forward_layers = self.add_serializable_component(
    "forward_layers", forward_layers,
    lambda: self.init_layers(input_dim, hidden_dim, dropout, stride, filter_width, param_init, bias_init))
  self.backward_layers = self.add_serializable_component(
    "backward_layers", backward_layers,
    lambda: self.init_layers(input_dim, hidden_dim, dropout, stride, filter_width, param_init, bias_init))
def __init__(self,
             input_dim=Ref("exp_global.default_layer_dim"),
             trg_embed_dim=Ref("exp_global.default_layer_dim"),
             input_feeding=True,
             rnn_layer=bare(UniLSTMSeqTransducer),
             mlp_layer=bare(MLP),
             bridge=bare(CopyBridge),
             label_smoothing=0.0):
  self.param_col = ParamManager.my_params(self)
  self.input_dim = input_dim
  self.label_smoothing = label_smoothing
  # Input feeding
  self.input_feeding = input_feeding
  rnn_input_dim = trg_embed_dim
  if input_feeding:
    rnn_input_dim += input_dim
  assert rnn_input_dim == rnn_layer.input_dim, "Wrong input dimension in RNN layer"
  # Bridge
  self.bridge = bridge
  # LSTM
  self.rnn_layer = rnn_layer
  # MLP
  self.mlp_layer = mlp_layer
def __init__(self,
             word_vocab=None,
             embedding=None,
             ngram_size=4,
             vocab_size=32000,
             cache_id_pool=None,
             cache_word_table=None,
             char_vocab=Ref(Path("model.src_reader.vocab")),
             hidden_dim=Ref("exp_global.default_layer_dim"),
             param_init=Ref("exp_global.param_init", default=bare(GlorotInitializer)),
             bias_init=Ref("exp_global.bias_init", default=bare(ZeroInitializer))):
  super().__init__(word_vocab, vocab_size, cache_id_pool, cache_word_table)
  # Attributes
  if word_vocab is None:
    self.dict_entry = vocab_size + 1
  else:
    self.dict_entry = len(word_vocab)
  self.char_vocab = char_vocab
  self.param_init = param_init
  self.bias_init = bias_init
  self.hidden_dim = hidden_dim
  self.word_vect = None
  # Word Embedding
  self.ngram_size = ngram_size
  self.embedding = self.add_serializable_component(
    "embedding", embedding,
    lambda: Linear(input_dim=self.dict_entry, output_dim=hidden_dim,
                   param_init=param_init, bias_init=bias_init))
def __init__(self,
             layers,
             input_dim=Ref("exp_global.default_layer_dim"),
             hidden_dim=Ref("exp_global.default_layer_dim"),
             stride=1,
             dropout=Ref("exp_global.dropout", default=0.0),
             lstm_layers=None,
             nin_layers=None,
             param_init_lstm=Ref("exp_global.param_init", default=bare(param_initializers.GlorotInitializer)),
             bias_init_lstm=Ref("exp_global.bias_init", default=bare(param_initializers.ZeroInitializer)),
             param_init_nin=Ref("exp_global.param_init", default=bare(param_initializers.GlorotInitializer))):
  assert layers > 0
  assert hidden_dim % 2 == 0
  self.hidden_dim = hidden_dim
  self.stride = stride
  self.lstm_layers = self.add_serializable_component(
    "lstm_layers", lstm_layers,
    lambda: self.init_lstm_layers(layers, input_dim, hidden_dim, dropout, param_init_lstm, bias_init_lstm))
  self.nin_layers = self.add_serializable_component(
    "nin_layers", nin_layers,
    lambda: self.init_nin_layers(layers, hidden_dim, param_init_nin))
def __init__(self,
             model,
             src_file=None,
             trg_file=None,
             dev_every=0,
             batcher=bare(SrcBatcher, batch_size=32),
             loss_calculator=bare(MLELoss),
             run_for_epochs=None,
             lr_decay=1.0,
             lr_decay_times=3,
             patience=1,
             initial_patience=None,
             dev_tasks=None,
             restart_trainer=False,
             reload_command=None,
             name=None,
             sample_train_sents: Optional[int] = None,
             max_num_train_sents=None,
             max_src_len=None,
             max_trg_len=None):
  self.src_file = src_file
  self.trg_file = trg_file
  self.dev_tasks = dev_tasks
  if lr_decay > 1.0 or lr_decay <= 0.0:
    raise RuntimeError("illegal lr_decay, must satisfy: 0.0 < lr_decay <= 1.0")
  self.lr_decay = lr_decay
  self.patience = patience
  self.initial_patience = initial_patience
  self.lr_decay_times = lr_decay_times
  self.restart_trainer = restart_trainer
  self.run_for_epochs = run_for_epochs
  self.early_stopping_reached = False
  # training state
  self.training_state = TrainingState()
  self.reload_command = reload_command
  self.model = model
  self.loss_calculator = loss_calculator
  self.sample_train_sents = sample_train_sents
  self.max_num_train_sents = max_num_train_sents
  self.max_src_len = max_src_len
  self.max_trg_len = max_trg_len
  self.batcher = batcher
  self.logger = BatchLossTracker(self, dev_every, name)
def __init__( self, input_dim: numbers.Integral = Ref("exp_global.default_layer_dim"), embedder: embedders.Embedder = bare(embedders.LookupEmbedder), input_feeding: bool = True, bridge: bridges.Bridge = bare(bridges.CopyBridge), rnn: recurrent.UniLSTMSeqTransducer = bare( recurrent.UniLSTMSeqTransducer), transform: transforms.Transform = bare(transforms.AuxNonLinear), scorer: scorers.Scorer = bare(scorers.Softmax), truncate_dec_batches: bool = Ref("exp_global.truncate_dec_batches", default=False) ) -> None: self.param_col = param_collections.ParamManager.my_params(self) self.input_dim = input_dim self.embedder = embedder self.truncate_dec_batches = truncate_dec_batches self.bridge = bridge self.rnn = rnn self.transform = transform self.scorer = scorer # Input feeding self.input_feeding = input_feeding rnn_input_dim = embedder.emb_dim if input_feeding: rnn_input_dim += input_dim assert rnn_input_dim == rnn.total_input_dim, "Wrong input dimension in RNN layer: {} != {}".format( rnn_input_dim, rnn.total_input_dim)
def __init__(self,
             emb_dim=Ref("exp_global.default_layer_dim"),
             weight_noise=Ref("exp_global.weight_noise", default=0.0),
             word_dropout=0.0,
             fix_norm=None,
             param_init: param_initializers.ParamInitializer = Ref("exp_global.param_init",
                                                                   default=bare(param_initializers.GlorotInitializer)),
             bias_init: param_initializers.ParamInitializer = Ref("exp_global.bias_init",
                                                                  default=bare(param_initializers.ZeroInitializer)),
             vocab_size=None,
             vocab=None,
             yaml_path=None,
             src_reader=Ref("model.src_reader", default=None),
             trg_reader=Ref("model.trg_reader", default=None)):
  self.fix_norm = fix_norm
  self.weight_noise = weight_noise
  self.word_dropout = word_dropout
  self.emb_dim = emb_dim
  param_collection = param_collections.ParamManager.my_params(self)
  self.vocab_size = self.choose_vocab_size(vocab_size, vocab, yaml_path, src_reader, trg_reader)
  self.save_processed_arg("vocab_size", self.vocab_size)
  self.embeddings = param_collection.add_parameters(
    (self.vocab_size, self.emb_dim),
    init=param_init.initializer((self.vocab_size, self.emb_dim), is_lookup=True))
  self.bias = param_collection.add_parameters(
    (self.vocab_size,),
    init=bias_init.initializer((self.vocab_size,)))
def __init__( self, input_dim: numbers.Integral = Ref("exp_global.default_layer_dim"), dropout: numbers.Real = Ref("exp_global.dropout", default=0.0), param_init: param_initializers.ParamInitializer = Ref( "exp_global.param_init", default=bare(param_initializers.GlorotInitializer)), bias_init: param_initializers.ParamInitializer = Ref( "exp_global.bias_init", default=bare(param_initializers.ZeroInitializer)), num_heads: numbers.Integral = 8): assert (input_dim % num_heads == 0) self.dropout = dropout param_collection = param_collections.ParamManager.my_params(self) self.input_dim = input_dim self.num_heads = num_heads self.head_dim = input_dim // num_heads self.pWq, self.pWk, self.pWv, self.pWo = [ param_collection.add_parameters(dim=(input_dim, input_dim), init=param_init.initializer( (input_dim, input_dim))) for _ in range(4) ] self.pbq, self.pbk, self.pbv, self.pbo = [ param_collection.add_parameters(dim=(1, input_dim), init=bias_init.initializer(( 1, input_dim, ))) for _ in range(4) ]
def __init__(self,
             transducer: transducers.SeqTransducer,
             input_dim: int = Ref("exp_global.default_layer_dim"),
             softmax_dim: int = Ref("exp_global.default_layer_dim"),
             layer_dim: int = Ref("exp_global.default_layer_dim"),
             linear_layer: transforms.Linear = None,
             vocab: Optional[vocabs.Vocab] = None,
             scale: float = 1.0,
             mode: str = "entropy",
             param_init: param_initializers.ParamInitializer = Ref("exp_global.param_init",
                                                                   default=bare(param_initializers.GlorotInitializer)),
             bias_init: param_initializers.ParamInitializer = Ref("exp_global.bias_init",
                                                                  default=bare(param_initializers.ZeroInitializer))):
  self.transducer = transducer
  self.input_dim = input_dim
  if vocab:
    softmax_dim = len(vocab)
  self.softmax_dim = softmax_dim
  self.layer_dim = layer_dim
  self.scale = scale
  self.mode = mode
  self.linear_layer = self.add_serializable_component(
    "linear_layer", linear_layer,
    lambda: transforms.Linear(input_dim=self.softmax_dim, output_dim=self.layer_dim, bias=False,
                              param_init=param_init, bias_init=bias_init))
def __init__(self,
             layers=1,
             input_dim=Ref("exp_global.default_layer_dim"),
             hidden_dim=Ref("exp_global.default_layer_dim"),
             dropout=Ref("exp_global.dropout", default=0.0),
             weightnoise_std=Ref("exp_global.weight_noise", default=0.0),
             param_init=Ref("exp_global.param_init", default=bare(GlorotInitializer)),
             bias_init=Ref("exp_global.bias_init", default=bare(ZeroInitializer)),
             forward_layers=None,
             backward_layers=None):
  self.num_layers = layers
  self.hidden_dim = hidden_dim
  self.dropout_rate = dropout
  self.weightnoise_std = weightnoise_std
  assert hidden_dim % 2 == 0
  # Each direction gets hidden_dim // 2 units so the concatenated output has hidden_dim units.
  self.forward_layers = self.add_serializable_component(
    "forward_layers", forward_layers,
    lambda: [UniLSTMSeqTransducer(input_dim=input_dim if i == 0 else hidden_dim,
                                  hidden_dim=hidden_dim // 2,
                                  dropout=dropout,
                                  weightnoise_std=weightnoise_std,
                                  param_init=param_init[i] if isinstance(param_init, Sequence) else param_init,
                                  bias_init=bias_init[i] if isinstance(bias_init, Sequence) else bias_init)
             for i in range(layers)])
  self.backward_layers = self.add_serializable_component(
    "backward_layers", backward_layers,
    lambda: [UniLSTMSeqTransducer(input_dim=input_dim if i == 0 else hidden_dim,
                                  hidden_dim=hidden_dim // 2,
                                  dropout=dropout,
                                  weightnoise_std=weightnoise_std,
                                  param_init=param_init[i] if isinstance(param_init, Sequence) else param_init,
                                  bias_init=bias_init[i] if isinstance(bias_init, Sequence) else bias_init)
             for i in range(layers)])
def __init__(self,
             policy_network=None,
             baseline=None,
             z_normalization=True,
             conf_penalty=None,
             weight=1.0,
             input_dim=Ref("exp_global.default_layer_dim"),
             output_dim=2,
             param_init=Ref("exp_global.param_init", default=bare(param_initializers.GlorotInitializer)),
             bias_init=Ref("exp_global.bias_init", default=bare(param_initializers.ZeroInitializer))):
  self.input_dim = input_dim
  self.policy_network = self.add_serializable_component(
    "policy_network", policy_network,
    lambda: transforms.Linear(input_dim=self.input_dim, output_dim=output_dim,
                              param_init=param_init, bias_init=bias_init))
  self.baseline = self.add_serializable_component(
    "baseline", baseline,
    lambda: transforms.Linear(input_dim=self.input_dim, output_dim=1,
                              param_init=param_init, bias_init=bias_init))
  self.confidence_penalty = self.add_serializable_component(
    "conf_penalty", conf_penalty, lambda: conf_penalty) if conf_penalty is not None else None
  self.weight = weight
  self.z_normalization = z_normalization
def __init__(self,
             layers=1,
             input_dim=Ref("exp_global.default_layer_dim"),
             hidden_dim=Ref("exp_global.default_layer_dim"),
             dropout=Ref("exp_global.dropout", default=0.0),
             weightnoise_std=Ref("exp_global.weight_noise", default=0.0),
             param_init=Ref("exp_global.param_init", default=bare(GlorotInitializer)),
             bias_init=Ref("exp_global.bias_init", default=bare(ZeroInitializer)),
             yaml_path=None,
             decoder_input_dim=Ref("exp_global.default_layer_dim", default=None),
             decoder_input_feeding=True):
  self.num_layers = layers
  model = ParamManager.my_params(self)
  if yaml_path is not None and "decoder" in yaml_path:
    if decoder_input_feeding:
      input_dim += decoder_input_dim
  self.hidden_dim = hidden_dim
  self.dropout_rate = dropout
  self.weightnoise_std = weightnoise_std
  self.input_dim = input_dim
  if not isinstance(param_init, Sequence):
    param_init = [param_init] * layers
  if not isinstance(bias_init, Sequence):
    bias_init = [bias_init] * layers
  # [i; f; o; g]
  self.p_Wx = [model.add_parameters(dim=(hidden_dim * 4, input_dim),
                                    init=param_init[0].initializer((hidden_dim * 4, input_dim), num_shared=4))]
  self.p_Wx += [model.add_parameters(dim=(hidden_dim * 4, hidden_dim),
                                     init=param_init[i].initializer((hidden_dim * 4, hidden_dim), num_shared=4))
                for i in range(1, layers)]
  self.p_Wh = [model.add_parameters(dim=(hidden_dim * 4, hidden_dim),
                                    init=param_init[i].initializer((hidden_dim * 4, hidden_dim), num_shared=4))
               for i in range(layers)]
  self.p_b = [model.add_parameters(dim=(hidden_dim * 4,),
                                   init=bias_init[i].initializer((hidden_dim * 4,), num_shared=4))
              for i in range(layers)]
  self.dropout_mask_x = None
  self.dropout_mask_h = None
def __init__(self,
             emb_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
             weight_noise: numbers.Real = Ref("exp_global.weight_noise", default=0.0),
             word_dropout: numbers.Real = 0.0,
             fix_norm: Optional[numbers.Real] = None,
             param_init: param_initializers.ParamInitializer = Ref("exp_global.param_init",
                                                                   default=bare(param_initializers.GlorotInitializer)),
             bias_init: param_initializers.ParamInitializer = Ref("exp_global.bias_init",
                                                                  default=bare(param_initializers.ZeroInitializer)),
             vocab_size: Optional[numbers.Integral] = None,
             vocab: Optional[vocabs.Vocab] = None,
             yaml_path: Path = '',
             src_reader: Optional[input_readers.InputReader] = Ref("model.src_reader", default=None),
             trg_reader: Optional[input_readers.InputReader] = Ref("model.trg_reader", default=None)) -> None:
  self.fix_norm = fix_norm
  self.weight_noise = weight_noise
  self.word_dropout = word_dropout
  self.emb_dim = emb_dim
  my_params = param_collections.ParamManager.my_params(self)
  self.vocab_size = self.choose_vocab_size(vocab_size, vocab, yaml_path, src_reader, trg_reader)
  self.save_processed_arg("vocab_size", self.vocab_size)
  self.linear = torch.nn.Linear(in_features=emb_dim, out_features=self.vocab_size, bias=True).to(xnmt.device)
  my_params.append(self.linear)
  my_params.init_params(param_init, bias_init)
def __init__(self,
             input_dim=Ref("exp_global.default_layer_dim"),
             hidden_dim=Ref("exp_global.default_layer_dim"),
             dropout=Ref("exp_global.dropout", default=0.0),
             filter_width=2,
             stride=1,
             param_init=Ref("exp_global.param_init", default=bare(param_initializers.GlorotInitializer)),
             bias_init=Ref("exp_global.bias_init", default=bare(param_initializers.ZeroInitializer))):
  model = param_collections.ParamManager.my_params(self)
  self.hidden_dim = hidden_dim
  self.dropout = dropout
  self.input_dim = input_dim
  self.stride = stride
  dim_f = (filter_width, 1, input_dim, hidden_dim * 3)
  self.p_f = model.add_parameters(dim=dim_f,
                                  init=param_init.initializer(dim_f, num_shared=3))  # f, o, z
  dim_b = (hidden_dim * 3,)
  self.p_b = model.add_parameters(dim=dim_b,
                                  init=bias_init.initializer(dim_b, num_shared=3))
def __init__(self,
             input_dim: int = Ref("exp_global.default_layer_dim"),
             trg_embed_dim: int = Ref("exp_global.default_layer_dim"),
             input_feeding: bool = True,
             bridge: Bridge = bare(CopyBridge),
             rnn: UniLSTMSeqTransducer = bare(UniLSTMSeqTransducer),
             transform: Transform = bare(AuxNonLinear),
             scorer: Scorer = bare(Softmax),
             truncate_dec_batches: bool = Ref("exp_global.truncate_dec_batches", default=False)) -> None:
  self.param_col = ParamManager.my_params(self)
  self.input_dim = input_dim
  self.truncate_dec_batches = truncate_dec_batches
  self.bridge = bridge
  self.rnn = rnn
  self.transform = transform
  self.scorer = scorer
  # Input feeding
  self.input_feeding = input_feeding
  rnn_input_dim = trg_embed_dim
  if input_feeding:
    rnn_input_dim += input_dim
  assert rnn_input_dim == rnn.input_dim, \
    "Wrong input dimension in RNN layer: {} != {}".format(rnn_input_dim, rnn.input_dim)
def __init__(self,
             src_file: Union[str, Sequence[str]],
             ref_file: Optional[str] = None,
             model: 'model_base.GeneratorModel' = Ref("model"),
             batcher: Batcher = Ref("train.batcher", default=bare(xnmt.batchers.SrcBatcher, batch_size=32)),
             loss_calculator: LossCalculator = bare(MLELoss),
             max_src_len: Optional[int] = None,
             max_trg_len: Optional[int] = None,
             max_num_sents: Optional[int] = None,
             loss_comb_method: str = Ref("exp_global.loss_comb_method", default="sum"),
             desc: Any = None):
  self.model = model
  self.loss_calculator = loss_calculator
  self.src_file = src_file
  self.ref_file = ref_file
  self.batcher = batcher
  self.src_data = None
  self.max_src_len = max_src_len
  self.max_trg_len = max_trg_len
  self.max_num_sents = max_num_sents
  self.loss_comb_method = loss_comb_method
  self.desc = desc
def __init__(self,
             embed_encoder: transducers_base.SeqTransducer = bare(transducers_base.IdentitySeqTransducer),
             segment_composer: SequenceComposer = bare(SeqTransducerComposer),
             final_transducer: recurrent.BiLSTMSeqTransducer = bare(recurrent.BiLSTMSeqTransducer),
             policy_network: network.PolicyNetwork = None,
             policy_prior: prior.PolicyPrior = None,
             train_policy_oracle: bool = True,
             test_policy_oracle: bool = True):
  policy_network = self.add_serializable_component("policy_network", policy_network, lambda: policy_network)
  # The base-class initializer is called directly, so self must be passed explicitly.
  model_base.PolicyConditionedModel.__init__(self, policy_network, train_policy_oracle, test_policy_oracle)
  self.embed_encoder = self.add_serializable_component("embed_encoder", embed_encoder, lambda: embed_encoder)
  self.segment_composer = self.add_serializable_component("segment_composer", segment_composer,
                                                          lambda: segment_composer)
  self.final_transducer = self.add_serializable_component("final_transducer", final_transducer,
                                                          lambda: final_transducer)
  self.no_char_embed = issubclass(segment_composer.__class__, VocabBasedComposer)
  self.policy_prior = policy_prior
def __init__(self,
             input_dim: numbers.Integral = Ref("exp_global.default_layer_dim"),
             vocab_size: Optional[numbers.Integral] = None,
             vocab: Optional[vocabs.Vocab] = None,
             trg_reader: Optional[input_readers.InputReader] = Ref("model.trg_reader", default=None),
             label_smoothing: numbers.Real = 0.0,
             param_init: param_initializers.ParamInitializer = Ref("exp_global.param_init",
                                                                   default=bare(param_initializers.GlorotInitializer)),
             bias_init: param_initializers.ParamInitializer = Ref("exp_global.bias_init",
                                                                  default=bare(param_initializers.ZeroInitializer)),
             output_projector: transforms.Linear = None) -> None:
  self.param_col = param_collections.ParamManager.my_params(self)
  self.input_dim = input_dim
  self.output_dim = self._choose_vocab_size(vocab_size, vocab, trg_reader)
  self.label_smoothing = label_smoothing
  self.output_projector = self.add_serializable_component(
    "output_projector", output_projector,
    lambda: output_projector or transforms.Linear(input_dim=self.input_dim, output_dim=self.output_dim,
                                                  param_init=param_init, bias_init=bias_init))
def __init__(self,
             embed_encoder=bare(IdentitySeqTransducer),
             segment_composer=bare(SeqTransducerComposer),
             final_transducer=bare(BiLSTMSeqTransducer),
             policy_learning=None,
             length_prior=None,
             eps_greedy=None,
             sample_during_search=False,
             reporter=None,
             compute_report=Ref("exp_global.compute_report", default=False)):
  self.embed_encoder = self.add_serializable_component("embed_encoder", embed_encoder, lambda: embed_encoder)
  self.segment_composer = self.add_serializable_component("segment_composer", segment_composer,
                                                          lambda: segment_composer)
  self.final_transducer = self.add_serializable_component("final_transducer", final_transducer,
                                                          lambda: final_transducer)
  self.policy_learning = self.add_serializable_component(
    "policy_learning", policy_learning, lambda: policy_learning) if policy_learning is not None else None
  self.length_prior = self.add_serializable_component(
    "length_prior", length_prior, lambda: length_prior) if length_prior is not None else None
  self.eps_greedy = self.add_serializable_component(
    "eps_greedy", eps_greedy, lambda: eps_greedy) if eps_greedy is not None else None
  self.sample_during_search = sample_during_search
  self.compute_report = compute_report
  self.reporter = reporter
def __init__( self, input_dim: numbers.Integral = Ref("exp_global.default_layer_dim"), hidden_dim: numbers.Integral = Ref("exp_global.default_layer_dim"), output_dim: numbers.Integral = Ref("exp_global.default_layer_dim"), bias: bool = True, activation: str = 'tanh', hidden_layers: numbers.Integral = 1, param_init: param_initializers.ParamInitializer = Ref( "exp_global.param_init", default=bare(param_initializers.GlorotInitializer)), bias_init: param_initializers.ParamInitializer = Ref( "exp_global.bias_init", default=bare(param_initializers.ZeroInitializer)), layers: Optional[Sequence[Transform]] = None) -> None: self.layers = self.add_serializable_component( "layers", layers, lambda: MLP._create_layers(num_layers=hidden_layers, input_dim=input_dim, hidden_dim=hidden_dim, output_dim=output_dim, bias=bias, activation=activation, param_init=param_init, bias_init=bias_init))
def __init__(self,
             model_file: str = settings.DEFAULT_MOD_PATH,
             log_file: str = settings.DEFAULT_LOG_PATH,
             dropout: numbers.Real = 0.3,
             weight_noise: numbers.Real = 0.0,
             default_layer_dim: numbers.Integral = 512,
             param_init: ParamInitializer = bare(GlorotInitializer),
             bias_init: ParamInitializer = bare(ZeroInitializer),
             truncate_dec_batches: bool = False,
             save_num_checkpoints: numbers.Integral = 1,
             loss_comb_method: str = "sum",
             compute_report: bool = False,
             commandline_args: dict = {},
             placeholders: Dict[str, Any] = {}) -> None:
  self.model_file = model_file
  self.log_file = log_file
  self.dropout = dropout
  self.weight_noise = weight_noise
  self.default_layer_dim = default_layer_dim
  self.param_init = param_init
  self.bias_init = bias_init
  self.truncate_dec_batches = truncate_dec_batches
  self.commandline_args = commandline_args
  self.save_num_checkpoints = save_num_checkpoints
  self.loss_comb_method = loss_comb_method
  self.compute_report = compute_report
  self.placeholders = placeholders
def __init__( self, input_dim: numbers.Integral = Ref("exp_global.default_layer_dim"), state_dim: numbers.Integral = Ref("exp_global.default_layer_dim"), hidden_dim: numbers.Integral = Ref("exp_global.default_layer_dim"), param_init: param_initializers.ParamInitializer = Ref( "exp_global.param_init", default=bare(param_initializers.GlorotInitializer)), bias_init: param_initializers.ParamInitializer = Ref( "exp_global.bias_init", default=bare(param_initializers.ZeroInitializer)) ) -> None: self.input_dim = input_dim self.state_dim = state_dim self.hidden_dim = hidden_dim my_params = param_collections.ParamManager.my_params(self) self.linear_context = my_params.add_parameters( (hidden_dim, input_dim), init=param_init[0].initializer((hidden_dim, input_dim))) self.linear_query = my_params.add_parameters( (hidden_dim, state_dim), init=param_init[1].initializer((hidden_dim, state_dim))) self.bias_context = my_params.add_parameters( (hidden_dim, ), init=bias_init.initializer((hidden_dim, ))) self.pU = my_params.add_parameters((1, hidden_dim), init=param_init[2].initializer( (1, hidden_dim))) self.curr_sent = None self.attention_vecs = None self.WI = None
def __init__(self,
             input_dim=Ref("exp_global.default_layer_dim"),
             state_dim=Ref("exp_global.default_layer_dim"),
             hidden_dim=Ref("exp_global.default_layer_dim"),
             param_init=Ref("exp_global.param_init", default=bare(GlorotInitializer)),
             bias_init=Ref("exp_global.bias_init", default=bare(ZeroInitializer))):
  self.input_dim = input_dim
  self.state_dim = state_dim
  self.hidden_dim = hidden_dim
  param_collection = ParamManager.my_params(self)
  self.pW = param_collection.add_parameters((hidden_dim, input_dim),
                                            init=param_init.initializer((hidden_dim, input_dim)))
  self.pV = param_collection.add_parameters((hidden_dim, state_dim),
                                            init=param_init.initializer((hidden_dim, state_dim)))
  self.pb = param_collection.add_parameters((hidden_dim,),
                                            init=bias_init.initializer((hidden_dim,)))
  self.pU = param_collection.add_parameters((1, hidden_dim),
                                            init=param_init.initializer((1, hidden_dim)))
  self.curr_sent = None
def __init__( self, input_dim: numbers.Integral = Ref("exp_global.default_layer_dim"), state_dim: numbers.Integral = Ref("exp_global.default_layer_dim"), hidden_dim: numbers.Integral = Ref("exp_global.default_layer_dim"), param_init: param_initializers.ParamInitializer = Ref( "exp_global.param_init", default=bare(param_initializers.GlorotInitializer)), bias_init: param_initializers.ParamInitializer = Ref( "exp_global.bias_init", default=bare(param_initializers.ZeroInitializer)), truncate_dec_batches: bool = Ref("exp_global.truncate_dec_batches", default=False) ) -> None: self.input_dim = input_dim self.state_dim = state_dim self.hidden_dim = hidden_dim self.truncate_dec_batches = truncate_dec_batches param_collection = param_collections.ParamManager.my_params(self) self.pW = param_collection.add_parameters((hidden_dim, input_dim), init=param_init.initializer( (hidden_dim, input_dim))) self.pV = param_collection.add_parameters((hidden_dim, state_dim), init=param_init.initializer( (hidden_dim, state_dim))) self.pb = param_collection.add_parameters((hidden_dim, ), init=bias_init.initializer( (hidden_dim, ))) self.pU = param_collection.add_parameters((1, hidden_dim), init=param_init.initializer( (1, hidden_dim))) self.curr_sent = None
def __init__( self, input_dim: numbers.Integral = Ref("exp_global.default_layer_dim"), output_dim: numbers.Integral = Ref("exp_global.default_layer_dim"), bias: bool = True, activation: str = 'tanh', param_init: param_initializers.ParamInitializer = Ref( "exp_global.param_init", default=bare(param_initializers.GlorotInitializer)), bias_init: param_initializers.ParamInitializer = Ref( "exp_global.bias_init", default=bare(param_initializers.ZeroInitializer)) ) -> None: self.bias = bias self.output_dim = output_dim self.input_dim = input_dim self.activation = tt.activation_by_name(activation) my_params = param_collections.ParamManager.my_params(self) self.W1 = my_params.add_parameters( (self.output_dim, self.input_dim), init=param_init.initializer((self.output_dim, self.input_dim))) if self.bias: self.b1 = my_params.add_parameters((self.output_dim, ), init=bias_init.initializer( (self.output_dim, )))