def __init__(self, model, src_file=None, trg_file=None, dev_every=0,
             batcher=bare(SrcBatcher, batch_size=32), loss_calculator=None,
             run_for_epochs=None, lr_decay=1.0, lr_decay_times=3, patience=1,
             initial_patience=None, dev_tasks=None, restart_trainer=False,
             reload_command=None, name=None, sample_train_sents=None,
             max_num_train_sents=None, max_src_len=None, max_trg_len=None,
             exp_global=Ref(Path("exp_global"))):
    """Record the configuration of a training task and create its bookkeeping objects.

    Args:
      model: a generator.GeneratorModel object
      src_file: The file for the source data.
      trg_file: The file for the target data.
      dev_every (int): dev checkpoints every n sentences (0 for only after epoch)
      batcher: Type of batcher
      loss_calculator: loss calculator to use; when not given (or falsy) a
                       ``LossCalculator(MLELoss())`` is created
      run_for_epochs: stored as-is; not interpreted in this constructor
      lr_decay (float): must satisfy ``0.0 < lr_decay <= 1.0``
      lr_decay_times (int): Early stopping after decaying learning rate a certain number of times
      patience (int): apply LR decay after dev scores haven't improved over this many checkpoints
      initial_patience (int): if given, allows adjusting patience for the first LR decay
      dev_tasks: A list of tasks to run on the development set
      restart_trainer: Restart trainer (useful for Adam) and revert weights to best dev
                       checkpoint when applying LR decay (https://arxiv.org/pdf/1706.09733.pdf)
      reload_command: Command to change the input data after each epoch.
                      --epoch EPOCH_NUM will be appended to the command.
                      To just reload the data after each epoch set the command to 'true'.
      name: will be prepended to log outputs if given
      sample_train_sents: stored as-is; not interpreted in this constructor
      max_num_train_sents: stored as-is; not interpreted in this constructor
      max_src_len: stored as-is; not interpreted in this constructor
      max_trg_len: stored as-is; not interpreted in this constructor
      exp_global: experiment-wide settings; its ``dynet_param_collection.model_file``
                  is copied to ``self.model_file``

    Raises:
      RuntimeError: if ``lr_decay`` lies outside ``(0.0, 1.0]``.
    """
    self.exp_global = exp_global
    self.model_file = self.exp_global.dynet_param_collection.model_file

    # Reject decay factors that would grow the learning rate or make it non-positive.
    if lr_decay <= 0.0 or lr_decay > 1.0:
        raise RuntimeError("illegal lr_decay, must satisfy: 0.0 < lr_decay <= 1.0")

    # Data sources and dev-time tasks.
    self.src_file = src_file
    self.trg_file = trg_file
    self.dev_tasks = dev_tasks
    self.reload_command = reload_command

    # Learning-rate schedule and stopping criteria.
    self.lr_decay = lr_decay
    self.lr_decay_times = lr_decay_times
    self.patience = patience
    self.initial_patience = initial_patience
    self.restart_trainer = restart_trainer
    self.run_for_epochs = run_for_epochs
    self.early_stopping_reached = False

    # training state
    self.training_state = TrainingState()

    self.model = model
    if not loss_calculator:
        loss_calculator = LossCalculator(MLELoss())
    self.loss_calculator = loss_calculator

    # Limits on the training data.
    self.sample_train_sents = sample_train_sents
    self.max_num_train_sents = max_num_train_sents
    self.max_src_len = max_src_len
    self.max_trg_len = max_trg_len

    self.batcher = batcher
    # Created last because the tracker receives this (by now fully populated) task object.
    self.logger = BatchLossTracker(self, dev_every, name)
def __init__(self, tasks, trainer=None, exp_global=Ref(Path("exp_global"))):
    """Hand the tasks and trainer to the parent constructor and keep ``exp_global``.

    :param tasks: forwarded unchanged to the parent class
    :param trainer: forwarded unchanged to the parent class
    :param exp_global: forwarded to the parent class and also stored on this instance
    """
    super().__init__(
        exp_global=exp_global,
        tasks=tasks,
        trainer=trainer,
    )
    # Assigned after the parent constructor has run, so this is the value that sticks.
    self.exp_global = exp_global
def shared_params(self):
    """Return the groups of component-relative paths declared as shared.

    Each set names paths that belong together (presumably the framework keeps
    the values at those paths identical -- confirm against the caller).
    """
    layer_count_paths = {Path(".layers"), Path(".bridge.dec_layers")}
    layer_dim_paths = {Path(".lstm_dim"), Path(".bridge.dec_dim")}
    return [layer_count_paths, layer_dim_paths]
def shared_params(self):
    """Return the groups of component-relative paths declared as shared.

    Each set names dimension paths on neighbouring components that belong
    together (presumably the framework keeps the values at those paths
    identical -- confirm against the caller).
    """
    src_embedding_to_encoder = {
        Path(".src_embedder.emb_dim"),
        Path(".encoder.input_dim"),
    }
    encoder_to_attender_and_decoder = {
        Path(".encoder.hidden_dim"),
        Path(".attender.input_dim"),
        Path(".decoder.input_dim"),
    }
    attender_state_to_decoder = {
        Path(".attender.state_dim"),
        Path(".decoder.lstm_dim"),
    }
    trg_embedding_to_decoder = {
        Path(".trg_embedder.emb_dim"),
        Path(".decoder.trg_embed_dim"),
    }
    return [
        src_embedding_to_encoder,
        encoder_to_attender_and_decoder,
        attender_state_to_decoder,
        trg_embedding_to_decoder,
    ]
def test_eq(self):
    """Paths compare equal exactly when they denote the same location."""
    equal_pairs = [
        (Path(""), Path("")),
        (Path(".."), Path("..")),
        (Path("one.2"), Path("one.2")),
        # The parent of a three-component path equals the two-component prefix.
        (Path("one.2"), Path("one.2.3").parent()),
    ]
    for left, right in equal_pairs:
        self.assertEqual(left, right)

    unequal_pairs = [
        (Path("one.2"), Path("one.2.3")),
        # The empty path and the bare "." path are distinct.
        (Path(""), Path(".")),
    ]
    for left, right in unequal_pairs:
        self.assertNotEqual(left, right)
def __init__(self, num_layers, input_dim, hidden_dim, add_to_output=False,
             dropout=None, exp_global=Ref(Path("exp_global"))):
    """Create a stack of ``num_layers`` UniLSTMSeqTransducer layers.

    :param num_layers: number of layers to create; must be > 0
    :param input_dim: input size of the first layer
    :param hidden_dim: hidden size of every layer, and input size of every layer after the first
    :param add_to_output: stored as ``self.add_to_output``; not interpreted here
    :param dropout: forwarded unchanged to every layer
    :param exp_global: forwarded unchanged to every layer
    """
    assert num_layers > 0
    # Only the first layer reads the external input; later layers read hidden states.
    self.builder_layers = [
        UniLSTMSeqTransducer(exp_global=exp_global,
                             input_dim=input_dim if layer_idx == 0 else hidden_dim,
                             hidden_dim=hidden_dim,
                             dropout=dropout)
        for layer_idx in range(num_layers)
    ]
    self.add_to_output = add_to_output
def __init__(self, tasks, task_weights=None, trainer=None,
             exp_global=Ref(Path("exp_global"))):
    """Initialise the parent class and settle the per-task weights.

    :param tasks: forwarded to the parent class; its length sizes the default weights
    :param task_weights: one weight per task; when not given (or empty/falsy), every
                         task receives the uniform weight ``1 / len(tasks)``
    :param trainer: forwarded unchanged to the parent class
    :param exp_global: forwarded to the parent class and also stored on this instance
    """
    super().__init__(exp_global=exp_global, tasks=tasks, trainer=trainer)
    if task_weights:
        self.task_weights = task_weights
    else:
        num_tasks = len(tasks)
        self.task_weights = [1. / num_tasks] * num_tasks
    self.exp_global = exp_global
def test_add_path(self):
    """``add_path`` concatenates paths and rejects a relative (dot-led) suffix."""
    # (base, suffix, expected string form of base.add_path(suffix))
    cases = [
        ("one", "2", "one.2"),
        ("one", "2.3", "one.2.3"),
        ("", "2.3", "2.3"),
        ("one.2", "", "one.2"),
        ("", "", ""),
        (".", "", "."),
        (".", "one.two", ".one.two"),
        (".xy", "one.two", ".xy.one.two"),
    ]
    for base, suffix, expected in cases:
        joined = Path(base).add_path(Path(suffix))
        self.assertEqual(str(joined), expected)

    with self.assertRaises(NotImplementedError):
        Path("one").add_path(Path(".2.3"))
def shared_params(self):
    """Return the groups of component-relative paths declared as shared.

    Only one group exists: this component's own ``input_dim`` together with
    the ``input_dim`` of its first sub-module (presumably the framework keeps
    the two identical -- confirm against the caller).
    """
    input_dim_paths = {Path(".input_dim"), Path(".modules.0.input_dim")}
    return [input_dim_paths]
def test_ancestors(self):
    """``ancestors`` yields every prefix of a path, including the empty path and itself."""
    # path string -> string forms of all expected ancestors
    cases = [
        ("", [""]),
        ("a", ["", "a"]),
        ("one.two.three", ["", "one", "one.two", "one.two.three"]),
    ]
    for path_str, ancestor_strs in cases:
        expected = {Path(ancestor) for ancestor in ancestor_strs}
        self.assertEqual(Path(path_str).ancestors(), expected)
def __init__(self, model=Ref(path=Path("model")), src_file=None, trg_file=None,
             dev_every=0, batcher=bare(xnmt.batcher.SrcBatcher, batch_size=32),
             loss_calculator=None, trainer=None, run_for_epochs=None,
             lr_decay=1.0, lr_decay_times=3, patience=1, initial_patience=None,
             dev_tasks=None, restart_trainer=False, reload_command=None,
             name=None, sample_train_sents=None, max_num_train_sents=None,
             max_src_len=None, max_trg_len=None,
             exp_global=Ref(Path("exp_global"))):
    """Configure the training task via the parent constructor and attach a trainer.

    All arguments except ``trainer`` are forwarded unchanged to the parent constructor.

    :param model: a generator.GeneratorModel object
    :param src_file: the source training file
    :param trg_file: the target training file
    :param dev_every: (int) dev checkpoints every n sentences (0 for only after epoch)
    :param batcher: Type of batcher
    :param loss_calculator: The method for calculating the loss.
    :param trainer: Trainer object, default is SGD with learning rate 0.1
    :param run_for_epochs: forwarded to the parent constructor; not interpreted here
    :param lr_decay: (float) forwarded to the parent constructor
    :param lr_decay_times: (int) Early stopping after decaying learning rate a certain number of times
    :param patience: (int) apply LR decay after dev scores haven't improved over this many checkpoints
    :param initial_patience: (int) if given, allows adjusting patience for the first LR decay
    :param dev_tasks: A list of tasks to use during the development stage.
    :param restart_trainer: Restart trainer (useful for Adam) and revert weights to best dev
                            checkpoint when applying LR decay (https://arxiv.org/pdf/1706.09733.pdf)
    :param reload_command: Command to change the input data after each epoch.
                           --epoch EPOCH_NUM will be appended to the command.
                           To just reload the data after each epoch set the command to 'true'.
    :param name: will be prepended to log outputs if given
    :param sample_train_sents: forwarded to the parent constructor; not interpreted here
    :param max_num_train_sents: forwarded to the parent constructor; not interpreted here
    :param max_src_len: forwarded to the parent constructor; not interpreted here
    :param max_trg_len: forwarded to the parent constructor; not interpreted here
    :param exp_global: experiment-wide settings; ``commandline_args.dynet_profiling``
                       is read from it when present (0 otherwise)
    """
    super().__init__(
        model=model,
        src_file=src_file,
        trg_file=trg_file,
        dev_every=dev_every,
        batcher=batcher,
        loss_calculator=loss_calculator,
        run_for_epochs=run_for_epochs,
        lr_decay=lr_decay,
        lr_decay_times=lr_decay_times,
        patience=patience,
        initial_patience=initial_patience,
        dev_tasks=dev_tasks,
        restart_trainer=restart_trainer,
        reload_command=reload_command,
        name=name,
        sample_train_sents=sample_train_sents,
        max_num_train_sents=max_num_train_sents,
        max_src_len=max_src_len,
        max_trg_len=max_trg_len,
        exp_global=exp_global,
    )
    # Fall back to plain SGD when the caller supplied no trainer.
    if not trainer:
        trainer = xnmt.optimizer.SimpleSGDTrainer(exp_global=self.exp_global,
                                                  e0=0.1)
    self.trainer = trainer
    # Not every invocation defines this command-line option, hence the default of 0.
    self.dynet_profiling = getattr(exp_global.commandline_args,
                                   "dynet_profiling", 0)
def __init__(self, dec_layers=1, dec_dim=None,
             exp_global=Ref(Path("exp_global"))):
    """Store the decoder layer count and dimension.

    :param dec_layers: stored as ``self.dec_layers``
    :param dec_dim: stored as ``self.dec_dim``; when not given (or falsy),
                    ``exp_global.default_layer_dim`` is used instead
    :param exp_global: experiment-wide settings supplying the default dimension
    """
    self.dec_layers = dec_layers
    if dec_dim:
        self.dec_dim = dec_dim
    else:
        self.dec_dim = exp_global.default_layer_dim
def __init__(self, exp_global=Ref(Path("exp_global")), layers=1, input_dim=None,
             hidden_dim=None, dropout=None, weightnoise_std=None,
             param_init=None, bias_init=None):
    """Build the forward and backward UniLSTMSeqTransducer stacks of a bidirectional LSTM.

    Each direction gets ``hidden_dim // 2`` units per layer. The first layer
    of each direction reads ``input_dim`` inputs; every later layer reads
    ``hidden_dim`` inputs.

    :param exp_global: experiment-wide settings supplying the defaults below
    :param layers: number of layers per direction; stored as ``self.num_layers``
    :param input_dim: input size; defaults to ``exp_global.default_layer_dim``
    :param hidden_dim: combined hidden size of both directions (must be even);
                       defaults to ``exp_global.default_layer_dim``
    :param dropout: forwarded unchanged to every layer; ``self.dropout_rate`` falls
                    back to ``exp_global.dropout`` when this is not given
    :param weightnoise_std: forwarded unchanged to every layer; ``self.weightnoise_std``
                            falls back to ``exp_global.weight_noise`` when this is not given
    :param param_init: a single initializer used for all layers, or a sequence with
                       one initializer per layer; defaults to ``exp_global.param_init``
    :param bias_init: a single initializer used for all layers, or a sequence with
                      one initializer per layer; defaults to ``exp_global.bias_init``
    """
    register_handler(self)
    self.num_layers = layers
    input_dim = input_dim or exp_global.default_layer_dim
    hidden_dim = hidden_dim or exp_global.default_layer_dim
    self.hidden_dim = hidden_dim
    self.dropout_rate = dropout or exp_global.dropout
    self.weightnoise_std = weightnoise_std or exp_global.weight_noise
    assert hidden_dim % 2 == 0
    param_init = param_init or exp_global.param_init
    bias_init = bias_init or exp_global.bias_init

    def build_direction_layer(layer_idx):
        # Build the layer at depth `layer_idx` for one direction.
        return UniLSTMSeqTransducer(
            exp_global=exp_global,
            # Only the first layer reads the external input.
            input_dim=input_dim if layer_idx == 0 else hidden_dim,
            # Floor division keeps the per-direction size an int; `/` would
            # hand a float dimension to the layer under Python 3.
            hidden_dim=hidden_dim // 2,
            # The raw arguments (not the resolved defaults above) are forwarded,
            # as before; presumably each layer applies its own fallback -- confirm.
            dropout=dropout,
            weightnoise_std=weightnoise_std,
            param_init=(param_init[layer_idx]
                        if isinstance(param_init, Sequence) else param_init),
            bias_init=(bias_init[layer_idx]
                       if isinstance(bias_init, Sequence) else bias_init))

    # Construction order is kept exactly as before (forward[0], backward[0],
    # forward[1:], backward[1:]) so the order in which layers are created
    # does not change.
    self.forward_layers = [build_direction_layer(0)]
    self.backward_layers = [build_direction_layer(0)]
    self.forward_layers += [build_direction_layer(i) for i in range(1, layers)]
    self.backward_layers += [build_direction_layer(i) for i in range(1, layers)]
def test_str(self):
    """``str(Path(s))`` gives back the string the path was built from."""
    for path_str in ("one.2", ""):
        self.assertEqual(str(Path(path_str)), path_str)
def __init__(self, num_layers, input_dim, hidden_dim, add_to_output=False,
             dropout=None, exp_global=Ref(Path("exp_global"))):
    """
    :param num_layers: depth of the RNN (> 0)
    :param input_dim: size of the inputs
    :param hidden_dim: size of the outputs (and intermediate layer representations)
    :param add_to_output: whether to add a residual connection to the output layer
    :param dropout: forwarded unchanged to every UniLSTMSeqTransducer layer
    :param exp_global: forwarded unchanged to every UniLSTMSeqTransducer layer
    """
    assert num_layers > 0
    # The first layer consumes the external input; each later layer consumes
    # the hidden states of the layer before it.
    layer_input_dims = [input_dim] + [hidden_dim] * (num_layers - 1)
    self.builder_layers = []
    for layer_input_dim in layer_input_dims:
        layer = UniLSTMSeqTransducer(exp_global=exp_global,
                                     input_dim=layer_input_dim,
                                     hidden_dim=hidden_dim,
                                     dropout=dropout)
        self.builder_layers.append(layer)
    self.add_to_output = add_to_output
def test_set(self):
    """Equal paths hash alike, so a set collapses duplicates and supports lookup."""
    # "one.1.3" appears twice on purpose; the set must keep a single copy.
    paths = {Path("one.2"), Path("one.1.3"), Path("one.1.3")}
    self.assertIn(Path("one.2"), paths)
    self.assertEqual(len(paths), 2)
def __init__(self, exp_global=Ref(Path("exp_global")), e0=0.1):
    """Create a ``dy.SimpleSGDTrainer`` over the experiment's parameter collection.

    :param exp_global: supplies ``dynet_param_collection.param_col``
    :param e0: forwarded positionally to ``dy.SimpleSGDTrainer`` (presumably the
               initial learning rate -- confirm against the DyNet version in use)
    """
    param_col = exp_global.dynet_param_collection.param_col
    self.optimizer = dy.SimpleSGDTrainer(param_col, e0)
def test_get_absolute(self):
    """Each leading dot beyond the first climbs one level from the base path."""
    base = "1.2"
    # (relative path, expected absolute path when resolved against `base`)
    cases = [
        (".", "1.2"),
        (".x.y", "1.2.x.y"),
        ("..x.y", "1.x.y"),
        ("...x.y", "x.y"),
    ]
    for relative, expected in cases:
        self.assertEqual(Path(relative).get_absolute(Path(base)),
                         Path(expected))

    # One more dot than the base has components cannot be resolved.
    with self.assertRaises(ValueError):
        Path("....x.y").get_absolute(Path(base))
def __init__(self, exp_global=Ref(Path("exp_global")), e0=0.01, mom=0.9):
    """Create a ``dy.MomentumSGDTrainer`` over the experiment's parameter collection.

    :param exp_global: supplies ``dynet_param_collection.param_col``
    :param e0: forwarded positionally to ``dy.MomentumSGDTrainer`` (presumably the
               initial learning rate -- confirm against the DyNet version in use)
    :param mom: forwarded positionally after ``e0`` (presumably the momentum -- confirm)
    """
    param_col = exp_global.dynet_param_collection.param_col
    self.optimizer = dy.MomentumSGDTrainer(param_col, e0, mom)
def __init__(self, num_layers, input_dim, hidden_dim, add_to_output=False,
             dropout=None, exp_global=Ref(Path("exp_global"))):
    """Create one forward and one backward layer followed by a residual stack.

    :param num_layers: total depth; must be > 1, because one level is taken by the
                       forward/backward pair and the remaining ``num_layers - 1``
                       levels go to the ResidualRNNBuilder
    :param input_dim: input size of the forward and backward layers
    :param hidden_dim: must be even; each direction gets ``hidden_dim // 2`` units and
                       the residual stack uses ``hidden_dim`` for input and hidden size
    :param add_to_output: forwarded unchanged to the ResidualRNNBuilder
    :param dropout: forwarded unchanged to all three sub-components
    :param exp_global: forwarded unchanged to all three sub-components
    """
    assert num_layers > 1
    assert hidden_dim % 2 == 0

    # Both directions are configured identically.
    direction_kwargs = dict(exp_global=exp_global,
                            input_dim=input_dim,
                            hidden_dim=hidden_dim//2,
                            dropout=dropout)
    self.forward_layer = UniLSTMSeqTransducer(**direction_kwargs)
    self.backward_layer = UniLSTMSeqTransducer(**direction_kwargs)

    self.residual_network = ResidualRNNBuilder(exp_global=exp_global,
                                               num_layers=num_layers - 1,
                                               input_dim=hidden_dim,
                                               hidden_dim=hidden_dim,
                                               add_to_output=add_to_output,
                                               dropout=dropout)
def __init__(self, exp_global=Ref(Path("exp_global")), e0=0.1, eps=1e-20):
    """Create a ``dy.AdagradTrainer`` over the experiment's parameter collection.

    :param exp_global: supplies ``dynet_param_collection.param_col``
    :param e0: forwarded positionally to ``dy.AdagradTrainer`` (presumably the
               initial learning rate -- confirm against the DyNet version in use)
    :param eps: forwarded as the ``eps`` keyword argument
    """
    param_col = exp_global.dynet_param_collection.param_col
    self.optimizer = dy.AdagradTrainer(param_col, e0, eps=eps)
def __init__(self, model_file=None, src_file=None, trg_file=None,
             ref_file=None, max_src_len=None, post_process="none",
             report_path=None, report_type="html", beam=1, max_len=100,
             len_norm_type=None, mode="onebest",
             batcher=Ref(Path("train.batcher"), required=False)):
    """Record the given options on the instance; no other work happens here.

    Every argument is stored unchanged under an attribute of the same name.

    :param model_file: stored as ``self.model_file``
    :param src_file: stored as ``self.src_file``
    :param trg_file: stored as ``self.trg_file``
    :param ref_file: stored as ``self.ref_file``
    :param max_src_len: stored as ``self.max_src_len``
    :param post_process: stored as ``self.post_process``
    :param report_path: stored as ``self.report_path``
    :param report_type: stored as ``self.report_type``
    :param beam: stored as ``self.beam``
    :param max_len: stored as ``self.max_len``
    :param len_norm_type: stored as ``self.len_norm_type``
    :param mode: stored as ``self.mode``
    :param batcher: stored as ``self.batcher``; defaults to an optional reference
                    to the path ``train.batcher``
    """
    # Files.
    self.model_file = model_file
    self.src_file = src_file
    self.trg_file = trg_file
    self.ref_file = ref_file

    # Output post-processing and reporting.
    self.post_process = post_process
    self.report_path = report_path
    self.report_type = report_type

    # Remaining options.
    self.max_src_len = max_src_len
    self.beam = beam
    self.max_len = max_len
    self.len_norm_type = len_norm_type
    self.mode = mode
    self.batcher = batcher
def __init__(self, exp_global=Ref(Path("exp_global")), eps=1e-6, rho=0.95):
    """Create a ``dy.AdadeltaTrainer`` over the experiment's parameter collection.

    :param exp_global: supplies ``dynet_param_collection.param_col``
    :param eps: forwarded positionally to ``dy.AdadeltaTrainer`` as its second argument
    :param rho: forwarded positionally to ``dy.AdadeltaTrainer`` as its third argument
    """
    param_col = exp_global.dynet_param_collection.param_col
    self.optimizer = dy.AdadeltaTrainer(param_col, eps, rho)
def __init__(self, exp_global=Ref(Path("exp_global")), layers=1, input_dim=None,
             lstm_dim=None, mlp_hidden_dim=None, trg_embed_dim=None,
             dropout=None, rnn_spec="lstm", residual_to_output=False,
             input_feeding=True, param_init_lstm=None, param_init_context=None,
             bias_init_context=None, param_init_output=None,
             bias_init_output=None, bridge=bare(CopyBridge),
             label_smoothing=0.0, vocab_projector=None, vocab_size=None,
             vocab=None,
             trg_reader=Ref(path=Path("model.trg_reader"), required=False)):
    """Build the decoder: an RNN, a context projection and a vocabulary projection.

    Sub-components are created in a fixed order (RNN, context projector,
    vocabulary projector), all inside ``exp_global``'s parameter collection.

    :param exp_global: experiment-wide settings; supplies the parameter collection and
                       the defaults for dimensions, initializers and dropout
    :param layers: number of RNN layers; stored as ``self.lstm_layers``
    :param input_dim: size of the context input; defaults to ``exp_global.default_layer_dim``
    :param lstm_dim: RNN hidden size; defaults to ``exp_global.default_layer_dim``
    :param mlp_hidden_dim: output size of the context projector;
                           defaults to ``exp_global.default_layer_dim``
    :param trg_embed_dim: target embedding size; defaults to ``exp_global.default_layer_dim``
    :param dropout: stored as ``self.dropout``; defaults to ``exp_global.dropout``
    :param rnn_spec: forwarded to ``RnnDecoder.rnn_from_spec``
    :param residual_to_output: forwarded to ``RnnDecoder.rnn_from_spec``
    :param input_feeding: when true, the RNN input size is ``trg_embed_dim + input_dim``
                          rather than ``trg_embed_dim`` alone
    :param param_init_lstm: RNN weight initializer; must be a GlorotInitializer;
                            defaults to ``exp_global.param_init``
    :param param_init_context: weight initializer of the context projector;
                               defaults to ``exp_global.param_init``
    :param bias_init_context: bias initializer of the context projector;
                              defaults to ``exp_global.bias_init``
    :param param_init_output: weight initializer of the vocabulary projector;
                              defaults to ``exp_global.param_init``
    :param bias_init_output: bias initializer of the vocabulary projector;
                             defaults to ``exp_global.bias_init``
    :param bridge: stored as ``self.bridge``
    :param label_smoothing: stored as ``self.label_smoothing``
    :param vocab_projector: ready-made vocabulary projector; when not given, a linear
                            layer from ``mlp_hidden_dim`` to the vocabulary size is created
    :param vocab_size: passed to ``self.choose_vocab_size``
    :param vocab: passed to ``self.choose_vocab_size``
    :param trg_reader: passed to ``self.choose_vocab_size``
    :raises NotImplementedError: if the resolved ``param_init_lstm`` is not a GlorotInitializer
    """
    register_handler(self)
    self.param_col = exp_global.dynet_param_collection.param_col

    # Resolve every unspecified dimension to the experiment-wide default.
    if not lstm_dim:
        lstm_dim = exp_global.default_layer_dim
    if not mlp_hidden_dim:
        mlp_hidden_dim = exp_global.default_layer_dim
    self.mlp_hidden_dim = mlp_hidden_dim
    if not trg_embed_dim:
        trg_embed_dim = exp_global.default_layer_dim
    if not input_dim:
        input_dim = exp_global.default_layer_dim
    self.input_dim = input_dim
    self.label_smoothing = label_smoothing

    # With input feeding, the context vector is appended to the target embedding.
    self.input_feeding = input_feeding
    self.lstm_dim = lstm_dim
    lstm_input = trg_embed_dim + input_dim if input_feeding else trg_embed_dim

    # Bridge
    self.lstm_layers = layers
    self.bridge = bridge

    # RNN
    self.fwd_lstm = RnnDecoder.rnn_from_spec(spec=rnn_spec,
                                             num_layers=layers,
                                             input_dim=lstm_input,
                                             hidden_dim=lstm_dim,
                                             model=self.param_col,
                                             residual_to_output=residual_to_output)
    if not param_init_lstm:
        param_init_lstm = exp_global.param_init
    if not isinstance(param_init_lstm, GlorotInitializer):
        raise NotImplementedError(
            "For the decoder LSTM, only Glorot initialization is currently supported")
    if getattr(param_init_lstm, "gain", 1.0) != 1.0:
        # Rescale the parameters at indices 3*layer and 3*layer + 1 of every layer
        # (presumably the two weight matrices, leaving the bias at 3*layer + 2
        # untouched -- confirm against DyNet's parameter layout).
        for layer_idx in range(layers):
            for offset in (0, 1):
                rnn_params = self.fwd_lstm.param_collection().parameters_list()
                rnn_params[3 * layer_idx + offset].scale(param_init_lstm.gain)

    # MLP: projects [context; RNN state] down to mlp_hidden_dim.
    context_param_init = param_init_context or exp_global.param_init
    context_bias_init = bias_init_context or exp_global.bias_init
    self.context_projector = xnmt.linear.Linear(input_dim=input_dim + lstm_dim,
                                                output_dim=mlp_hidden_dim,
                                                model=self.param_col,
                                                param_init=context_param_init,
                                                bias_init=context_bias_init)

    self.vocab_size = self.choose_vocab_size(vocab_size, vocab, trg_reader)
    # Only build (and allocate parameters for) a projector if none was supplied.
    if not vocab_projector:
        vocab_projector = xnmt.linear.Linear(
            input_dim=self.mlp_hidden_dim,
            output_dim=self.vocab_size,
            model=self.param_col,
            param_init=param_init_output or exp_global.param_init,
            bias_init=bias_init_output or exp_global.bias_init)
    self.vocab_projector = vocab_projector

    # Dropout
    self.dropout = dropout if dropout else exp_global.dropout
def __init__(self, exp_global=Ref(Path("exp_global")),
             ## COMPONENTS
             embed_encoder=None, segment_composer=None, final_transducer=None,
             ## OPTIONS
             length_prior=3.3,
             length_prior_alpha=None,  # GeometricSequence
             epsilon_greedy=None,      # GeometricSequence
             reinforce_scale=None,     # GeometricSequence
             confidence_penalty=None,  # SegmentationConfidencePenalty
             # For segmentation warmup (Always use the poisson prior)
             segmentation_warmup=0,
             ## FLAGS
             learn_delete=False,
             use_baseline=True,
             z_normalization=True,
             learn_segmentation=True,
             compose_char=False,
             log_reward=True,
             debug=False,
             print_sample=False):
    """Wire up the segmenting encoder's sub-components and its two linear layers.

    :param exp_global: experiment-wide settings; supplies the parameter collection and,
                       when ``embed_encoder`` has no ``hidden_dim``, the default layer size
    :param embed_encoder: required; transduces the embedding vectors to a sequence of
                          vectors. Its ``hidden_dim`` (or ``exp_global.default_layer_dim``
                          when absent) is the input size of both linear layers
    :param segment_composer: required; produces word embeddings based on a sequence of
                             character embeddings
    :param final_transducer: required; the final transducer
    :param length_prior: stored as ``self.length_prior``
    :param length_prior_alpha: stored as ``self.length_prior_alpha``
    :param epsilon_greedy: stored as ``self.eps``
    :param reinforce_scale: stored as ``self.lmbd``
    :param confidence_penalty: stored as ``self.confidence_penalty``
    :param segmentation_warmup: stored as ``self.segmentation_warmup``
    :param learn_delete: when true the decision layer has 3 outputs instead of 2
    :param use_baseline: stored as ``self.use_baseline``
    :param z_normalization: stored as ``self.z_normalization``
    :param learn_segmentation: stored as ``self.learn_segmentation``
    :param compose_char: stored as ``self.compose_char``
    :param log_reward: stored as ``self.log_reward``
    :param debug: stored as ``self.debug``
    :param print_sample: stored as ``self.print_sample``
    :raises AssertionError: if any of the three components is ``None``
    """
    register_handler(self)
    model = exp_global.dynet_param_collection.param_col
    # Sanity check
    assert embed_encoder is not None
    assert segment_composer is not None
    assert final_transducer is not None
    # The Embed Encoder transduces the embedding vectors to a sequence of vector
    self.embed_encoder = embed_encoder
    if hasattr(embed_encoder, "hidden_dim"):
        embed_encoder_dim = embed_encoder.hidden_dim
    else:
        # Fall back to the experiment-wide default. This branch used to read
        # `yaml_context`, a name that is not defined in this constructor.
        embed_encoder_dim = exp_global.default_layer_dim
    # The Segment transducer produced word embeddings based on sequence of character embeddings
    self.segment_composer = segment_composer
    # The final transducer
    self.final_transducer = final_transducer
    # Decision layer of segmentation
    self.segment_transform = linear.Linear(input_dim=embed_encoder_dim,
                                           output_dim=3 if learn_delete else 2,
                                           model=model)
    # The baseline linear regression model
    self.baseline = linear.Linear(input_dim=embed_encoder_dim,
                                  output_dim=1,
                                  model=model)
    # Flags
    self.use_baseline = use_baseline
    self.learn_segmentation = learn_segmentation
    self.learn_delete = learn_delete
    self.z_normalization = z_normalization
    self.debug = debug
    self.compose_char = compose_char
    self.print_sample = print_sample
    self.log_reward = log_reward
    # Fixed Parameters
    self.length_prior = length_prior
    self.segmentation_warmup = segmentation_warmup
    # Variable Parameters
    self.length_prior_alpha = length_prior_alpha
    self.lmbd = reinforce_scale
    self.eps = epsilon_greedy
    self.confidence_penalty = confidence_penalty
    # States of the object
    self.train = False