def __init__(self, model, src_file=None, trg_file=None, dev_every=0,
             batcher=bare(SrcBatcher, batch_size=32), loss_calculator=None,
             run_for_epochs=None, lr_decay=1.0, lr_decay_times=3, patience=1,
             initial_patience=None, dev_tasks=None, restart_trainer=False,
             reload_command=None, name=None, sample_train_sents=None,
             max_num_train_sents=None, max_src_len=None, max_trg_len=None,
             exp_global=Ref(Path("exp_global"))):
    """Store the training configuration and create the initial training state.

    Args:
        model: a generator.GeneratorModel object
        src_file: the file for the source data
        trg_file: the file for the target data
        dev_every (int): dev checkpoints every n sentences (0 for only
            after epoch); handed to the BatchLossTracker created here
        batcher: type of batcher
        loss_calculator: loss calculator to use; when falsy, a
            LossCalculator(MLELoss()) is created instead
        run_for_epochs: stored as-is on the instance (not interpreted here)
        lr_decay (float): must satisfy 0.0 < lr_decay <= 1.0
        lr_decay_times (int): early stopping after decaying the learning
            rate a certain number of times
        patience (int): apply LR decay after dev scores haven't improved
            over this many checkpoints
        initial_patience (int): if given, allows adjusting patience for the
            first LR decay
        dev_tasks: a list of tasks to run on the development set
        restart_trainer: restart trainer (useful for Adam) and revert
            weights to best dev checkpoint when applying LR decay
            (https://arxiv.org/pdf/1706.09733.pdf)
        reload_command: command to change the input data after each epoch.
            --epoch EPOCH_NUM will be appended to the command. To just
            reload the data after each epoch set the command to 'true'.
        name: will be prepended to log outputs if given
        sample_train_sents: stored as-is on the instance
        max_num_train_sents: stored as-is on the instance
        max_src_len: stored as-is on the instance
        max_trg_len: stored as-is on the instance
        exp_global: experiment-wide settings object; its
            dynet_param_collection.model_file is copied to self.model_file

    Raises:
        RuntimeError: if lr_decay is greater than 1.0 or not positive.
    """
    # Experiment-wide settings and the model file they point at.
    self.exp_global = exp_global
    self.model_file = exp_global.dynet_param_collection.model_file

    # Data locations and dev-time tasks.
    self.src_file, self.trg_file = src_file, trg_file
    self.dev_tasks = dev_tasks

    # Reject decay factors outside (0.0, 1.0] before storing anything else.
    if lr_decay <= 0.0 or lr_decay > 1.0:
        raise RuntimeError(
            "illegal lr_decay, must satisfy: 0.0 < lr_decay <= 1.0")

    # Learning-rate schedule and stopping criteria.
    self.lr_decay = lr_decay
    self.patience = patience
    self.initial_patience = initial_patience
    self.lr_decay_times = lr_decay_times
    self.restart_trainer = restart_trainer
    self.run_for_epochs = run_for_epochs
    self.early_stopping_reached = False

    # training state
    self.training_state = TrainingState()

    self.reload_command = reload_command
    self.model = model

    # Fall back to an MLE-based loss calculator when none was supplied.
    if not loss_calculator:
        loss_calculator = LossCalculator(MLELoss())
    self.loss_calculator = loss_calculator

    # Limits on the training data.
    self.sample_train_sents = sample_train_sents
    self.max_num_train_sents = max_num_train_sents
    self.max_src_len = max_src_len
    self.max_trg_len = max_trg_len

    self.batcher = batcher
    # Created last: the tracker is handed this (by now fully populated) object.
    self.logger = BatchLossTracker(self, dev_every, name)
def __init__(self, exp_global=Ref(Path("exp_global")), layers=1,
             input_dim=None, hidden_dim=None, dropout=None,
             weightnoise_std=None, param_init=None, bias_init=None):
    """Build the forward and backward stacks of unidirectional LSTM layers.

    Each direction gets ``max(layers, 1)`` UniLSTMSeqTransducer layers, each
    with half of ``hidden_dim`` as its hidden size. The first layer of each
    stack consumes ``input_dim``; deeper layers consume ``hidden_dim``.

    Args:
        exp_global: experiment-wide settings; supplies the defaults
            (default_layer_dim, dropout, weight_noise, param_init,
            bias_init) for arguments that are falsy.
        layers: number of layers per direction (stored as self.num_layers).
        input_dim: input dimension of the first layer.
        hidden_dim: combined hidden dimension of both directions; must be
            even.
        dropout: passed unchanged to every sub-layer; the resolved value
            (argument or exp_global.dropout) is kept in self.dropout_rate.
        weightnoise_std: passed unchanged to every sub-layer; the resolved
            value is kept in self.weightnoise_std.
        param_init: one initializer for all layers, or a Sequence indexed by
            layer number.
        bias_init: one initializer for all layers, or a Sequence indexed by
            layer number.

    Raises:
        AssertionError: if the resolved hidden_dim is odd.
    """
    register_handler(self)
    self.num_layers = layers

    # NOTE: these fallbacks use `or`, so an explicit falsy value (e.g. a
    # dropout of 0.0) is replaced by the exp_global default as well.
    input_dim = input_dim or exp_global.default_layer_dim
    hidden_dim = hidden_dim or exp_global.default_layer_dim
    self.hidden_dim = hidden_dim
    self.dropout_rate = dropout or exp_global.dropout
    self.weightnoise_std = weightnoise_std or exp_global.weight_noise

    assert hidden_dim % 2 == 0, "hidden_dim must be even"
    # Floor division keeps the per-direction size an int; `/` would hand a
    # float dimension to every sub-layer under Python 3.
    direction_dim = hidden_dim // 2

    param_init = param_init or exp_global.param_init
    bias_init = bias_init or exp_global.bias_init

    def _for_layer(init, layer_idx):
        # A Sequence holds one initializer per layer; anything else is shared.
        return init[layer_idx] if isinstance(init, Sequence) else init

    def _make_layer(layer_input_dim, layer_idx):
        # One unidirectional layer configured for position `layer_idx`.
        return UniLSTMSeqTransducer(
            exp_global=exp_global,
            input_dim=layer_input_dim,
            hidden_dim=direction_dim,
            dropout=dropout,
            weightnoise_std=weightnoise_std,
            param_init=_for_layer(param_init, layer_idx),
            bias_init=_for_layer(bias_init, layer_idx))

    # Construction order (fwd[0], bwd[0], fwd[1:], bwd[1:]) is kept exactly
    # as before, in case the order in which sub-layers register their
    # parameters matters (e.g. for loading previously saved models).
    self.forward_layers = [_make_layer(input_dim, 0)]
    self.backward_layers = [_make_layer(input_dim, 0)]
    self.forward_layers += [
        _make_layer(hidden_dim, i) for i in range(1, layers)]
    self.backward_layers += [
        _make_layer(hidden_dim, i) for i in range(1, layers)]
def __init__(self, tasks, trainer=None,
             exp_global=Ref(Path("exp_global"))):
    """Initialize through the parent class and keep the experiment settings.

    Args:
        tasks: passed through unchanged to the parent initializer.
        trainer: passed through unchanged to the parent initializer.
        exp_global: passed to the parent initializer and additionally
            stored as self.exp_global.
    """
    # The parent initializer runs first; the attribute is (re)assigned after.
    super().__init__(
        tasks=tasks,
        trainer=trainer,
        exp_global=exp_global,
    )
    self.exp_global = exp_global
def __init__(self, exp_global=Ref(Path("exp_global")),
             ## COMPONENTS
             embed_encoder=None, segment_composer=None, final_transducer=None,
             ## OPTIONS
             length_prior=3.3,
             length_prior_alpha=None,  # GeometricSequence
             epsilon_greedy=None,      # GeometricSequence
             reinforce_scale=None,     # GeometricSequence
             confidence_penalty=None,  # SegmentationConfidencePenalty
             # For segmentation warmup (Always use the poisson prior)
             segmentation_warmup=0,
             ## FLAGS
             learn_delete = False,
             use_baseline = True,
             z_normalization = True,
             learn_segmentation = True,
             compose_char = False,
             log_reward = True,
             debug=False,
             print_sample=False):
    """Wire up the segmenting encoder's components, layers and options.

    Args:
        exp_global: experiment-wide settings; supplies the parameter
            collection for the linear layers and the default layer
            dimension when embed_encoder has no ``hidden_dim`` attribute.
        embed_encoder: transduces the embedding vectors to a sequence of
            vectors (required).
        segment_composer: produces word embeddings from sequences of
            character embeddings (required).
        final_transducer: the final transducer (required).
        length_prior: stored as self.length_prior.
        length_prior_alpha: stored as self.length_prior_alpha (annotated as
            GeometricSequence in the signature).
        epsilon_greedy: stored as self.eps (annotated as GeometricSequence).
        reinforce_scale: stored as self.lmbd (annotated as
            GeometricSequence).
        confidence_penalty: stored as self.confidence_penalty (annotated as
            SegmentationConfidencePenalty).
        segmentation_warmup: stored as self.segmentation_warmup.
        learn_delete: when true, the segmentation decision layer has 3
            outputs instead of 2.
        use_baseline, z_normalization, learn_segmentation, compose_char,
        log_reward, debug, print_sample: flags stored on the instance under
            the same names.

    Raises:
        AssertionError: if any of the three components is None.
    """
    register_handler(self)
    model = exp_global.dynet_param_collection.param_col

    # Sanity check
    assert embed_encoder is not None
    assert segment_composer is not None
    assert final_transducer is not None

    # The Embed Encoder transduces the embedding vectors to a sequence of vector
    self.embed_encoder = embed_encoder
    if not hasattr(embed_encoder, "hidden_dim"):
        # Fixed: this branch used to read an undefined `yaml_context`; the
        # default layer size lives on `exp_global`.
        embed_encoder_dim = exp_global.default_layer_dim
    else:
        embed_encoder_dim = embed_encoder.hidden_dim

    # The Segment transducer produced word embeddings based on sequence of character embeddings
    self.segment_composer = segment_composer
    # The final transducer
    self.final_transducer = final_transducer

    # Decision layer of segmentation
    self.segment_transform = linear.Linear(input_dim = embed_encoder_dim,
                                           output_dim = 3 if learn_delete else 2,
                                           model=model)
    # The baseline linear regression model
    self.baseline = linear.Linear(input_dim = embed_encoder_dim,
                                  output_dim = 1,
                                  model = model)

    # Flags
    self.use_baseline = use_baseline
    self.learn_segmentation = learn_segmentation
    self.learn_delete = learn_delete
    self.z_normalization = z_normalization
    self.debug = debug
    self.compose_char = compose_char
    self.print_sample = print_sample
    self.log_reward = log_reward

    # Fixed Parameters
    self.length_prior = length_prior
    self.segmentation_warmup = segmentation_warmup

    # Variable Parameters
    self.length_prior_alpha = length_prior_alpha
    self.lmbd = reinforce_scale
    self.eps = epsilon_greedy
    self.confidence_penalty = confidence_penalty

    # States of the object
    self.train = False