def train_and_test(s2smodel, train_reader, test_reader, block_size, num_quantization_bits,
                   max_epochs, epoch_size, minibatch_size, progress_printer, warm_up):
    from Sequence2Sequence import create_criterion_function, create_model_train

    model_train = create_model_train(s2smodel)
    criterion = create_criterion_function(model_train)

    # Create learner
    if block_size is not None and num_quantization_bits != default_quantization_bits:
        raise RuntimeError("Block momentum cannot be used with quantization, please remove quantized_bits option.")

    lr = 0.001 if use_attention else 0.005  # TODO: can we use the same value for both?
    local_learner = fsadagrad(model_train.parameters,
                              lr=learning_rate_schedule([lr]*2 + [lr/2]*3 + [lr/4], UnitType.sample, epoch_size),
                              momentum=momentum_as_time_constant_schedule(1100),
                              gradient_clipping_threshold_per_sample=2.3,
                              gradient_clipping_with_truncation=True)

    if block_size is not None:
        learner = block_momentum_distributed_learner(local_learner, block_size=block_size)
    else:
        learner = data_parallel_distributed_learner(local_learner,
                                                    num_quantization_bits=num_quantization_bits,
                                                    distributed_after=warm_up)

    trainer = Trainer(None, criterion, learner, progress_printer)

    train_bind = {criterion.arguments[0]: train_reader.streams.features,
                  criterion.arguments[1]: train_reader.streams.labels}

    training_session(
        mb_source=train_reader,
        trainer=trainer,
        model_inputs_to_streams=train_bind,
        mb_size=minibatch_size,
        progress_frequency=epoch_size,
        checkpoint_config=CheckpointConfig(frequency=epoch_size,
                                           filename=os.path.join(model_path, "SequenceToSequence"),
                                           restore=False),
        cv_config=CrossValidationConfig(source=test_reader, mb_size=minibatch_size)
    ).train()
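# Illustrative sketch (not part of the original script): how the piecewise
# learning-rate list passed to learning_rate_schedule above expands. With
# UnitType.sample, each list entry is a per-sample rate that applies for
# epoch_size samples, and the last entry is reused for the remainder of training.
def _show_lr_schedule_expansion(lr=0.001):
    schedule = [lr]*2 + [lr/2]*3 + [lr/4]
    print(schedule)   # [0.001, 0.001, 0.0005, 0.0005, 0.0005, 0.00025]
    return schedule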
def train(reader, model, max_epochs):
    # declare the model's input dimension, so that the saved model is usable
    model.update_signature(Sequence[SparseTensor[vocab_size]])
    #model.declare_args(vocab_size)

    # criterion: (model args, labels) -> (loss, metric)
    #  here: (query, slot_labels) -> (ce, errs)
    criterion = create_criterion_function(model)

    labels = reader.streams.slot_labels
    #labels = reader.streams.intent_labels  # for intent classification

    #from cntk.logging.graph import plot
    #plot(criterion, filename=data_dir + "/model.pdf")

    # iteration parameters -- needed here because the learner schedule depends on them
    epoch_size = 36000
    minibatch_size = 70
    #epoch_size = 1000 ; max_epochs = 1  # uncomment for faster testing

    # SGD parameters
    learner = fsadagrad(criterion.parameters,
                        lr=learning_rate_schedule([0.003]*2 + [0.0015]*12 + [0.0003], UnitType.sample, epoch_size),
                        momentum=momentum_as_time_constant_schedule(minibatch_size / -math.log(0.9)),
                        gradient_clipping_threshold_per_sample=15,
                        gradient_clipping_with_truncation=True)

    # trainer
    trainer = Trainer(None, criterion, learner)

    # process minibatches and perform model training
    log_number_of_parameters(model)
    print()
    progress_printer = ProgressPrinter(freq=100, first=10, tag='Training')  # more detailed logging
    #progress_printer = ProgressPrinter(tag='Training')

    t = 0
    for epoch in range(max_epochs):           # loop over epochs
        peek(model, epoch)                    # log some interesting info
        epoch_end = (epoch + 1) * epoch_size
        while t < epoch_end:                  # loop over minibatches of the epoch
            # BUGBUG: Changing the minibatch_size argument below has no effect.
            # TODO: change all examples to this pattern; then remove this comment
            data = reader.next_minibatch(min(minibatch_size, epoch_end - t))  # fetch minibatch
            #trainer.train_minibatch(data[reader.streams.query], data[labels])  # update model with it
            trainer.train_minibatch({criterion.arguments[0]: data[reader.streams.query],
                                     criterion.arguments[1]: data[labels]})     # update model with it
            t += data[labels].num_samples     # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True)     # log progress
        loss, metric, actual_samples = progress_printer.epoch_summary(with_metric=True)

    return loss, metric  # return values from the last epoch
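# Illustrative sketch (not part of the original script): where the momentum argument
# above comes from. momentum_as_time_constant_schedule takes a time constant measured
# in samples; assuming the usual convention that a time constant tau corresponds to a
# per-sample momentum of exp(-1/tau), choosing tau = minibatch_size / -log(0.9) makes
# the momentum accumulated over one 70-sample minibatch come out to roughly 0.9.
import math

def _momentum_time_constant(per_minibatch_momentum=0.9, minibatch_size=70):
    tau = minibatch_size / -math.log(per_minibatch_momentum)   # ~664.4 samples
    assert abs(math.exp(-minibatch_size / tau) - per_minibatch_momentum) < 1e-9
    return tau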
def train(train_reader, valid_reader, vocab, i2w, s2smodel, max_epochs, epoch_size):
    # Note: We would like to set the signature of 's2smodel' (s2smodel.update_signature()),
    # but that would cause an error because the training criterion uses a reduced sequence
    # axis for the labels (it strips the initial <s> symbol). Hence, we must leave the model
    # with unspecified input shapes and axes.

    # create the training wrapper for the s2smodel, as well as the criterion function
    model_train = create_model_train(s2smodel)
    criterion = create_criterion_function(model_train)

    # also wire in a greedy decoder so that we can properly log progress on a validation example.
    # This is not used for the actual training process and is not generally needed for training.
    model_greedy = create_model_greedy(s2smodel)

    # Instantiate the trainer object to drive the model training
    minibatch_size = 72
    lr = 0.001 if use_attention else 0.005  # TODO: can we use the same value for both?
    learner = fsadagrad(model_train.parameters,
                        lr=learning_rate_schedule([lr]*2 + [lr/2]*3 + [lr/4], UnitType.sample, epoch_size),
                        momentum=momentum_as_time_constant_schedule(1100),
                        gradient_clipping_threshold_per_sample=2.3,
                        gradient_clipping_with_truncation=True)
    trainer = Trainer(None, criterion, learner)

    # Get minibatches of sequences to train with and perform model training
    total_samples = 0
    mbs = 0
    eval_freq = 100

    # print out some useful training information
    log_number_of_parameters(model_train)
    print()
    progress_printer = ProgressPrinter(freq=30, tag='Training')
    #progress_printer = ProgressPrinter(freq=30, tag='Training', log_to_file=model_path_stem + ".log")  # use this to log to a file

    sparse_to_dense = create_sparse_to_dense(input_vocab_dim)

    for epoch in range(max_epochs):
        print("Saving model to '%s'" % model_path(epoch))
        s2smodel.save(model_path(epoch))

        while total_samples < (epoch + 1) * epoch_size:
            # get the next minibatch of training data
            mb_train = train_reader.next_minibatch(minibatch_size)
            #trainer.train_minibatch(mb_train[train_reader.streams.features], mb_train[train_reader.streams.labels])
            trainer.train_minibatch({criterion.arguments[0]: mb_train[train_reader.streams.features],
                                     criterion.arguments[1]: mb_train[train_reader.streams.labels]})

            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress

            # every eval_freq minibatches, decode one validation sequence to visually show how we're doing
            if mbs % eval_freq == 0:
                mb_valid = valid_reader.next_minibatch(1)

                # run an eval on the decoder output model (i.e. don't use the ground truth)
                e = model_greedy(mb_valid[valid_reader.streams.features])
                print(format_sequences(sparse_to_dense(mb_valid[valid_reader.streams.features]), i2w))
                print("->")
                print(format_sequences(e, i2w))

                # debugging attention
                if use_attention:
                    debug_attention(model_greedy, mb_valid[valid_reader.streams.features])

            total_samples += mb_train[train_reader.streams.labels].num_samples
            mbs += 1

        # log a summary of the stats for the epoch
        progress_printer.epoch_summary(with_metric=True)

    # done: save the final model
    print("Saving final model to '%s'" % model_path(max_epochs))
    s2smodel.save(model_path(max_epochs))
    print("%d epochs complete." % max_epochs)
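# Hypothetical helper (assumption, not from the original script): the loop above uses
# model_path(epoch) as a callable that maps an epoch index to a checkpoint file name.
# A minimal sketch of such a helper is shown below; the directory and file-name pattern
# are illustrative only.
import os

_model_dir = "Models"   # illustrative location for saved checkpoints

def model_path(epoch):
    return os.path.join(_model_dir, "model_epoch_%d.cmf" % epoch)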