def train(reader, model, max_epochs):
    """Train ``model`` on minibatches read from ``reader``.

    Builds a dense ``query`` input and a sparse ``slot_labels`` input (sized
    by the module-level ``input_dim`` / ``num_labels``), applies ``model`` to
    the query, and trains the result with an ``adam_sgd`` learner. Progress
    is reported through a ``ProgressPrinter`` that is also handed
    ``tensorboard_log_dir='atis_log'``.

    Args:
        reader: Minibatch source. Must provide ``next_minibatch`` and the
            streams ``reader.streams.query`` and ``reader.streams.slot_labels``.
        model: Callable applied to the ``query`` input to build the network.
        max_epochs: Number of epochs to run; each epoch is ``epoch_size``
            (36000) samples.

    Returns:
        ``(loss, metric)`` as returned by ``epoch_summary`` for the last
        epoch, or ``(None, None)`` when ``max_epochs`` is not positive and no
        epoch ran (previously this case raised ``UnboundLocalError``).
    """
    # Input variables denoting the features and label data
    query = Input(input_dim, is_sparse=False)
    # NOTE(review): an earlier TODO here read "make sparse once it works";
    # the input is already declared sparse, so that TODO looks stale -- confirm.
    slot_labels = Input(num_labels, is_sparse=True)

    # apply model to input
    z = model(query)

    # loss and metric
    ce = cross_entropy_with_softmax(z, slot_labels)
    pe = classification_error(z, slot_labels)

    # training config
    epoch_size = 36000
    minibatch_size = 70
    # TODO: Change to round number. This is 664.39. 700?
    momentum_time_constant = momentum_as_time_constant_schedule(
        minibatch_size / -math.log(0.9))
    # LR schedule over epochs (we don't run that many epochs, but if we did,
    # these are good values)
    lr_schedule = [0.003] * 2 + [0.0015] * 12 + [0.0003]

    # trainer object
    lr_per_sample = learning_rate_schedule(
        lr_schedule, UnitType.sample, epoch_size)
    learner = adam_sgd(
        z.parameters,
        lr=lr_per_sample,
        momentum=momentum_time_constant,
        unit_gain=True,
        low_memory=True,
        gradient_clipping_threshold_per_sample=15,
        gradient_clipping_with_truncation=True)
    trainer = Trainer(z, (ce, pe), [learner])

    # define mapping from reader streams to network inputs
    input_map = {
        query: reader.streams.query,
        slot_labels: reader.streams.slot_labels,
    }

    # process minibatches and perform model training
    log_number_of_parameters(z)
    print()

    # more detailed logging
    progress_printer = ProgressPrinter(
        freq=100, first=10, tag='Training',
        tensorboard_log_dir='atis_log', model=z)
    #progress_printer = ProgressPrinter(tag='Training')

    # Pre-bind the results so a run with zero epochs still returns cleanly.
    loss = metric = None

    t = 0  # samples processed so far, accumulated across all epochs
    for epoch in range(max_epochs):  # loop over epochs
        epoch_end = (epoch + 1) * epoch_size
        while t < epoch_end:  # loop over minibatches on the epoch
            # BUGBUG? The change of minibatch_size parameter vv has no effect.
            data = reader.next_minibatch(
                min(minibatch_size, epoch_end - t),
                input_map=input_map)  # fetch minibatch
            trainer.train_minibatch(data)  # update model with it
            t += trainer.previous_minibatch_sample_count
            progress_printer.update_with_trainer(
                trainer, with_metric=True)  # log progress
        # The third value of the summary is not needed here.
        loss, metric, _ = progress_printer.epoch_summary(with_metric=True)

    return loss, metric
def create_adam_learner(learn_params, learning_rate=0.0005, gradient_clipping_threshold_per_sample=0.001):
    """Create an Adam learner for ``learn_params`` and log its settings.

    Args:
        learn_params: Parameters to be learned; passed straight through to
            ``learner.adam_sgd``.
        learning_rate: Value used to build the learning-rate schedule, with
            ``learner.UnitType.sample`` as the unit.
        gradient_clipping_threshold_per_sample: Clipping threshold forwarded
            to ``learner.adam_sgd``; truncation is always enabled.

    Returns:
        The learner object returned by ``learner.adam_sgd``.
    """
    lr_schedule = learner.learning_rate_schedule(
        learning_rate, learner.UnitType.sample)
    momentum = learner.momentum_schedule(0.90)
    momentum_var = learner.momentum_schedule(0.999)

    # The 4th and 5th arguments are kept positional exactly as before.
    # NOTE(review): presumably they map to unit_gain and variance_momentum --
    # confirm against the installed adam_sgd signature.
    adam_learner = learner.adam_sgd(
        learn_params,
        lr_schedule,
        momentum,
        True,
        momentum_var,
        low_memory=False,
        gradient_clipping_threshold_per_sample=gradient_clipping_threshold_per_sample,
        gradient_clipping_with_truncation=True)

    # Log message previously read "learning rage"; corrected to "learning rate".
    learner_desc = 'Alg: Adam, learning rate: {0}, momentum: {1}, gradient clip: {2}'.format(
        learning_rate, momentum[0], gradient_clipping_threshold_per_sample)
    logger.log("Create learner. {0}".format(learner_desc))

    return adam_learner
def train(reader, model, max_epochs):
    """Train ``model`` on minibatches read from ``reader``.

    Builds a dense ``query`` input and a sparse ``slot_labels`` input (sized
    by the module-level ``input_dim`` / ``num_labels``), applies ``model`` to
    the query, and trains the result with an ``adam_sgd`` learner, printing
    progress through a ``ProgressPrinter``.

    Args:
        reader: Minibatch source. Must provide ``next_minibatch`` and the
            streams ``reader.streams.query`` and ``reader.streams.slot_labels``.
        model: Callable applied to the ``query`` input to build the network.
        max_epochs: Number of epochs to run; each epoch is ``epoch_size``
            (36000) samples.

    Returns:
        ``(loss, metric)`` as returned by ``epoch_summary`` for the last
        epoch, or ``(None, None)`` when ``max_epochs`` is not positive and no
        epoch ran (previously this case raised ``UnboundLocalError``).
    """
    # Input variables denoting the features and label data
    query = Input(input_dim, is_sparse=False)
    # NOTE(review): an earlier TODO here read "make sparse once it works";
    # the input is already declared sparse, so that TODO looks stale -- confirm.
    slot_labels = Input(num_labels, is_sparse=True)

    # apply model to input
    z = model(query)

    # loss and metric
    ce = cross_entropy_with_softmax(z, slot_labels)
    pe = classification_error(z, slot_labels)

    # training config
    epoch_size = 36000
    minibatch_size = 70
    # TODO: Change to round number. This is 664.39. 700?
    momentum_time_constant = momentum_as_time_constant_schedule(
        minibatch_size / -math.log(0.9))
    # LR schedule over epochs (we don't run that many epochs, but if we did,
    # these are good values)
    lr_schedule = [0.003] * 2 + [0.0015] * 12 + [0.0003]

    # trainer object
    lr_per_sample = learning_rate_schedule(
        lr_schedule, UnitType.sample, epoch_size)
    learner = adam_sgd(
        z.parameters,
        lr=lr_per_sample,
        momentum=momentum_time_constant,
        low_memory=True,
        gradient_clipping_threshold_per_sample=15,
        gradient_clipping_with_truncation=True)
    # Loss and metric are passed as separate positional arguments here.
    trainer = Trainer(z, ce, pe, [learner])

    # define mapping from reader streams to network inputs
    input_map = {
        query: reader.streams.query,
        slot_labels: reader.streams.slot_labels,
    }

    # process minibatches and perform model training
    log_number_of_parameters(z)
    print()
    progress_printer = ProgressPrinter(
        freq=100, first=10, tag='Training')  # more detailed logging
    #progress_printer = ProgressPrinter(tag='Training')

    # Pre-bind the results so a run with zero epochs still returns cleanly.
    loss = metric = None

    t = 0  # samples processed so far, accumulated across all epochs
    for epoch in range(max_epochs):  # loop over epochs
        epoch_end = (epoch + 1) * epoch_size
        while t < epoch_end:  # loop over minibatches on the epoch
            # BUGBUG? The change of minibatch_size parameter vv has no effect.
            data = reader.next_minibatch(
                min(minibatch_size, epoch_end - t),
                input_map=input_map)  # fetch minibatch
            trainer.train_minibatch(data)  # update model with it
            t += trainer.previous_minibatch_sample_count
            progress_printer.update_with_trainer(
                trainer, with_metric=True)  # log progress
        # The third value of the summary is not needed here.
        loss, metric, _ = progress_printer.epoch_summary(with_metric=True)

    return loss, metric
def train(reader, model, max_epochs):
    """Train ``model`` using a criterion function built from it.

    Declares the model's input signature, derives a criterion via
    ``create_criterion_function(model)``, and trains it with an ``adam_sgd``
    learner on minibatches from ``reader``, using ``reader.streams.query`` as
    the first criterion argument and ``reader.streams.slot_labels`` as the
    second.

    Args:
        reader: Minibatch source. Must provide ``next_minibatch`` and the
            streams ``reader.streams.query`` and ``reader.streams.slot_labels``.
        model: Model object; must support ``update_signature``.
        max_epochs: Number of epochs to run; each epoch is ``epoch_size``
            (36000) samples.

    Returns:
        ``(loss, metric)`` as returned by ``epoch_summary`` for the last
        epoch, or ``(None, None)`` when ``max_epochs`` is not positive and no
        epoch ran (previously this case raised ``UnboundLocalError``).
    """
    # declare the model's input dimension, so that the saved model is usable
    model.update_signature(Sequence[SparseTensor[vocab_size]])
    #model.declare_args(vocab_size)

    # criterion: (model args, labels) -> (loss, metric)
    #   here  (query, slot_labels) -> (ce, errs)
    criterion = create_criterion_function(model)

    labels = reader.streams.slot_labels
    #labels = reader.streams.intent_labels  # for intent classification

    #from cntk.graph import plot
    #plot(criterion, filename=data_dir + "/model.pdf")

    # iteration parameters  --needed here because learner schedule needs it
    epoch_size = 36000
    minibatch_size = 70
    #epoch_size = 1000 ; max_epochs = 1 # uncomment for faster testing

    # SGD parameters
    lr_per_sample = learning_rate_schedule(
        [0.003] * 2 + [0.0015] * 12 + [0.0003], UnitType.sample, epoch_size)
    momentum = momentum_as_time_constant_schedule(
        minibatch_size / -math.log(0.9))
    learner = adam_sgd(
        criterion.parameters,
        lr=lr_per_sample,
        momentum=momentum,
        low_memory=True,
        gradient_clipping_threshold_per_sample=15,
        gradient_clipping_with_truncation=True)

    # trainer
    trainer = Trainer(None, criterion, learner)

    # process minibatches and perform model training
    log_number_of_parameters(model)
    print()
    progress_printer = ProgressPrinter(
        freq=100, first=10, tag='Training')  # more detailed logging
    #progress_printer = ProgressPrinter(tag='Training')

    # Pre-bind the results so a run with zero epochs still returns cleanly.
    loss = metric = None

    t = 0  # samples processed so far, accumulated across all epochs
    for epoch in range(max_epochs):  # loop over epochs
        peek(model, epoch)  # log some interesting info
        epoch_end = (epoch + 1) * epoch_size
        while t < epoch_end:  # loop over minibatches on the epoch
            # BUGBUG: The change of minibatch_size parameter vv has no effect.
            # TODO: change all examples to this pattern; then remove this comment
            data = reader.next_minibatch(
                min(minibatch_size, epoch_end - t))  # fetch minibatch
            #trainer.train_minibatch(data[reader.streams.query], data[labels])
            trainer.train_minibatch({
                criterion.arguments[0]: data[reader.streams.query],
                criterion.arguments[1]: data[labels],
            })  # update model with it
            t += data[labels].num_samples  # count samples processed so far
            progress_printer.update_with_trainer(
                trainer, with_metric=True)  # log progress
        # The third value of the summary is not needed here.
        loss, metric, _ = progress_printer.epoch_summary(with_metric=True)

    return loss, metric  # return values from last epoch
def create_trainer():
    """Build the input variables, the CBOW model and its Trainer.

    Creates one sparse input for the centre word, ``context_size`` sparse
    context inputs and ``G.negative`` sparse negative-sample inputs (all of
    dimension ``G.embedding_vocab_size``), plus a dense target input of
    dimension ``G.negative + 1``. The model comes from
    ``create_word2vec_cbow_model`` and is trained with ``adam_sgd``.

    Returns:
        A 7-tuple ``(word_one_hot, context_one_hots, negative_one_hots,
        target, trainer, word_negative_context_product, embedding_layer)``.
    """
    def _sparse_input(name):
        # Every one-hot input shares the same shape, dtype and sparsity.
        return C.input_variable(
            (G.embedding_vocab_size), np.float32, is_sparse=True, name=name)

    ##################################################
    ################### Inputs #######################
    ##################################################
    word_one_hot = _sparse_input('word_input')

    context_one_hots = []
    for i in range(context_size):
        context_one_hots.append(_sparse_input('context_input{}'.format(i)))

    negative_one_hots = []
    for i in range(G.negative):
        negative_one_hots.append(_sparse_input('negative_input{}'.format(i)))

    # The target labels should have first as 1 and rest as 0
    target = C.input_variable((G.negative + 1), np.float32)

    model_and_embedding = create_word2vec_cbow_model(
        word_one_hot, context_one_hots, negative_one_hots)
    word_negative_context_product, embedding_layer = model_and_embedding

    # The training loss and the evaluation metric are the same expression,
    # built twice as two separate nodes.
    loss = C.binary_cross_entropy(word_negative_context_product, target)
    eval_loss = C.binary_cross_entropy(word_negative_context_product, target)

    lr_schedule = learning_rate_schedule(G.learning_rate, UnitType.minibatch)
    learner = adam_sgd(
        word_negative_context_product.parameters,
        lr=lr_schedule,
        momentum=momentum_as_time_constant_schedule(700))

    trainer = Trainer(word_negative_context_product, (loss, eval_loss), learner)

    return (word_one_hot, context_one_hots, negative_one_hots, target,
            trainer, word_negative_context_product, embedding_layer)
def train(train_reader, valid_reader, vocab, i2w, s2smodel, max_epochs, epoch_size):
    """Train the sequence-to-sequence model and save it once per epoch.

    Before each epoch the current ``s2smodel`` is saved to
    ``model_path(epoch)``; after the last epoch it is saved to
    ``model_path(max_epochs)``. Every ``eval_freq`` (100) minibatches a single
    validation sequence is run through a greedy decoder and printed.

    Args:
        train_reader: Training minibatch source; must expose
            ``streams.features`` and ``streams.labels``.
        valid_reader: Validation minibatch source; must expose
            ``streams.features``.
        vocab: Not used inside this function.
        i2w: Passed to ``format_sequences`` when printing sequences.
        s2smodel: Model to train; must support ``save``.
        max_epochs: Number of epochs to run.
        epoch_size: Number of label samples that make up one epoch.
    """
    # Note: We would like to set the signature of 's2smodel'
    # (s2smodel.update_signature()), but that will cause an error since the
    # training criterion uses a reduced sequence axis for the labels.
    # This is because it removes the initial <s> symbol. Hence, we must leave
    # the model with unspecified input shapes and axes.

    # create the training wrapper for the s2smodel, as well as the criterion function
    model_train = create_model_train(s2smodel)
    criterion = create_criterion_function(model_train)

    # also wire in a greedy decoder so that we can properly log progress on a
    # validation example. This is not used for the actual training process.
    model_greedy = create_model_greedy(s2smodel)

    # Instantiate the trainer object to drive the model training
    minibatch_size = 72
    # TODO: can we use the same value for both?
    if use_attention:
        lr = 0.001
    else:
        lr = 0.005
    lr_per_sample = learning_rate_schedule(
        [lr] * 2 + [lr / 2] * 3 + [lr / 4], UnitType.sample, epoch_size)
    learner = adam_sgd(
        model_train.parameters,
        lr=lr_per_sample,
        momentum=momentum_as_time_constant_schedule(1100),
        gradient_clipping_threshold_per_sample=2.3,
        gradient_clipping_with_truncation=True)
    trainer = Trainer(None, criterion, learner)

    # Get minibatches of sequences to train with and perform model training
    total_samples = 0
    mbs = 0
    eval_freq = 100

    # print out some useful training information
    log_number_of_parameters(model_train)
    print()
    progress_printer = ProgressPrinter(freq=30, tag='Training')
    #progress_printer = ProgressPrinter(freq=30, tag='Training', log_to_file=model_path_stem + ".log") # use this to log to file

    sparse_to_dense = create_sparse_to_dense(input_vocab_dim)

    for epoch in range(max_epochs):
        print("Saving model to '%s'" % model_path(epoch))
        s2smodel.save(model_path(epoch))

        epoch_end = (epoch + 1) * epoch_size
        while total_samples < epoch_end:
            # get next minibatch of training data
            mb_train = train_reader.next_minibatch(minibatch_size)
            train_features = mb_train[train_reader.streams.features]
            train_labels = mb_train[train_reader.streams.labels]
            trainer.train_minibatch({
                criterion.arguments[0]: train_features,
                criterion.arguments[1]: train_labels,
            })

            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress

            # every N MBs evaluate on a test sequence to visually show how we're doing
            if mbs % eval_freq == 0:
                mb_valid = valid_reader.next_minibatch(1)
                valid_features = mb_valid[valid_reader.streams.features]

                # run an eval on the decoder output model (i.e. don't use the groundtruth)
                e = model_greedy(valid_features)
                print(format_sequences(sparse_to_dense(valid_features), i2w))
                print("->")
                print(format_sequences(e, i2w))

                # debugging attention
                if use_attention:
                    debug_attention(model_greedy, valid_features)

            total_samples += train_labels.num_samples
            mbs += 1

        # log a summary of the stats for the epoch
        progress_printer.epoch_summary(with_metric=True)

    # done: save the final model
    print("Saving final model to '%s'" % model_path(max_epochs))
    s2smodel.save(model_path(max_epochs))
    print("%d epochs complete." % max_epochs)
def train(reader, model, max_epochs):
    """Train ``model`` using a criterion function built from it.

    Declares the model's input signature, derives a criterion via
    ``create_criterion_function(model)``, and trains it with an ``adam_sgd``
    learner on minibatches from ``reader``, using ``reader.streams.query`` as
    the first criterion argument and ``reader.streams.slot_labels`` as the
    second.

    Args:
        reader: Minibatch source. Must provide ``next_minibatch`` and the
            streams ``reader.streams.query`` and ``reader.streams.slot_labels``.
        model: Model object; must support ``update_signature``.
        max_epochs: Number of epochs to run; each epoch is ``epoch_size``
            (36000) samples.

    Returns:
        ``(loss, metric)`` as returned by ``epoch_summary`` for the last
        epoch, or ``(None, None)`` when ``max_epochs`` is not positive and no
        epoch ran (previously this case raised ``UnboundLocalError``).
    """
    # declare the model's input dimension, so that the saved model is usable
    model.update_signature(Sequence[SparseTensor[vocab_size]])
    #model.declare_args(vocab_size)

    # criterion: (model args, labels) -> (loss, metric)
    #   here  (query, slot_labels) -> (ce, errs)
    criterion = create_criterion_function(model)

    labels = reader.streams.slot_labels
    #labels = reader.streams.intent_labels  # for intent classification

    #from cntk.graph import plot
    #plot(criterion, filename=data_dir + "/model.pdf")

    # iteration parameters  --needed here because learner schedule needs it
    epoch_size = 36000
    minibatch_size = 70
    #epoch_size = 1000 ; max_epochs = 1 # uncomment for faster testing

    # SGD parameters
    learner = adam_sgd(
        criterion.parameters,
        lr=learning_rate_schedule(
            [0.003] * 2 + [0.0015] * 12 + [0.0003],
            UnitType.sample,
            epoch_size),
        momentum=momentum_as_time_constant_schedule(
            minibatch_size / -math.log(0.9)),
        low_memory=True,
        gradient_clipping_threshold_per_sample=15,
        gradient_clipping_with_truncation=True)

    # trainer
    trainer = Trainer(None, criterion, learner)

    # process minibatches and perform model training
    log_number_of_parameters(model)
    print()
    progress_printer = ProgressPrinter(
        freq=100, first=10, tag='Training')  # more detailed logging
    #progress_printer = ProgressPrinter(tag='Training')

    # Pre-bind the results so that running zero epochs returns (None, None)
    # instead of failing on an unbound local at the return statement.
    loss, metric = None, None

    t = 0  # samples processed so far, accumulated across all epochs
    for epoch in range(max_epochs):  # loop over epochs
        peek(model, epoch)  # log some interesting info
        epoch_end = (epoch + 1) * epoch_size
        while t < epoch_end:  # loop over minibatches on the epoch
            # BUGBUG: The change of minibatch_size parameter vv has no effect.
            # TODO: change all examples to this pattern; then remove this comment
            data = reader.next_minibatch(
                min(minibatch_size, epoch_end - t))  # fetch minibatch
            #trainer.train_minibatch(data[reader.streams.query], data[labels])
            trainer.train_minibatch({
                criterion.arguments[0]: data[reader.streams.query],
                criterion.arguments[1]: data[labels],
            })  # update model with it
            t += data[labels].num_samples  # count samples processed so far
            progress_printer.update_with_trainer(
                trainer, with_metric=True)  # log progress
        # Only the first two summary values are used.
        loss, metric, _ = progress_printer.epoch_summary(with_metric=True)

    return loss, metric  # return values from last epoch
def train(train_reader, valid_reader, vocab, i2w, s2smodel, max_epochs, epoch_size):
    """Run seq2seq training, checkpointing the model around every epoch.

    The model is written to ``model_path(epoch)`` at the start of each epoch
    and to ``model_path(max_epochs)`` once all epochs are done. On every
    100th minibatch (counting from the first) one validation sequence is
    decoded greedily and printed next to its input.

    Args:
        train_reader: Source of training minibatches; its
            ``streams.features`` / ``streams.labels`` are fed to the criterion.
        valid_reader: Source of validation minibatches (``streams.features``).
        vocab: Not used inside this function.
        i2w: Forwarded to ``format_sequences`` for printing.
        s2smodel: The model being trained; must support ``save``.
        max_epochs: How many epochs to run.
        epoch_size: Label-sample count that defines one epoch.
    """
    # Note: We would like to set the signature of 's2smodel'
    # (s2smodel.update_signature()), but that will cause an error since the
    # training criterion uses a reduced sequence axis for the labels.
    # This is because it removes the initial <s> symbol. Hence, we must leave
    # the model with unspecified input shapes and axes.

    def _checkpoint(message, index):
        # Announce and write one model snapshot.
        print(message % model_path(index))
        s2smodel.save(model_path(index))

    def _show_validation_sample():
        # Decode a single validation sequence without using the ground truth.
        mb_valid = valid_reader.next_minibatch(1)
        e = model_greedy(mb_valid[valid_reader.streams.features])
        print(
            format_sequences(
                sparse_to_dense(mb_valid[valid_reader.streams.features]),
                i2w))
        print("->")
        print(format_sequences(e, i2w))

        # debugging attention
        if use_attention:
            debug_attention(model_greedy,
                            mb_valid[valid_reader.streams.features])

    # create the training wrapper for the s2smodel, as well as the criterion function
    model_train = create_model_train(s2smodel)
    criterion = create_criterion_function(model_train)

    # also wire in a greedy decoder so that we can properly log progress on a
    # validation example. This is not used for the actual training process.
    model_greedy = create_model_greedy(s2smodel)

    # Instantiate the trainer object to drive the model training
    minibatch_size = 72
    lr = 0.001 if use_attention else 0.005  # TODO: can we use the same value for both?
    per_epoch_rates = [lr] * 2 + [lr / 2] * 3 + [lr / 4]
    learner = adam_sgd(
        model_train.parameters,
        lr=learning_rate_schedule(per_epoch_rates, UnitType.sample, epoch_size),
        momentum=momentum_as_time_constant_schedule(1100),
        gradient_clipping_threshold_per_sample=2.3,
        gradient_clipping_with_truncation=True)
    trainer = Trainer(None, criterion, learner)

    # Get minibatches of sequences to train with and perform model training
    total_samples = 0
    mbs = 0
    eval_freq = 100

    # print out some useful training information
    log_number_of_parameters(model_train)
    print()
    progress_printer = ProgressPrinter(freq=30, tag='Training')
    #progress_printer = ProgressPrinter(freq=30, tag='Training', log_to_file=model_path_stem + ".log") # use this to log to file

    sparse_to_dense = create_sparse_to_dense(input_vocab_dim)

    for epoch in range(max_epochs):
        _checkpoint("Saving model to '%s'", epoch)

        while total_samples < (epoch + 1) * epoch_size:
            # get next minibatch of training data
            mb_train = train_reader.next_minibatch(minibatch_size)
            feed = {
                criterion.arguments[0]: mb_train[train_reader.streams.features],
                criterion.arguments[1]: mb_train[train_reader.streams.labels],
            }
            trainer.train_minibatch(feed)

            progress_printer.update_with_trainer(
                trainer, with_metric=True)  # log progress

            # every N MBs evaluate on a test sequence to visually show how we're doing
            if mbs % eval_freq == 0:
                _show_validation_sample()

            total_samples += mb_train[train_reader.streams.labels].num_samples
            mbs += 1

        # log a summary of the stats for the epoch
        progress_printer.epoch_summary(with_metric=True)

    # done: save the final model
    _checkpoint("Saving final model to '%s'", max_epochs)
    print("%d epochs complete." % max_epochs)