def create_trainer(network, minibatch_size, epoch_size, num_quantization_bits, block_size, warm_up, progress_printer):
    if network['name'] == 'resnet20':
        lr_per_mb = [1.0]*80 + [0.1]*40 + [0.01]
    elif network['name'] == 'resnet110':
        lr_per_mb = [0.1]*1 + [1.0]*80 + [0.1]*40 + [0.01]
    else:
        raise RuntimeError("Unknown model name!")

    momentum_time_constant = -minibatch_size/np.log(0.9)
    l2_reg_weight = 0.0001

    # Set learning parameters
    lr_per_sample = [lr/minibatch_size for lr in lr_per_mb]
    lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    # learner object
    if block_size is not None and num_quantization_bits != 32:
        raise RuntimeError("Block momentum cannot be used with quantization, please remove quantized_bits option.")

    local_learner = momentum_sgd(network['output'].parameters, lr_schedule, mm_schedule,
                                 l2_regularization_weight=l2_reg_weight)

    if block_size is not None:
        learner = block_momentum_distributed_learner(local_learner, block_size=block_size)
    else:
        learner = data_parallel_distributed_learner(local_learner,
                                                    num_quantization_bits=num_quantization_bits,
                                                    distributed_after=warm_up)

    return Trainer(network['output'], (network['ce'], network['pe']), learner, progress_printer)
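# A minimal standalone sketch of the momentum time-constant conversion used in
# create_trainer above: momentum_as_time_constant_schedule expects the number of samples
# over which a gradient's contribution decays to exp(-1), so a per-minibatch momentum of
# 0.9 is converted via tc = -minibatch_size / log(0.9). Values below are illustrative.
import numpy as np

minibatch_size = 128
per_minibatch_momentum = 0.9

# Solve exp(-minibatch_size / tc) = momentum for tc.
time_constant = -minibatch_size / np.log(per_minibatch_momentum)

# Round-trip check: converting back recovers the per-minibatch momentum.
assert abs(np.exp(-minibatch_size / time_constant) - per_minibatch_momentum) < 1e-12
print(time_constant)  # ~1214.9 samples for momentum 0.9 at a minibatch size of 128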
def train_lm(testing=False):
    data = DataReader(token_to_id_path, segment_sepparator)

    # Create model nodes for the source and target inputs
    input_sequence, label_sequence = create_inputs(data.vocab_dim)

    # Create the model. It has three output nodes
    # z: the input to softmax that provides the latent representation of the next token
    # cross_entropy: this is used as the training criterion
    # error: this is a binary indicator of whether the model predicts the correct token
    z, cross_entropy, error = create_model(input_sequence, label_sequence, data.vocab_dim, hidden_dim)

    # For measurement we use the (built-in) full softmax.
    full_ce = C.cross_entropy_with_softmax(z, label_sequence)

    # print out some useful training information
    log_number_of_parameters(z) ; print()

    # Run the training loop
    num_trained_samples = 0
    num_trained_samples_since_last_report = 0

    # Instantiate the trainer object to drive the model training
    lr_schedule = learning_rate_schedule(learning_rate, UnitType.sample)
    momentum_schedule = momentum_as_time_constant_schedule(momentum_as_time_constant)
    gradient_clipping_with_truncation = True
    learner = momentum_sgd(z.parameters, lr_schedule, momentum_schedule,
                           gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
                           gradient_clipping_with_truncation=gradient_clipping_with_truncation)
    trainer = Trainer(z, (cross_entropy, error), learner)

    last_avg_ce = 0
    for epoch_count in range(num_epochs):
        for features, labels, token_count in data.minibatch_generator(train_file_path, sequence_length, sequences_per_batch):
            arguments = ({input_sequence: features, label_sequence: labels})

            t_start = timeit.default_timer()
            trainer.train_minibatch(arguments)
            t_end = timeit.default_timer()

            samples_per_second = token_count / (t_end - t_start)

            # Print progress report every num_samples_between_progress_report samples
            if num_trained_samples_since_last_report >= num_samples_between_progress_report or num_trained_samples == 0:
                av_ce = average_cross_entropy(full_ce, input_sequence, label_sequence, data)
                print_progress(samples_per_second, av_ce, num_trained_samples, t_start)
                num_trained_samples_since_last_report = 0
                last_avg_ce = av_ce

            num_trained_samples += token_count
            num_trained_samples_since_last_report += token_count

        if not testing:
            # after each epoch save the model
            model_filename = "models/lm_epoch%d.dnn" % epoch_count
            z.save(model_filename)
            print("Saved model to '%s'" % model_filename)

    return last_avg_ce
def train_model(reader, model, criterion, epoch_size=50000, max_epochs=80):
    minibatch_size = 64

    # learning parameters
    learner = momentum_sgd(model.parameters,
                           lr=learning_rate_schedule([0.0015625]*20 + [0.00046875]*20 + [0.00015625]*20 + [0.000046875]*10 + [0.000015625],
                                                     unit=UnitType.sample, epoch_size=epoch_size),
                           momentum=momentum_as_time_constant_schedule([0]*20 + [600]*20 + [1200], epoch_size=epoch_size),
                           l2_regularization_weight=0.002)

    # trainer object
    trainer = Trainer(None, criterion, learner)

    # perform model training
    log_number_of_parameters(model) ; print()
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)

    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            mb = reader.next_minibatch(min(minibatch_size, epoch_size - sample_count))  # fetch minibatch.
            #trainer.train_minibatch(mb[reader.streams.features], mb[reader.streams.labels])
            trainer.train_minibatch({criterion.arguments[0]: mb[reader.streams.features],
                                     criterion.arguments[1]: mb[reader.streams.labels]})
            sample_count += mb[reader.streams.labels].num_samples               # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True)     # log progress
        loss, metric, actual_samples = progress_printer.epoch_summary(with_metric=True)
        model.save(os.path.join(model_path, "ConvNet_CIFAR10_DataAug_{}.dnn".format(epoch)))

    # return evaluation error.
    return loss, metric  # return values from last epoch
def create_trainer(network, epoch_size, num_quantization_bits, block_size, warm_up):
    # Instantiate the trainer object to drive the model training
    lr_per_minibatch = learning_rate_schedule(0.5, UnitType.minibatch)
    momentum_time_constant = momentum_as_time_constant_schedule(1100)
    clipping_threshold_per_sample = 2.3
    gradient_clipping_with_truncation = True

    # Create learner
    if block_size is not None and num_quantization_bits != default_quantization_bits:
        raise RuntimeError("Block momentum cannot be used with quantization, please remove quantized_bits option.")

    local_learner = momentum_sgd(network['output'].parameters,
                                 lr_per_minibatch, momentum_time_constant,
                                 gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
                                 gradient_clipping_with_truncation=gradient_clipping_with_truncation)

    if block_size is not None:
        learner = block_momentum_distributed_learner(local_learner, block_size=block_size)
    else:
        learner = data_parallel_distributed_learner(local_learner,
                                                    num_quantization_bits=num_quantization_bits,
                                                    distributed_after=warm_up)

    return Trainer(network['output'], (network['ce'], network['pe']), learner)
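# A minimal sketch of how a trainer built by the distributed create_trainer helpers is
# typically driven: a training_session pulls minibatches from a reader, and every MPI
# worker calls Communicator.finalize() when it is done. The 'feature'/'label' keys and
# the reader object are assumptions mirroring the surrounding scripts, not original code.
from cntk.train import training_session
from cntk.train.distributed import Communicator

def run_distributed_training(network, reader, trainer, epoch_size, minibatch_size=128):
    # Map reader streams onto the network inputs (key names assumed).
    input_map = {
        network['feature']: reader.streams.features,
        network['label']: reader.streams.labels
    }
    training_session(
        trainer=trainer,
        mb_source=reader,
        mb_size=minibatch_size,
        model_inputs_to_streams=input_map,
        progress_frequency=epoch_size
    ).train()
    # Each worker must call finalize() once, otherwise the MPI job does not shut down cleanly.
    Communicator.finalize()

# Launched with something like:  mpiexec -n 4 python TrainScript.py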
def init_trainer(config, text_lines, slot_value_lines):
    hidden_dim = config.hidden_dim
    segment_begin = config.segment_begin
    segment_end = config.segment_end
    data = DataReader(text_lines, slot_value_lines, segment_begin, segment_end)

    # Create model nodes for the source and target inputs
    vocab_dim = data.vocab_dim
    sv_dim = data.sv_dim
    input_sequence, sv_pair, label_sequence, inputH, inputC = create_inputs(hidden_dim, sv_dim, vocab_dim)
    model = create_model(hidden_dim, sv_dim, vocab_dim)
    z = model(input_sequence, inputH, inputC, sv_pair)

    # cross_entropy: this is used as the training criterion
    ce, err = cross_entropy_with_full_softmax(z, label_sequence, sv_dim, vocab_dim)

    learning_rate = config.learning_rate
    momentum_as_time_constant = config.momentum_as_time_constant
    clipping_threshold_per_sample = config.clipping_threshold_per_sample
    lr_schedule = learning_rate_schedule(learning_rate, UnitType.sample)
    gradient_clipping_with_truncation = True
    momentum_schedule = momentum_as_time_constant_schedule(momentum_as_time_constant)

    # Instantiate the trainer object to drive the model training
    learner = momentum_sgd(z.parameters, lr_schedule, momentum_schedule,
                           gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
                           gradient_clipping_with_truncation=gradient_clipping_with_truncation)
    trainer = Trainer(z, (ce, err), learner)

    inputs = [input_sequence, sv_pair, label_sequence, inputH, inputC]
    return data, z, trainer, inputs
def Evaluator(criterion):
    loss, metric = Trainer._get_loss_metric(criterion)
    parameters = set(loss.parameters)
    if metric:
        parameters |= set(metric.parameters)
    dummy_learner = momentum_sgd(tuple(parameters),
                                 lr=learning_rate_schedule(1, UnitType.minibatch),
                                 momentum=momentum_as_time_constant_schedule(0))
    return Trainer(None, (loss, metric), dummy_learner)
def train_lm(training_file, epochs, max_num_minibatches):
    # load the data and vocab
    data, char_to_ix, ix_to_char, data_size, vocab_dim = load_data_and_vocab(training_file)

    # Model the source and target inputs to the model
    input_sequence, label_sequence = create_inputs(vocab_dim)

    # create the model
    model = create_model(vocab_dim)

    # and apply it to the input sequence
    z = model(input_sequence)

    # set up the criterion (loss and metric)
    ce = cross_entropy_with_softmax(z, label_sequence)
    errs = classification_error(z, label_sequence)

    # Instantiate the trainer object to drive the model training
    lr_per_sample = learning_rate_schedule(0.001, UnitType.sample)
    momentum_time_constant = momentum_as_time_constant_schedule(1100)
    clipping_threshold_per_sample = 5.0
    gradient_clipping_with_truncation = True
    learner = momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant,
                           gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
                           gradient_clipping_with_truncation=gradient_clipping_with_truncation)
    progress_printer = ProgressPrinter(freq=100, tag='Training')
    trainer = Trainer(z, (ce, errs), learner, progress_printer)

    sample_freq = 1000
    minibatches_per_epoch = min(data_size // minibatch_size, max_num_minibatches // epochs)

    # print out some useful training information
    log_number_of_parameters(z)
    print("Running %d epochs with %d minibatches per epoch" % (epochs, minibatches_per_epoch))
    print()

    for e in range(0, epochs):
        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        # If it's the start of the data, we specify that we are looking at a new sequence (True)
        mask = [True]
        for b in range(0, minibatches_per_epoch):
            # get the data
            features, labels = get_data(b, minibatch_size, data, char_to_ix, vocab_dim)
            arguments = ({input_sequence: features, label_sequence: labels}, mask)
            mask = [False]
            trainer.train_minibatch(arguments)

            global_minibatch = e*minibatches_per_epoch + b
            if global_minibatch % sample_freq == 0:
                print(sample(z, ix_to_char, vocab_dim, char_to_ix))

        model_filename = "models/shakespeare_epoch%d.dnn" % (e+1)
        z.save(model_filename)
        print("Saved model to '%s'" % model_filename)
def train_fast_rcnn(debug_output=False):
    if debug_output:
        print("Storing graphs and intermediate models to %s." % os.path.join(abs_path, "Output"))

    # Create the minibatch source
    minibatch_source = create_mb_source(image_height, image_width, num_channels,
                                        num_classes, num_rois, base_path, "train")

    # Input variables denoting features, rois and label data
    image_input = input((num_channels, image_height, image_width))
    roi_input = input((num_rois, 4))
    label_input = input((num_rois, num_classes))

    # define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source.streams.features,
        roi_input: minibatch_source.streams.rois,
        label_input: minibatch_source.streams.roiLabels
    }

    # Instantiate the Fast R-CNN prediction model and loss function
    frcn_output = frcn_predictor(image_input, roi_input, num_classes)
    ce = cross_entropy_with_softmax(frcn_output, label_input, axis=1)
    pe = classification_error(frcn_output, label_input, axis=1)
    if debug_output:
        plot(frcn_output, os.path.join(abs_path, "Output", "graph_frcn.png"))

    # Set learning parameters
    l2_reg_weight = 0.0005
    lr_per_sample = [0.00001] * 10 + [0.000001] * 5 + [0.0000001]
    lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    # Instantiate the trainer object
    learner = momentum_sgd(frcn_output.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = Trainer(frcn_output, (ce, pe), learner, progress_printer)

    # Get minibatches of images and perform model training
    print("Training Fast R-CNN model for %s epochs." % max_epochs)
    log_number_of_parameters(frcn_output)
    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = minibatch_source.next_minibatch(min(mb_size, epoch_size-sample_count), input_map=input_map)
            trainer.train_minibatch(data)                            # update model with it
            sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far

        trainer.summarize_training_progress()
        if debug_output:
            frcn_output.save(os.path.join(abs_path, "Output", "frcn_py_%s.model" % (epoch+1)))

    return frcn_output
def train_model(debug_output=False):
    # Create the minibatch source
    minibatch_source = create_reader(map_file)

    # Input variables denoting features and label data
    image_input = input_variable((num_channels, image_height, image_width))
    label_input = input_variable((num_classes))

    # define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source.streams.features,
        label_input: minibatch_source.streams.labels
    }

    # Instantiate the prediction model and loss function
    model = modify_model(image_input, num_classes)
    ce = cross_entropy_with_softmax(model, label_input)
    pe = classification_error(model, label_input)

    # Set learning parameters
    l2_reg_weight = 0.0005
    lr_per_sample = [0.00001] * 10 + [0.000001] * 5 + [0.0000001]
    momentum_time_constant = 10
    lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    # Instantiate the trainer object
    progress_writers = [ProgressPrinter(tag='Training', num_epochs=max_epochs)]
    learner = momentum_sgd(model.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(model, (ce, pe), learner, progress_writers)

    # Get minibatches of images and perform model training
    print("Training image classifier for %s epochs." % max_epochs)
    log_number_of_parameters(model)
    for epoch in range(max_epochs):
        sample_count = 0
        while sample_count < epoch_size:
            data = minibatch_source.next_minibatch(min(mb_size, epoch_size - sample_count), input_map=input_map)
            trainer.train_minibatch(data)
            sample_count += trainer.previous_minibatch_sample_count

        trainer.summarize_training_progress()
        model.save(os.path.join(output_model_folder, 'withcrops_{}.dnn'.format(epoch + 1)))

    return
def train_fast_rcnn(debug_output=False, model_path=model_file):
    if debug_output:
        print("Storing graphs and intermediate models to %s." % os.path.join(abs_path, "Output"))

    # Create the minibatch source
    minibatch_source = create_mb_source(image_height, image_width, num_channels,
                                        num_classes, num_rois, base_path, "train")

    # Input variables denoting features, rois and label data
    image_input = C.input_variable((num_channels, image_height, image_width))
    roi_input = C.input_variable((num_rois, 4))
    label_input = C.input_variable((num_rois, num_classes))

    # define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source.streams.features,
        roi_input: minibatch_source.streams.rois,
        label_input: minibatch_source.streams.roiLabels
    }

    # Instantiate the Fast R-CNN prediction model and loss function
    frcn_output = frcn_predictor(image_input, roi_input, num_classes, model_path)
    ce = cross_entropy_with_softmax(frcn_output, label_input, axis=1)
    pe = classification_error(frcn_output, label_input, axis=1)
    if debug_output:
        plot(frcn_output, os.path.join(abs_path, "Output", "graph_frcn.png"))

    # Set learning parameters
    l2_reg_weight = 0.0005
    lr_per_sample = [0.00001] * 10 + [0.000001] * 5 + [0.0000001]
    lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    # Instantiate the trainer object
    learner = momentum_sgd(frcn_output.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = Trainer(frcn_output, (ce, pe), learner, progress_printer)

    # Get minibatches of images and perform model training
    print("Training Fast R-CNN model for %s epochs." % max_epochs)
    log_number_of_parameters(frcn_output)
    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = minibatch_source.next_minibatch(min(mb_size, epoch_size-sample_count), input_map=input_map)
            trainer.train_minibatch(data)                            # update model with it
            sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far

        trainer.summarize_training_progress()
        if debug_output:
            frcn_output.save(os.path.join(abs_path, "Output", "frcn_py_%s.model" % (epoch+1)))

    return frcn_output
def Evaluator(model, criterion):
    from cntk import Trainer
    from cntk.learners import momentum_sgd, learning_rate_schedule, UnitType, momentum_as_time_constant_schedule
    loss, metric = Trainer._get_loss_metric(criterion)
    parameters = set(loss.parameters)
    if model:
        parameters |= set(model.parameters)
    if metric:
        parameters |= set(metric.parameters)
    dummy_learner = momentum_sgd(tuple(parameters),
                                 lr=learning_rate_schedule(1, UnitType.minibatch),
                                 momentum=momentum_as_time_constant_schedule(0))
    return Trainer(model, (loss, metric), dummy_learner)
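# A minimal usage sketch for the Evaluator helpers above: the dummy learner never updates
# any parameters, so Trainer.test_minibatch can be used for pure evaluation. The reader
# and its stream names mirror the other scripts here and are assumptions, not original code.
def evaluate(criterion, reader, num_samples, minibatch_size=64):
    evaluator = Evaluator(None, criterion)  # evaluation-only Trainer, no weight updates

    metric_sum = 0.0
    sample_count = 0
    while sample_count < num_samples:
        mb = reader.next_minibatch(min(minibatch_size, num_samples - sample_count))
        mb_samples = mb[reader.streams.labels].num_samples
        # test_minibatch returns the average metric over the minibatch.
        metric_sum += mb_samples * evaluator.test_minibatch(
            {criterion.arguments[0]: mb[reader.streams.features],
             criterion.arguments[1]: mb[reader.streams.labels]})
        sample_count += mb_samples

    return metric_sum / sample_count  # average metric over all evaluated samples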
def train_and_test(s2smodel, train_reader, test_reader, block_size, num_quantization_bits, max_epochs,
                   epoch_size, minibatch_size, progress_printer, warm_up):
    from Sequence2Sequence import create_criterion_function, create_model_train
    model_train = create_model_train(s2smodel)
    criterion = create_criterion_function(model_train)

    # Create learner
    if block_size is not None and num_quantization_bits != default_quantization_bits:
        raise RuntimeError("Block momentum cannot be used with quantization, please remove quantized_bits option.")

    lr = 0.001 if use_attention else 0.005   # TODO: can we use the same value for both?
    local_learner = fsadagrad(model_train.parameters,
                              lr=learning_rate_schedule([lr] * 2 + [lr / 2] * 3 + [lr / 4], UnitType.sample, epoch_size),
                              momentum=momentum_as_time_constant_schedule(1100),
                              gradient_clipping_threshold_per_sample=2.3,
                              gradient_clipping_with_truncation=True)

    if block_size is not None:
        learner = block_momentum_distributed_learner(local_learner, block_size=block_size)
    else:
        learner = data_parallel_distributed_learner(local_learner,
                                                    num_quantization_bits=num_quantization_bits,
                                                    distributed_after=warm_up)

    trainer = Trainer(None, criterion, learner, progress_printer)

    train_bind = {
        criterion.arguments[0]: train_reader.streams.features,
        criterion.arguments[1]: train_reader.streams.labels
    }

    training_session(
        mb_source=train_reader,
        trainer=trainer,
        model_inputs_to_streams=train_bind,
        mb_size=minibatch_size,
        progress_frequency=epoch_size,
        checkpoint_config=CheckpointConfig(frequency=epoch_size,
                                           filename=os.path.join(model_path, "SequenceToSequence"),
                                           restore=False),
        cv_config=CrossValidationConfig(source=test_reader, mb_size=minibatch_size)
    ).train()
def train(reader, model, max_epochs):
    # declare the model's input dimension, so that the saved model is usable
    model.update_signature(Sequence[SparseTensor[vocab_size]])
    #model.declare_args(vocab_size)

    # criterion: (model args, labels) -> (loss, metric)
    #   here  (query, slot_labels) -> (ce, errs)
    criterion = create_criterion_function(model)

    labels = reader.streams.slot_labels
    #labels = reader.streams.intent_labels  # for intent classification

    #from cntk.logging.graph import plot
    #plot(criterion, filename=data_dir + "/model.pdf")

    # iteration parameters --needed here because the learner schedule needs it
    epoch_size = 36000
    minibatch_size = 70
    #epoch_size = 1000 ; max_epochs = 1 # uncomment for faster testing

    # SGD parameters
    learner = fsadagrad(criterion.parameters,
                        lr=learning_rate_schedule([0.003]*2 + [0.0015]*12 + [0.0003], UnitType.sample, epoch_size),
                        momentum=momentum_as_time_constant_schedule(minibatch_size / -math.log(0.9)),
                        gradient_clipping_threshold_per_sample=15,
                        gradient_clipping_with_truncation=True)

    # trainer
    trainer = Trainer(None, criterion, learner)

    # process minibatches and perform model training
    log_number_of_parameters(model) ; print()
    progress_printer = ProgressPrinter(freq=100, first=10, tag='Training')  # more detailed logging
    #progress_printer = ProgressPrinter(tag='Training')

    t = 0
    for epoch in range(max_epochs):     # loop over epochs
        peek(model, epoch)              # log some interesting info
        epoch_end = (epoch+1) * epoch_size
        while t < epoch_end:            # loop over minibatches in the epoch
            # BUGBUG: The change of minibatch_size parameter vv has no effect.
            # TODO: change all examples to this pattern; then remove this comment
            data = reader.next_minibatch(min(minibatch_size, epoch_end-t))  # fetch minibatch
            #trainer.train_minibatch(data[reader.streams.query], data[labels])  # update model with it
            trainer.train_minibatch({criterion.arguments[0]: data[reader.streams.query],
                                     criterion.arguments[1]: data[labels]})     # update model with it
            t += data[labels].num_samples                                       # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True)     # log progress
        loss, metric, actual_samples = progress_printer.epoch_summary(with_metric=True)

    return loss, metric  # return values from last epoch
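# A minimal sketch of applying the slot-tagging model trained above to a single query.
# The word-id list and slot-name list are hypothetical helpers, not part of the original
# script; the model itself maps a one-hot word sequence to per-word slot-label scores.
import numpy as np
import cntk as C

def tag_query(model, word_ids, vocab_size, slot_names):
    # Encode the query as one sparse one-hot sequence.
    query = C.Value.one_hot([word_ids], vocab_size)
    # Evaluate the model on the sequence; [0] selects the (only) sequence in the batch.
    scores = model.eval({model.arguments[0]: query})[0]
    # Pick the best-scoring slot label for every word position.
    return [slot_names[i] for i in np.argmax(scores, axis=1)]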
def train_model(reader, reader_test, model, epoch_size=50000, max_epochs=80):
    # declare the model's input dimension
    # Training does not require this, but it is needed for deployment.
    model.update_signature((num_channels, image_height, image_width))

    # criterion function. This is what is being trained.
    # Model gets "sandwiched" between normalization (not part of model proper) and criterion.
    criterion = create_criterion_function(model, normalize=lambda x: x / 256)
    #debughelpers.dump_function(criterion, 'criterion')

    #from cntk.logging.graph import plot
    #plot(criterion, filename=os.path.join(model_path, "ConvNet_CIFAR10_DataAug.pdf"))

    # iteration parameters
    minibatch_size = 64
    #epoch_size = 1000 ; max_epochs = 1 # for faster testing

    # learning parameters
    learner = momentum_sgd(model.parameters,
                           lr=learning_rate_schedule([0.0015625]*20 + [0.00046875]*20 + [0.00015625]*20 + [0.000046875]*10 + [0.000015625],
                                                     unit=UnitType.sample, epoch_size=epoch_size),
                           momentum=momentum_as_time_constant_schedule([0]*20 + [600]*20 + [1200], epoch_size=epoch_size),
                           l2_regularization_weight=0.002)

    # trainer object
    trainer = Trainer(None, criterion, learner)

    # perform model training
    log_number_of_parameters(model) ; print()
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)

    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            mb = reader.next_minibatch(min(minibatch_size, epoch_size - sample_count))  # fetch minibatch.
            #trainer.train_minibatch(mb[reader.streams.features], mb[reader.streams.labels])
            trainer.train_minibatch({criterion.arguments[0]: mb[reader.streams.features],
                                     criterion.arguments[1]: mb[reader.streams.labels]})
            sample_count += mb[reader.streams.labels].num_samples               # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True)     # log progress
        loss, metric, actual_samples = progress_printer.epoch_summary(with_metric=True)
        model.save(os.path.join(model_path, "ConvNet_CIFAR10_DataAug_{}.dnn".format(epoch)))

    # return evaluation error.
    return loss, metric  # return values from last epoch
def train(train_reader, valid_reader, vocab, i2w, s2smodel, max_epochs, epoch_size):
    # Note: We would like to set the signature of 's2smodel' (s2smodel.update_signature()), but that will cause
    # an error since the training criterion uses a reduced sequence axis for the labels.
    # This is because it removes the initial <s> symbol. Hence, we must leave the model
    # with unspecified input shapes and axes.

    # create the training wrapper for the s2smodel, as well as the criterion function
    model_train = create_model_train(s2smodel)
    criterion = create_criterion_function(model_train)

    # also wire in a greedy decoder so that we can properly log progress on a validation example
    # This is not used for the actual training process.
    model_greedy = create_model_greedy(s2smodel)  # This does not need to be done in training generally though

    # Instantiate the trainer object to drive the model training
    minibatch_size = 72
    lr = 0.001 if use_attention else 0.005   # TODO: can we use the same value for both?
    learner = adam_sgd(model_train.parameters,
                       lr=learning_rate_schedule([lr] * 2 + [lr / 2] * 3 + [lr / 4], UnitType.sample, epoch_size),
                       momentum=momentum_as_time_constant_schedule(1100),
                       gradient_clipping_threshold_per_sample=2.3,
                       gradient_clipping_with_truncation=True)
    trainer = Trainer(None, criterion, learner)

    # Get minibatches of sequences to train with and perform model training
    total_samples = 0
    mbs = 0
    eval_freq = 100

    # print out some useful training information
    log_number_of_parameters(model_train) ; print()
    progress_printer = ProgressPrinter(freq=30, tag='Training')
    #progress_printer = ProgressPrinter(freq=30, tag='Training', log_to_file=model_path_stem + ".log")  # use this to log to file

    sparse_to_dense = create_sparse_to_dense(input_vocab_dim)

    for epoch in range(max_epochs):
        print("Saving model to '%s'" % model_path(epoch))
        s2smodel.save(model_path(epoch))

        while total_samples < (epoch + 1) * epoch_size:
            # get next minibatch of training data
            mb_train = train_reader.next_minibatch(minibatch_size)
            #trainer.train_minibatch(mb_train[train_reader.streams.features], mb_train[train_reader.streams.labels])
            trainer.train_minibatch({criterion.arguments[0]: mb_train[train_reader.streams.features],
                                     criterion.arguments[1]: mb_train[train_reader.streams.labels]})
            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress

            # every N MBs evaluate on a test sequence to visually show how we're doing
            if mbs % eval_freq == 0:
                mb_valid = valid_reader.next_minibatch(1)
                # run an eval on the decoder output model (i.e. don't use the groundtruth)
                e = model_greedy(mb_valid[valid_reader.streams.features])
                print(format_sequences(sparse_to_dense(mb_valid[valid_reader.streams.features]), i2w))
                print("->")
                print(format_sequences(e, i2w))
                # debugging attention
                if use_attention:
                    debug_attention(model_greedy, mb_valid[valid_reader.streams.features])

            total_samples += mb_train[train_reader.streams.labels].num_samples
            mbs += 1

        # log a summary of the stats for the epoch
        progress_printer.epoch_summary(with_metric=True)

    # done: save the final model
    print("Saving final model to '%s'" % model_path(max_epochs))
    s2smodel.save(model_path(max_epochs))
    print("%d epochs complete." % max_epochs)
model = Sequential([
    For(range(2), lambda: Sequential([Stabilizer(), Recurrence(LSTM(256), go_backwards=False)])),
    Dense(vocab_size)
])

z = model(input_sequence)
z_sm = cntk.softmax(z)

ce = cross_entropy_with_softmax(z, label_sequence)
errs = classification_error(z, label_sequence)

lr_per_sample = learning_rate_schedule(0.001, UnitType.sample)
momentum_time_constant = momentum_as_time_constant_schedule(1100)
clipping_threshold_per_sample = 5.0
gradient_clipping_with_truncation = True
learner = momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant,
                       gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
                       gradient_clipping_with_truncation=gradient_clipping_with_truncation)
progress_printer = ProgressPrinter(freq=100, tag='Training')
trainer = Trainer(z, (ce, errs), learner, progress_printer)

log_number_of_parameters(z)

def sample(net, prime_text='', use_hardmax=True, length=100, temperature=1.0):
def train_and_evaluate(reader, reader_test, model, epoch_size=50000, max_epochs=5):
    # declare the model's input dimension
    # Training does not require this, but it is needed for deployment.
    model.update_signature((num_channels, image_height, image_width))

    # criterion function. This is what is being trained.
    # Model gets "sandwiched" between normalization (not part of model proper) and criterion.
    criterion = create_criterion_function(model, normalize=lambda x: x / 256)
    #debughelpers.dump_function(criterion, 'criterion')

    #from cntk.logging.graph import plot
    #plot(criterion, filename=os.path.join(model_path, "ConvNet_CIFAR10_DataAug.pdf"))

    # iteration parameters
    minibatch_size = 64
    #epoch_size = 1000 ; max_epochs = 1 # for faster testing

    # learning parameters
    learner = momentum_sgd(model.parameters,
                           lr=learning_rate_schedule([0.0015625]*20 + [0.00046875]*20 + [0.00015625]*20 + [0.000046875]*10 + [0.000015625],
                                                     unit=UnitType.sample, epoch_size=epoch_size),
                           momentum=momentum_as_time_constant_schedule([0]*20 + [600]*20 + [1200], epoch_size=epoch_size),
                           l2_regularization_weight=0.002)

    # trainer object
    trainer = Trainer(None, criterion, learner)

    # perform model training
    log_number_of_parameters(model)
    print()
    progress_printer = ProgressPrinter(tag='Training')

    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            mb = reader.next_minibatch(min(minibatch_size, epoch_size - sample_count))  # fetch minibatch.
            #trainer.train_minibatch(mb[reader.streams.features], mb[reader.streams.labels])
            trainer.train_minibatch({criterion.arguments[0]: mb[reader.streams.features],
                                     criterion.arguments[1]: mb[reader.streams.labels]})
            sample_count += mb[reader.streams.labels].num_samples               # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True)     # log progress
        loss, metric, actual_samples = progress_printer.epoch_summary(with_metric=True)
        model.save(os.path.join(model_path, "ConvNet_CIFAR10_DataAug_{}.dnn".format(epoch)))
        progress_printer.epoch_summary(with_metric=True)

    # TODO: we should be done here
    #return metric_numer/metric_denom

    ### Evaluation action
    # evaluate with current Trainer instance; just to make sure we save and load the model correctly and BN works now --TODO: delete once confirmed
    epoch_size = 10000
    minibatch_size = 16

    metric_numer = 0
    metric_denom = 0
    sample_count = 0
    minibatch_index = 0

    while sample_count < epoch_size:
        mbsize = min(minibatch_size, epoch_size - sample_count)
        mb = reader_test.next_minibatch(mbsize)
        metric_numer += mbsize * trainer.test_minibatch({criterion.arguments[0]: mb[reader_test.streams.features],
                                                         criterion.arguments[1]: mb[reader_test.streams.labels]})
        metric_denom += mbsize
        sample_count += mb[reader_test.streams.labels].num_samples
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index + 1, (metric_numer * 100.0) / metric_denom, metric_denom))
    print("")

    # return evaluation error.
    return loss, metric  # return values from last epoch
def train(train_reader, valid_reader, vocab, i2w, s2smodel, max_epochs, epoch_size):
    # Note: We would like to set the signature of 's2smodel' (s2smodel.update_signature()), but that will cause
    # an error since the training criterion uses a reduced sequence axis for the labels.
    # This is because it removes the initial <s> symbol. Hence, we must leave the model
    # with unspecified input shapes and axes.

    # create the training wrapper for the s2smodel, as well as the criterion function
    model_train = create_model_train(s2smodel)
    criterion = create_criterion_function(model_train)

    # also wire in a greedy decoder so that we can properly log progress on a validation example
    # This is not used for the actual training process.
    model_greedy = create_model_greedy(s2smodel)  # This does not need to be done in training generally though

    # Instantiate the trainer object to drive the model training
    minibatch_size = 72
    lr = 0.001 if use_attention else 0.005   # TODO: can we use the same value for both?
    learner = fsadagrad(model_train.parameters,
                        lr=learning_rate_schedule([lr]*2 + [lr/2]*3 + [lr/4], UnitType.sample, epoch_size),
                        momentum=momentum_as_time_constant_schedule(1100),
                        gradient_clipping_threshold_per_sample=2.3,
                        gradient_clipping_with_truncation=True)
    trainer = Trainer(None, criterion, learner)

    # Get minibatches of sequences to train with and perform model training
    total_samples = 0
    mbs = 0
    eval_freq = 100

    # print out some useful training information
    log_number_of_parameters(model_train) ; print()
    progress_printer = ProgressPrinter(freq=30, tag='Training')
    #progress_printer = ProgressPrinter(freq=30, tag='Training', log_to_file=model_path_stem + ".log")  # use this to log to file

    sparse_to_dense = create_sparse_to_dense(input_vocab_dim)

    for epoch in range(max_epochs):
        print("Saving model to '%s'" % model_path(epoch))
        s2smodel.save(model_path(epoch))

        while total_samples < (epoch+1) * epoch_size:
            # get next minibatch of training data
            mb_train = train_reader.next_minibatch(minibatch_size)
            #trainer.train_minibatch(mb_train[train_reader.streams.features], mb_train[train_reader.streams.labels])
            trainer.train_minibatch({criterion.arguments[0]: mb_train[train_reader.streams.features],
                                     criterion.arguments[1]: mb_train[train_reader.streams.labels]})
            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress

            # every N MBs evaluate on a test sequence to visually show how we're doing
            if mbs % eval_freq == 0:
                mb_valid = valid_reader.next_minibatch(1)
                # run an eval on the decoder output model (i.e. don't use the groundtruth)
                e = model_greedy(mb_valid[valid_reader.streams.features])
                print(format_sequences(sparse_to_dense(mb_valid[valid_reader.streams.features]), i2w))
                print("->")
                print(format_sequences(e, i2w))
                # debugging attention
                if use_attention:
                    debug_attention(model_greedy, mb_valid[valid_reader.streams.features])

            total_samples += mb_train[train_reader.streams.labels].num_samples
            mbs += 1

        # log a summary of the stats for the epoch
        progress_printer.epoch_summary(with_metric=True)

    # done: save the final model
    print("Saving final model to '%s'" % model_path(max_epochs))
    s2smodel.save(model_path(max_epochs))
    print("%d epochs complete." % max_epochs)
def conv3d_ucf11(train_reader, test_reader, max_epochs=30):
    # Replace 0 with 1 to get detailed log.
    set_computation_network_trace_level(0)

    # These values must match for both train and test reader.
    image_height = train_reader.height
    image_width = train_reader.width
    num_channels = train_reader.channel_count
    sequence_length = train_reader.sequence_length
    num_output_classes = train_reader.label_count

    # Input variables denoting the features and label data
    input_var = input((num_channels, sequence_length, image_height, image_width), np.float32)
    label_var = input(num_output_classes, np.float32)

    # Instantiate simple 3D Convolution network inspired by VGG network
    # and http://vlg.cs.dartmouth.edu/c3d/c3d_video.pdf
    with default_options(activation=relu):
        z = Sequential([
            Convolution3D((3, 3, 3), 64, pad=True),
            MaxPooling((1, 2, 2), (1, 2, 2)),
            For(range(3), lambda i: [
                Convolution3D((3, 3, 3), [96, 128, 128][i], pad=True),
                Convolution3D((3, 3, 3), [96, 128, 128][i], pad=True),
                MaxPooling((2, 2, 2), (2, 2, 2))
            ]),
            For(range(2), lambda: [
                Dense(1024),
                Dropout(0.5)
            ]),
            Dense(num_output_classes, activation=None)
        ])(input_var)

    # loss and classification error.
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)

    # training config
    epoch_size = 1322  # for now we manually specify epoch size
    minibatch_size = 4

    # Set learning parameters
    lr_per_sample = [0.01] * 10 + [0.001] * 10 + [0.0001]
    lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size, unit=UnitType.sample)
    momentum_time_constant = 4096
    mm_schedule = momentum_as_time_constant_schedule([momentum_time_constant], epoch_size=epoch_size)

    # Instantiate the trainer object to drive the model training
    learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule, True)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = Trainer(z, (ce, pe), learner, progress_printer)

    log_number_of_parameters(z)
    print()

    # Get minibatches of images to train with and perform model training
    for epoch in range(max_epochs):  # loop over epochs
        train_reader.reset()
        while train_reader.has_more():
            videos, labels, current_minibatch = train_reader.next_minibatch(minibatch_size)
            trainer.train_minibatch({input_var: videos, label_var: labels})
        trainer.summarize_training_progress()

    # Test data for trained model
    epoch_size = 332
    minibatch_size = 2

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    minibatch_index = 0

    test_reader.reset()
    while test_reader.has_more():
        videos, labels, current_minibatch = test_reader.next_minibatch(minibatch_size)
        # minibatch data to be trained with
        metric_numer += trainer.test_minibatch({input_var: videos, label_var: labels}) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of samples processed so far.
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index + 1, (metric_numer * 100.0) / metric_denom, metric_denom))
    print("")

    return metric_numer / metric_denom
def init_train_fast_rcnn(image_height, image_width, num_classes, num_rois, mb_size, max_epochs, cntk_lr_per_image,
                         l2_reg_weight, momentum_time_constant, base_path, boSkipTraining=False, debug_output=False,
                         tensorboardLogDir=None):

    # make sure we use GPU for training
    if use_default_device().type() == 0:
        print("WARNING: using CPU for training.")
    else:
        print("Using GPU for training.")

    # Instantiate the Fast R-CNN prediction model
    image_input = input_variable((3, image_height, image_width))
    roi_input = input_variable((num_rois, 4))
    label_input = input_variable((num_rois, num_classes))
    frcn_output, frcn_penultimateLayer = frcn_predictor(image_input, roi_input, num_classes, base_path)

    if boSkipTraining:
        print("Using pre-trained DNN without refinement")
        return frcn_penultimateLayer

    # Create the minibatch source and define mapping from reader streams to network inputs
    minibatch_source, epoch_size = create_mb_source("train", image_height, image_width, num_classes, num_rois,
                                                    base_path, randomize=True)
    input_map = {
        image_input: minibatch_source.streams.features,
        roi_input: minibatch_source.streams.rois,
        label_input: minibatch_source.streams.roiLabels
    }

    # set loss / error functions
    ce = cross_entropy_with_softmax(frcn_output, label_input, axis=1)
    pe = classification_error(frcn_output, label_input, axis=1)
    if debug_output:
        plot(frcn_output, "graph_frcn.png")

    # set the progress printer(s)
    progress_writers = [ProgressPrinter(tag='Training', num_epochs=max_epochs)]
    if tensorboardLogDir is not None:
        tensorboard_writer = TensorBoardProgressWriter(freq=10, log_dir=tensorboardLogDir, model=frcn_output)
        progress_writers.append(tensorboard_writer)

    # Set learning parameters and instantiate the trainer object
    lr_per_sample = [f / float(num_rois) for f in cntk_lr_per_image]
    lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)
    learner = momentum_sgd(frcn_output.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(frcn_output, (ce, pe), learner, progress_writers)

    # Get minibatches of images and perform model training
    print("Training Fast R-CNN model for %s epochs." % max_epochs)
    log_number_of_parameters(frcn_output)
    for epoch in range(max_epochs):
        sample_count = 0

        # loop over minibatches in the epoch
        while sample_count < epoch_size:
            data = minibatch_source.next_minibatch(min(mb_size, epoch_size - sample_count), input_map=input_map)
            if sample_count % 100 == 1:
                print("Training in progress: epoch {} of {}, sample count {} of {}".format(epoch, max_epochs, sample_count, epoch_size))
            trainer.train_minibatch(data)
            sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far
        trainer.summarize_training_progress()

        # Log mean of each parameter tensor, so that we can confirm that the parameters change indeed.
        if tensorboardLogDir is not None:
            for parameter in frcn_output.parameters:
                tensorboard_writer.write_value(parameter.uid + "/mean", np.mean(parameter.value), epoch)
                tensorboard_writer.write_value(parameter.uid + "/std", np.std(parameter.value), epoch)
                tensorboard_writer.write_value(parameter.uid + "/absSum", np.sum(np.abs(parameter.value)), epoch)

        if debug_output:
            frcn_output.save_model("frcn_py_%s.model" % (epoch + 1))

    return frcn_output
def train_and_evaluate(reader_train, reader_test, network_name, epoch_size, max_epochs, profiler_dir=None,
                       model_dir=None, log_dir=None, tensorboard_logdir=None, gen_heartbeat=False):

    set_computation_network_trace_level(0)

    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width), name='features')
    label_var = C.input_variable((num_classes))

    # create model, and configure learning parameters
    if network_name == 'resnet20':
        z = create_cifar10_model(input_var, 3, num_classes)
        lr_per_mb = [1.0]*80 + [0.1]*40 + [0.01]
    elif network_name == 'resnet110':
        z = create_cifar10_model(input_var, 18, num_classes)
        lr_per_mb = [0.1]*1 + [1.0]*80 + [0.1]*40 + [0.01]
    else:
        raise RuntimeError("Unknown model name!")

    # loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)

    # shared training parameters
    minibatch_size = 128
    momentum_time_constant = -minibatch_size/np.log(0.9)
    l2_reg_weight = 0.0001

    # Set learning parameters
    lr_per_sample = [lr/minibatch_size for lr in lr_per_mb]
    lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    # progress writers
    progress_writers = [ProgressPrinter(tag='Training', log_to_file=log_dir, num_epochs=max_epochs,
                                        gen_heartbeat=gen_heartbeat)]
    tensorboard_writer = None
    if tensorboard_logdir is not None:
        tensorboard_writer = TensorBoardProgressWriter(freq=10, log_dir=tensorboard_logdir, model=z)
        progress_writers.append(tensorboard_writer)

    # trainer object
    learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule, l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(z, (ce, pe), learner, progress_writers)

    # define mapping from reader streams to network inputs
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    log_number_of_parameters(z) ; print()

    # perform model training
    if profiler_dir:
        start_profiler(profiler_dir, True)

    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = reader_train.next_minibatch(min(minibatch_size, epoch_size - sample_count),
                                               input_map=input_map)          # fetch minibatch.
            trainer.train_minibatch(data)                                    # update model with it
            sample_count += trainer.previous_minibatch_sample_count          # count samples processed so far

        trainer.summarize_training_progress()

        # Log mean of each parameter tensor, so that we can confirm that the parameters change indeed.
        if tensorboard_writer:
            for parameter in z.parameters:
                tensorboard_writer.write_value(parameter.uid + "/mean", reduce_mean(parameter).eval(), epoch)

        if model_dir:
            z.save(os.path.join(model_dir, network_name + "_{}.dnn".format(epoch)))
        enable_profiler()  # begin to collect profiler data after first epoch

    if profiler_dir:
        stop_profiler()

    # Evaluation parameters
    test_epoch_size = 10000
    minibatch_size = 16

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    sample_count = 0

    while sample_count < test_epoch_size:
        current_minibatch = min(minibatch_size, test_epoch_size - sample_count)
        # Fetch next test min batch.
        data = reader_test.next_minibatch(current_minibatch, input_map=input_map)
        # minibatch data to be trained with
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of samples processed so far.
        sample_count += data[label_var].num_samples

    print("")
    trainer.summarize_test_progress()
    print("")

    return metric_numer / metric_denom
def train_lm():
    data = DataReader(token_to_id_path, segment_sepparator)

    # Create model nodes for the source and target inputs
    input_sequence, label_sequence = create_inputs(data.vocab_dim)

    # Create the model. It has three output nodes
    # z: the input to softmax that provides the latent representation of the next token
    # cross_entropy: this is used as the training criterion
    # error: this is a binary indicator of whether the model predicts the correct token
    z, cross_entropy, error = create_model(input_sequence, label_sequence, data.vocab_dim, hidden_dim)

    # For measurement we use the (built-in) full softmax.
    full_ce = C.cross_entropy_with_softmax(z, label_sequence)

    # print out some useful training information
    log_number_of_parameters(z)
    print()

    # Run the training loop
    num_trained_samples = 0
    num_trained_samples_since_last_report = 0

    # Instantiate the trainer object to drive the model training
    lr_schedule = learning_rate_schedule(learning_rate, UnitType.sample)
    momentum_schedule = momentum_as_time_constant_schedule(momentum_as_time_constant)
    gradient_clipping_with_truncation = True
    learner = momentum_sgd(z.parameters, lr_schedule, momentum_schedule,
                           gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
                           gradient_clipping_with_truncation=gradient_clipping_with_truncation)
    trainer = Trainer(z, (cross_entropy, error), learner)

    for epoch_count in range(num_epochs):
        for features, labels, token_count in data.minibatch_generator(train_file_path, sequence_length, sequences_per_batch):
            arguments = ({input_sequence: features, label_sequence: labels})

            t_start = timeit.default_timer()
            trainer.train_minibatch(arguments)
            t_end = timeit.default_timer()

            samples_per_second = token_count / (t_end - t_start)

            # Print progress report every num_samples_between_progress_report samples
            if num_trained_samples_since_last_report >= num_samples_between_progress_report or num_trained_samples == 0:
                av_ce = average_cross_entropy(full_ce, input_sequence, label_sequence, data)
                print_progress(samples_per_second, av_ce, num_trained_samples, t_start)
                num_trained_samples_since_last_report = 0

            num_trained_samples += token_count
            num_trained_samples_since_last_report += token_count

        # after each epoch save the model
        model_filename = "models/lm_epoch%d.dnn" % epoch_count
        z.save_model(model_filename)
        print("Saved model to '%s'" % model_filename)
def train_lm(training_file, epochs, max_num_minibatches):
    # load the data and vocab
    data, char_to_ix, ix_to_char, data_size, vocab_dim = load_data_and_vocab(training_file)

    # Model the source and target inputs to the model
    input_sequence, label_sequence = create_inputs(vocab_dim)

    # create the model
    model = create_model(vocab_dim)

    # and apply it to the input sequence
    z = model(input_sequence)

    # set up the criterion (loss and metric)
    ce = cross_entropy_with_softmax(z, label_sequence)
    errs = classification_error(z, label_sequence)

    # Instantiate the trainer object to drive the model training
    lr_per_sample = learning_rate_schedule(0.001, UnitType.sample)
    momentum_time_constant = momentum_as_time_constant_schedule(1100)
    clipping_threshold_per_sample = 5.0
    gradient_clipping_with_truncation = True
    learner = momentum_sgd(z.parameters, lr_per_sample, momentum_time_constant,
                           gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
                           gradient_clipping_with_truncation=gradient_clipping_with_truncation)
    progress_printer = ProgressPrinter(freq=100, tag='Training')
    trainer = Trainer(z, (ce, errs), learner, progress_printer)

    sample_freq = 1000
    minibatches_per_epoch = min(data_size // minibatch_size, max_num_minibatches // epochs)

    # print out some useful training information
    log_number_of_parameters(z)
    print("Running %d epochs with %d minibatches per epoch" % (epochs, minibatches_per_epoch))
    print()

    for e in range(0, epochs):
        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        # If it's the start of the data, we specify that we are looking at a new sequence (True)
        mask = [True]
        for b in range(0, minibatches_per_epoch):
            # get the data
            features, labels = get_data(b, minibatch_size, data, char_to_ix, vocab_dim)
            arguments = ({input_sequence: features, label_sequence: labels}, mask)
            mask = [False]
            trainer.train_minibatch(arguments)

            global_minibatch = e * minibatches_per_epoch + b
            if global_minibatch % sample_freq == 0:
                print(sample(z, ix_to_char, vocab_dim, char_to_ix))

        model_filename = "models/shakespeare_epoch%d.dnn" % (e + 1)
        z.save_model(model_filename)
        print("Saved model to '%s'" % model_filename)
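# A minimal sketch of reloading one of the checkpoints written by the training loops
# above (via save/save_model) for later inference; the file name is only an example.
import cntk as C

z = C.load_model("models/shakespeare_epoch1.dnn")

# The reloaded Function can be evaluated directly or handed to a Trainer to resume training.
print([arg.shape for arg in z.arguments])  # inspect the reloaded model's input shapes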