def create_trainer(network, minibatch_size, epoch_size, num_quantization_bits, block_size, warm_up, progress_printer):
    if network['name'] == 'resnet20':
        lr_per_mb = [1.0]*80 + [0.1]*40 + [0.01]
    elif network['name'] == 'resnet110':
        lr_per_mb = [0.1]*1 + [1.0]*80 + [0.1]*40 + [0.01]
    else:
        raise RuntimeError("Unknown model name!")

    momentum_time_constant = -minibatch_size/np.log(0.9)
    l2_reg_weight = 0.0001

    # Set learning parameters
    lr_per_sample = [lr/minibatch_size for lr in lr_per_mb]
    lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    # learner object
    if block_size is not None and num_quantization_bits != 32:
        raise RuntimeError("Block momentum cannot be used with quantization, please remove quantized_bits option.")

    local_learner = momentum_sgd(network['output'].parameters, lr_schedule, mm_schedule,
                                 l2_regularization_weight=l2_reg_weight)

    if block_size is not None:
        learner = block_momentum_distributed_learner(local_learner, block_size=block_size)
    else:
        learner = data_parallel_distributed_learner(local_learner,
                                                    num_quantization_bits=num_quantization_bits,
                                                    distributed_after=warm_up)

    return Trainer(network['output'], (network['ce'], network['pe']), learner, progress_printer)
def create_trainer(network, epoch_size, num_quantization_bits, block_size, warm_up):
    # Instantiate the trainer object to drive the model training
    lr_per_minibatch = learning_rate_schedule(0.5, UnitType.minibatch)
    momentum_time_constant = momentum_as_time_constant_schedule(1100)
    clipping_threshold_per_sample = 2.3
    gradient_clipping_with_truncation = True

    # Create learner
    if block_size is not None and num_quantization_bits != default_quantization_bits:
        raise RuntimeError(
            "Block momentum cannot be used with quantization, please remove quantized_bits option.")

    local_learner = momentum_sgd(network['output'].parameters,
                                 lr_per_minibatch, momentum_time_constant,
                                 gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
                                 gradient_clipping_with_truncation=gradient_clipping_with_truncation)

    if block_size is not None:
        learner = block_momentum_distributed_learner(local_learner, block_size=block_size)
    else:
        learner = data_parallel_distributed_learner(local_learner,
                                                    num_quantization_bits=num_quantization_bits,
                                                    distributed_after=warm_up)

    return Trainer(network['output'], (network['ce'], network['pe']), learner)
def create_trainer(network, minibatch_size, epoch_size, num_quantization_bits):
    if network['name'] == 'resnet20':
        lr_per_mb = [1.0]*80 + [0.1]*40 + [0.01]
    elif network['name'] == 'resnet110':
        lr_per_mb = [0.1]*1 + [1.0]*80 + [0.1]*40 + [0.01]
    else:
        raise RuntimeError("Unknown model name!")

    momentum_time_constant = -minibatch_size / np.log(0.9)
    l2_reg_weight = 0.0001

    # Set learning parameters
    lr_per_sample = [lr / minibatch_size for lr in lr_per_mb]
    lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    # learner object
    local_learner = momentum_sgd(network['output'].parameters, lr_schedule, mm_schedule,
                                 unit_gain=True, l2_regularization_weight=l2_reg_weight)
    learner = data_parallel_distributed_learner(learner=local_learner,
                                                num_quantization_bits=num_quantization_bits,
                                                distributed_after=0)

    return Trainer(network['output'], (network['ce'], network['pe']), learner)
def create_trainer(network, minibatch_size, epoch_size, num_quantization_bits, block_size, warm_up, progress_printer):
    lr_per_mb = [0.1] # [1.0]*30 + [0.1]*30 + [0.01]*20 + [0.001]
    l2_reg_weight = 0.0001

    # adjust LR with minibatch size
    #if minibatch_size != 256:
    #    for i in range(0, len(lr_per_mb)):
    #        lr_per_mb[i] *= minibatch_size / 256

    # Set learning parameters
    lr_schedule = learning_rate_schedule(lr_per_mb, epoch_size=epoch_size, unit=UnitType.minibatch)
    mm_schedule = momentum_schedule(0.9)

    local_learner = nesterov(network['output'].parameters, lr_schedule, mm_schedule,
                             l2_regularization_weight=l2_reg_weight)

    # learner object
    if block_size is not None and num_quantization_bits != 32:
        raise RuntimeError("Block momentum cannot be used with quantization, please remove quantized_bits option.")

    if block_size is not None:
        learner = block_momentum_distributed_learner(local_learner, block_size=block_size)
    else:
        learner = data_parallel_distributed_learner(local_learner,
                                                    num_quantization_bits=num_quantization_bits,
                                                    distributed_after=warm_up)

    return Trainer(network['output'], (network['ce'], network['errs']), learner, progress_printer)
def test_usermbsource_training(tmpdir):
    input_dim = 1000
    num_output_classes = 5

    mbs = MyDataSource(input_dim, num_output_classes)

    from cntk import sequence, parameter, plus, cross_entropy_with_softmax, \
        classification_error, learning_rate_schedule, sgd, Trainer, \
        training_session, times, UnitType

    feature = sequence.input_variable(shape=(input_dim,))
    label = C.input_variable(shape=(num_output_classes,))
    p = parameter(shape=(input_dim, num_output_classes), init=10)
    z = times(sequence.reduce_sum(feature), p, name='z')
    ce = cross_entropy_with_softmax(z, label)
    errs = classification_error(z, label)

    lr_per_sample = learning_rate_schedule([0.3, 0.2, 0.1, 0.0], UnitType.sample)
    learner = sgd(z.parameters, lr_per_sample)
    trainer = Trainer(z, (ce, errs), [learner])
    input_map = {
        feature: mbs.fsi,
        label: mbs.lsi
    }
    session = training_session(
        trainer=trainer, mb_source=mbs,
        model_inputs_to_streams=input_map,
        mb_size=4, max_samples=20
    )
    session.train()

    assert trainer.total_number_of_samples_seen == 20
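# A minimal sketch of the MyDataSource helper the user-minibatch-source tests
# in this collection assume, loosely following the UserMinibatchSource example
# in the CNTK manual. The toy corpus, the MinibatchData bookkeeping, and the
# ignored distributed-worker arguments are illustrative assumptions.
import numpy as np
import cntk as C
from cntk.io import UserMinibatchSource, StreamInformation, MinibatchData

class MyDataSource(UserMinibatchSource):
    def __init__(self, f_dim, l_dim):
        self.f_dim, self.l_dim = f_dim, l_dim
        # stream metadata; the tests reference these as mbs.fsi and mbs.lsi
        self.fsi = StreamInformation("features", 0, 'sparse', np.float32, (f_dim,))
        self.lsi = StreamInformation("labels", 1, 'dense', np.float32, (l_dim,))
        # toy corpus: each entry is a sequence of one-hot feature indices plus a class label
        self.data = [([1, 5, 9], 0), ([2, 4], 1), ([7], 2)]
        self.next_seq_idx = 0
        super(MyDataSource, self).__init__()

    def stream_infos(self):
        return [self.fsi, self.lsi]

    def next_minibatch(self, num_samples, number_of_workers=1, worker_rank=0, device=None):
        features, labels = [], []
        sweep_end = False
        while len(labels) < num_samples:
            if self.next_seq_idx == len(self.data):
                sweep_end = True
                self.next_seq_idx = 0
            indices, label = self.data[self.next_seq_idx]
            features.append(indices)
            one_hot = np.zeros(self.l_dim, dtype=np.float32)
            one_hot[label] = 1
            labels.append(one_hot)
            self.next_seq_idx += 1
        num_seq = len(labels)
        # sparse one-hot sequences for the features, one dense vector per sequence for the labels
        f_data = C.Value.one_hot(features, self.f_dim)
        l_data = C.Value(np.asarray(labels, dtype=np.float32))
        return {
            self.fsi: MinibatchData(f_data, num_seq, sum(len(f) for f in features), sweep_end),
            self.lsi: MinibatchData(l_data, num_seq, num_seq, sweep_end),
        }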
def train(reader, model, max_epochs):
    # Input variables denoting the features and label data
    query = Input(input_dim, is_sparse=False) # TODO: make sparse once it works
    slot_labels = Input(num_labels, is_sparse=True)

    # apply model to input
    z = model(query)

    # loss and metric
    ce = cross_entropy_with_softmax(z, slot_labels)
    pe = classification_error(z, slot_labels)

    # training config
    epoch_size = 36000
    minibatch_size = 70
    num_mbs_to_show_result = 100
    time_constant = minibatch_size / math.log(1/0.9)

    lr_per_sample = [0.003]*2 + [0.0015]*12 + [0.0003]

    # trainer object
    lr_schedule = learning_rate_schedule(lr_per_sample, units=epoch_size)
    learner = fsadagrad(z.parameters, lr_schedule, time_constant,
                        targetAdagradAvDenom=1,
                        gradient_clipping_threshold_per_sample=15,
                        gradient_clipping_with_truncation=True)
    trainer = Trainer(z, ce, pe, [learner])
    #_extend_Trainer(trainer)  # TODO: should be just baked in

    # define mapping from reader streams to network inputs
    input_map = {
        query: reader.streams.query,
        slot_labels: reader.streams.slot_labels
    }

    # process minibatches and perform model training
    t = 0
    mbs = 0
    for epoch in range(max_epochs):
        loss_numer = 0  # TODO: find a nicer way of tracking, this is clumsy
        loss_denom = 0
        metric_numer = 0
        metric_denom = 0
        epoch_end = (epoch+1) * epoch_size
        while t < epoch_end:
            # BUGBUG: RuntimeError: GetNextMinibatch: Changing minibatch sizes across calls is currently unsupported
            #data, num_samples = next_minibatch(reader, min(minibatch_size, epoch_size-t), input_map)
            data = reader.next_minibatch(minibatch_size, input_map=input_map)
            if data is None:
                break
            trainer.train_minibatch(data)
            loss_numer += trainer.previous_minibatch_loss_average * trainer.previous_minibatch_sample_count  # too much code for something this simple
            loss_denom += trainer.previous_minibatch_sample_count
            metric_numer += trainer.previous_minibatch_evaluation_average * trainer.previous_minibatch_sample_count
            metric_denom += trainer.previous_minibatch_sample_count
            print_training_progress(trainer, mbs if mbs > 10 else 0, num_mbs_to_show_result)
            t += data[slot_labels].num_samples
            mbs += 1
        print("--- EPOCH {} DONE: loss = {:0.6f} * {}, metric = {:0.1f}% * {} ---".format(
            epoch+1, loss_numer/loss_denom, loss_denom, metric_numer/metric_denom*100.0, metric_denom))

    return loss_numer/loss_denom, metric_numer/metric_denom
def create_trainer(loss, pred_error, lr_per_sample, mm_schedule, l2_reg_weight, epochs_to_train, cfg):
    # Set learning parameters
    if isinstance(loss, C.Variable):
        loss = C.combine([loss])
    params = loss.parameters
    biases = [p for p in params if '.b' in p.name or 'b' == p.name]
    others = [p for p in params if p not in biases]
    bias_lr_mult = cfg["CNTK"].BIAS_LR_MULT

    lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
    learner = momentum_sgd(others, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight,
                           unit_gain=False, use_mean_gradient=True)

    bias_lr_per_sample = [v * bias_lr_mult for v in lr_per_sample]
    bias_lr_schedule = learning_rate_schedule(bias_lr_per_sample, unit=UnitType.sample)
    bias_learner = momentum_sgd(biases, bias_lr_schedule, mm_schedule,
                                l2_regularization_weight=l2_reg_weight,
                                unit_gain=False, use_mean_gradient=True)

    return Trainer(None, (loss, pred_error), [learner, bias_learner])
def train_model(reader, model, criterion, epoch_size=50000, max_epochs=80):
    minibatch_size = 64

    # learning parameters
    learner = momentum_sgd(model.parameters,
                           lr=learning_rate_schedule([0.0015625]*20 + [0.00046875]*20 + [0.00015625]*20 +
                                                     [0.000046875]*10 + [0.000015625],
                                                     unit=UnitType.sample, epoch_size=epoch_size),
                           momentum=momentum_as_time_constant_schedule([0]*20 + [600]*20 + [1200],
                                                                       epoch_size=epoch_size),
                           l2_regularization_weight=0.002)

    # trainer object
    trainer = Trainer(None, criterion, learner)

    # perform model training
    log_number_of_parameters(model); print()
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)

    for epoch in range(max_epochs):  # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            mb = reader.next_minibatch(min(minibatch_size, epoch_size - sample_count))  # fetch minibatch
            #trainer.train_minibatch(mb[reader.streams.features], mb[reader.streams.labels])
            trainer.train_minibatch({criterion.arguments[0]: mb[reader.streams.features],
                                     criterion.arguments[1]: mb[reader.streams.labels]})
            sample_count += mb[reader.streams.labels].num_samples  # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress
        loss, metric, actual_samples = progress_printer.epoch_summary(with_metric=True)
        model.save(os.path.join(model_path, "ConvNet_CIFAR10_DataAug_{}.dnn".format(epoch)))

    # return evaluation error
    return loss, metric  # return values from last epoch
def test_learner_logging():
    from cntk import Trainer
    from cntk.logging import ProgressPrinter
    from cntk import cross_entropy_with_softmax, classification_error

    features = C.input_variable(shape=(1,), needs_gradient=True, name='a')
    w_init = 1
    w = parameter(shape=(1,), init=w_init)
    z = features * w
    labels = C.input_variable(shape=(1,), name='b')
    ce = cross_entropy_with_softmax(z, labels)
    errs = classification_error(z, labels)

    writer = TestProgressWriter()
    lr_values = [0.3, 0.2, 0.1, 0]
    m_values = [0.6, 0.7, 0.8]
    learner = C.momentum_sgd(z.parameters,
                             learning_rate_schedule(lr_values, UnitType.sample, 1),
                             C.momentum_schedule(m_values, 1))
    trainer = Trainer(z, (ce, errs), [learner], writer)

    for i in range(10):
        trainer.train_minibatch({features: [[2.]], labels: [[1.]]})

    assert len(writer.log_output) == len(lr_values + m_values)

    values = [j for i in zip(lr_values, m_values) for j in i] + [0]
    for i in range(len(values)):
        assert values[i] == writer.log_output[i]
def train(reader, model, max_epochs):
    # Input variables denoting the features and label data
    query = Input(input_dim, is_sparse=False)
    slot_labels = Input(num_labels, is_sparse=True)  # TODO: make sparse once it works

    # apply model to input
    z = model(query)

    # loss and metric
    ce = cross_entropy_with_softmax(z, slot_labels)
    pe = classification_error(z, slot_labels)

    # training config
    epoch_size = 36000
    minibatch_size = 70
    num_mbs_to_show_result = 100
    momentum_time_constant = momentum_as_time_constant_schedule(minibatch_size / -math.log(0.9))  # TODO: Change to round number. This is 664.39. 700?

    lr_schedule = [0.003]*2 + [0.0015]*12 + [0.0003]  # LR schedule over epochs (we don't run that many epochs, but if we did, these are good values)

    # trainer object
    lr_per_sample = learning_rate_schedule(lr_schedule, UnitType.sample, epoch_size)
    learner = adam_sgd(z.parameters,
                       lr=lr_per_sample, momentum=momentum_time_constant,
                       low_memory=True,
                       gradient_clipping_threshold_per_sample=15,
                       gradient_clipping_with_truncation=True)
    trainer = Trainer(z, ce, pe, [learner])

    # define mapping from reader streams to network inputs
    input_map = {
        query: reader.streams.query,
        slot_labels: reader.streams.slot_labels
    }

    # process minibatches and perform model training
    log_number_of_parameters(z); print()
    progress_printer = ProgressPrinter(freq=100, first=10, tag='Training')  # more detailed logging
    #progress_printer = ProgressPrinter(tag='Training')

    t = 0
    for epoch in range(max_epochs):  # loop over epochs
        epoch_end = (epoch+1) * epoch_size
        while t < epoch_end:  # loop over minibatches on the epoch
            # BUGBUG? The change of minibatch_size parameter vv has no effect.
            data = reader.next_minibatch(min(minibatch_size, epoch_end-t), input_map=input_map)  # fetch minibatch
            trainer.train_minibatch(data)  # update model with it
            t += trainer.previous_minibatch_sample_count  # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress
            #def trace_node(name):
            #    nl = [n for n in z.parameters if n.name() == name]
            #    if len(nl) > 0:
            #        print(name, np.asarray(nl[0].value))
            #trace_node('W')
            #trace_node('stabilizer_param')
        loss, metric, actual_samples = progress_printer.epoch_summary(with_metric=True)

    return loss, metric
def train_sequence_classifier():
    input_dim = 2000
    cell_dim = 25
    hidden_dim = 25
    embedding_dim = 50
    num_output_classes = 5

    # Input variables denoting the features and label data
    features = input_variable(shape=input_dim, is_sparse=True)
    label = input_variable(num_output_classes,
                           dynamic_axes=[Axis.default_batch_axis()])

    # Instantiate the sequence classification model
    classifier_output = LSTM_sequence_classifer_net(features, num_output_classes,
                                                    embedding_dim, hidden_dim, cell_dim)

    ce = cross_entropy_with_softmax(classifier_output, label)
    pe = classification_error(classifier_output, label)

    rel_path = r"../../../../Tests/EndToEndTests/Text/SequenceClassification/Data/Train.ctf"
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)

    feature_stream_name = 'features'
    labels_stream_name = 'labels'

    mb_source = text_format_minibatch_source(path, [
        StreamConfiguration(feature_stream_name, input_dim, True, 'x'),
        StreamConfiguration(labels_stream_name, num_output_classes, False, 'y')
    ], 0)

    features_si = mb_source.stream_info(features)
    labels_si = mb_source.stream_info(label)

    # Instantiate the trainer object to drive the model training
    lr = learning_rates_per_sample(0.0005)
    trainer = Trainer(classifier_output, ce, pe,
                      [sgd_learner(classifier_output.owner.parameters(), lr)])

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 200
    training_progress_output_freq = 10
    i = 0
    while True:
        mb = mb_source.get_next_minibatch(minibatch_size)
        if len(mb) == 0:
            break

        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        arguments = {
            features: mb[features_si].m_data,
            label: mb[labels_si].m_data
        }
        trainer.train_minibatch(arguments)

        print_training_progress(trainer, i, training_progress_output_freq)
        i += 1
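# Several snippets in this collection call a print_training_progress helper
# without defining it. A minimal sketch matching the (trainer, mb, frequency)
# call sites; the exact output format is an assumption.
def print_training_progress(trainer, mb, frequency):
    if frequency and mb % frequency == 0:
        training_loss = trainer.previous_minibatch_loss_average
        eval_error = trainer.previous_minibatch_evaluation_average
        print("Minibatch: {}, Loss: {:.4f}, Error: {:.2f}".format(mb, training_loss, eval_error))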
def Evaluator(criterion):
    loss, metric = Trainer._get_loss_metric(criterion)
    parameters = set(loss.parameters)
    if metric:
        parameters |= set(metric.parameters)
    dummy_learner = momentum_sgd(tuple(parameters),
                                 lr=learning_parameter_schedule(1),
                                 momentum=momentum_schedule(0))
    return Trainer(None, (loss, metric), dummy_learner)
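# A minimal usage sketch for the Evaluator helper above; the two-input model,
# the function name, and the sample arrays are illustrative assumptions. The
# returned Trainer carries only a dummy learner, so test_minibatch evaluates
# the criterion without updating any parameters.
def evaluate_criterion_example():
    import numpy as np
    import cntk as C

    x = C.input_variable(2)
    y = C.input_variable(2)
    z = C.layers.Dense(2)(x)
    criterion = C.combine([C.cross_entropy_with_softmax(z, y),
                           C.classification_error(z, y)])
    evaluator = Evaluator(criterion)
    # returns the average metric (classification error) for this minibatch
    return evaluator.test_minibatch({x: np.array([[1., 2.]], dtype=np.float32),
                                     y: np.array([[0., 1.]], dtype=np.float32)})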
def cifar_resnet():
    dev = 0
    cntk_dev = cntk_device(dev)
    epoch_size = sys.maxsize
    mbs = create_mb_source(epoch_size)
    stream_infos = mbs.stream_infos()
    for si in stream_infos:
        if si.m_name == 'features':
            features_si = si
        elif si.m_name == 'labels':
            labels_si = si

    image_shape = features_si.m_sample_layout.dimensions()
    image_shape = (image_shape[2], image_shape[0], image_shape[1])

    num_classes = labels_si.m_sample_layout.dimensions()[0]

    image_input = variable(image_shape, features_si.m_element_type,
                           needs_gradient=False, name="Images")
    classifier_output = resnet_classifer(image_input, num_classes, dev, "classifierOutput")
    label_var = variable((num_classes), features_si.m_element_type,
                         needs_gradient=False, name="Labels")

    ce = cross_entropy_with_softmax(classifier_output, label_var)
    pe = classification_error(classifier_output, label_var)

    #TODO: add save and load module code
    image_classifier = combine([ce, pe, classifier_output], "ImageClassifier")

    lr = learning_rates_per_sample(0.0078125)

    mb_size = 32
    num_mbs = 1000

    trainer = Trainer(classifier_output, ce, pe,
                      [sgdlearner(classifier_output.owner.parameters(), lr)])

    for i in range(0, num_mbs):
        mb = mbs.get_next_minibatch(mb_size, cntk_dev)

        arguments = dict()
        arguments[image_input] = mb[features_si].m_data
        arguments[label_var] = mb[labels_si].m_data

        trainer.train_minibatch(arguments, cntk_dev)

        freq = 20
        if i % freq == 0:
            training_loss = get_train_loss(trainer)
            eval_crit = get_train_eval_criterion(trainer)
            print("Minibatch: {}, Train Loss: {}, Train Evaluation Criterion: {}".format(
                i, training_loss, eval_crit))
def Evaluator(criterion):
    loss, metric = Trainer._get_loss_metric(criterion)
    parameters = set(loss.parameters)
    if metric:
        parameters |= set(metric.parameters)
    dummy_learner = momentum_sgd(tuple(parameters),
                                 lr=learning_rate_schedule(1, UnitType.minibatch),
                                 momentum=momentum_as_time_constant_schedule(0))
    return Trainer(None, (loss, metric), dummy_learner)
def train_sequence_classifier(debug_output=False):
    input_dim = 2000
    cell_dim = 25
    hidden_dim = 25
    embedding_dim = 50
    num_output_classes = 5

    # Input variables denoting the features and label data
    features = input_variable(shape=input_dim, is_sparse=True)
    label = input_variable(num_output_classes,
                           dynamic_axes=[Axis.default_batch_axis()])

    # Instantiate the sequence classification model
    classifier_output = LSTM_sequence_classifer_net(features, num_output_classes,
                                                    embedding_dim, hidden_dim, cell_dim)

    ce = cross_entropy_with_softmax(classifier_output, label)
    pe = classification_error(classifier_output, label)

    rel_path = r"../../../../Tests/EndToEndTests/Text/SequenceClassification/Data/Train.ctf"
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)

    reader = create_reader(path, True, input_dim, num_output_classes)

    input_map = {
        features: reader.streams.features,
        label: reader.streams.labels
    }

    lr_per_sample = learning_rate_schedule(0.0005, UnitType.sample)

    # Instantiate the trainer object to drive the model training
    trainer = Trainer(classifier_output, (ce, pe),
                      sgd(classifier_output.parameters, lr=lr_per_sample))

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 200
    training_progress_output_freq = 10

    if debug_output:
        training_progress_output_freq = training_progress_output_freq / 3

    for i in range(251):
        mb = reader.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(mb)
        print_training_progress(trainer, i, training_progress_output_freq)

    import copy
    evaluation_average = copy.copy(trainer.previous_minibatch_evaluation_average)
    loss_average = copy.copy(trainer.previous_minibatch_loss_average)

    return evaluation_average, loss_average
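# A minimal sketch of the create_reader helper assumed above, following the
# CTF field layout implied by the StreamConfiguration calls in the older
# variant of this snippet (sparse features under field 'x', dense labels under
# field 'y'); treat it as an assumption, not the original helper.
from cntk.io import MinibatchSource, CTFDeserializer, StreamDef, StreamDefs, INFINITELY_REPEAT

def create_reader(path, is_training, input_dim, label_dim):
    return MinibatchSource(CTFDeserializer(path, StreamDefs(
        features=StreamDef(field='x', shape=input_dim, is_sparse=True),
        labels=StreamDef(field='y', shape=label_dim, is_sparse=False))),
        randomize=is_training,
        max_sweeps=INFINITELY_REPEAT if is_training else 1)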
def finalize_network(reader, model_details, max_amount_of_epochs, samples_per_epoch,
                     samples_per_minibatch, pixel_dimensions, classes, learning_rate):
    features = input_variable(shape=(pixel_dimensions['depth'], pixel_dimensions['height'],
                                     pixel_dimensions['width']))
    label = input_variable(shape=len(classes))

    # speeds up training
    normalized_features = element_times(1.0 / 256.0, features)

    model = create_tf_model(model_details, num_classes=len(classes),
                            input_features=normalized_features, freeze=True)

    loss = cross_entropy_with_softmax(model, label)
    metric = classification_error(model, label)

    learner = momentum_sgd(parameters=model.parameters,
                           lr=learning_rate_schedule(learning_rate, UnitType.minibatch),
                           momentum=0.9,
                           l2_regularization_weight=0.0005)

    reporter = ProgressPrinter(tag='training', num_epochs=max_amount_of_epochs)

    trainer = Trainer(model=model, criterion=(loss, metric),
                      parameter_learners=[learner], progress_writers=[reporter])

    log_number_of_parameters(model)

    map_input_to_streams_train = {
        features: reader.streams.features,
        label: reader.streams.labels
    }

    training_session(trainer=trainer, mb_source=reader,
                     model_inputs_to_streams=map_input_to_streams_train,
                     mb_size=samples_per_minibatch,
                     progress_frequency=samples_per_epoch,
                     checkpoint_config=CheckpointConfig(
                         frequency=samples_per_epoch,
                         filename=os.path.join("./checkpoints", "ConvNet_Lego_VisiOn"),
                         restore=True)).train()

    network = {'features': features, 'label': label, 'model': softmax(model)}

    model_name = "CNN-3200-224-resnet-18.model"
    export_path = os.path.abspath(os.path.join("..", "..", "Final models", "CNN", model_name))
    model.save(export_path)

    return network
def train_fast_rcnn(debug_output=False):
    if debug_output:
        print("Storing graphs and intermediate models to %s." % os.path.join(abs_path, "Output"))

    # Create the minibatch source
    minibatch_source = create_mb_source(image_height, image_width, num_channels,
                                        num_classes, num_rois, base_path, "train")

    # Input variables denoting features, rois and label data
    image_input = input_variable((num_channels, image_height, image_width))
    roi_input = input_variable((num_rois, 4))
    label_input = input_variable((num_rois, num_classes))

    # define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source[features_stream_name],
        roi_input: minibatch_source[roi_stream_name],
        label_input: minibatch_source[label_stream_name]
    }

    # Instantiate the Fast R-CNN prediction model and loss function
    frcn_output = frcn_predictor(image_input, roi_input, num_classes)
    ce = cross_entropy_with_softmax(frcn_output, label_input, axis=1)
    pe = classification_error(frcn_output, label_input, axis=1)
    if debug_output:
        plot(frcn_output, os.path.join(abs_path, "Output", "graph_frcn.png"))

    # Set learning parameters
    l2_reg_weight = 0.0005
    lr_per_sample = [0.00001] * 10 + [0.000001] * 5 + [0.0000001]
    lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    # Instantiate the trainer object
    learner = momentum_sgd(frcn_output.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(frcn_output, (ce, pe), learner)

    # Get minibatches of images and perform model training
    print("Training Fast R-CNN model for %s epochs." % max_epochs)
    log_number_of_parameters(frcn_output)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
    for epoch in range(max_epochs):  # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = minibatch_source.next_minibatch(min(mb_size, epoch_size-sample_count),
                                                   input_map=input_map)
            trainer.train_minibatch(data)  # update model with it
            sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress

        progress_printer.epoch_summary(with_metric=True)
        if debug_output:
            frcn_output.save(os.path.join(abs_path, "Output", "frcn_py_%s.model" % (epoch+1)))

    return frcn_output
def simple_mnist():
    input_dim = 784
    num_output_classes = 10
    num_hidden_layers = 1
    hidden_layers_dim = 200

    # Input variables denoting the features and label data
    input = input_variable(input_dim, np.float32)
    label = input_variable(num_output_classes, np.float32)

    # Instantiate the feedforward classification model
    scaled_input = element_times(constant((), 0.00390625), input)
    netout = fully_connected_classifier_net(scaled_input, num_output_classes,
                                            hidden_layers_dim, num_hidden_layers, sigmoid)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    rel_path = r"../../../../Examples/Image/MNIST/Data/Train-28x28_cntk_text.txt"
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)

    feature_stream_name = 'features'
    labels_stream_name = 'labels'

    mb_source = text_format_minibatch_source(path, [
        StreamConfiguration(feature_stream_name, input_dim),
        StreamConfiguration(labels_stream_name, num_output_classes)
    ])
    features_si = mb_source.stream_info(feature_stream_name)
    labels_si = mb_source.stream_info(labels_stream_name)

    # Instantiate the trainer object to drive the model training
    lr = learning_rates_per_sample(0.003125)
    trainer = Trainer(netout, ce, pe, [sgd_learner(netout.owner.parameters(), lr)])

    # Get minibatches of images to train with and perform model training
    minibatch_size = 32
    num_samples_per_sweep = 60000
    num_sweeps_to_train_with = 1
    num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size
    training_progress_output_freq = 20
    for i in range(0, int(num_minibatches_to_train)):
        mb = mb_source.get_next_minibatch(minibatch_size)

        # Specify the mapping of input variables in the model to actual minibatch data to be trained with
        arguments = {
            input: mb[features_si].m_data,
            label: mb[labels_si].m_data
        }
        trainer.train_minibatch(arguments)

        print_training_progress(trainer, i, training_progress_output_freq)
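# simple_mnist and the ffnet snippets assume a fully_connected_classifier_net
# builder. A minimal sketch using the layers API; the original helpers may be
# written differently, and the legacy ffnet variant additionally threads a
# device argument through its signature.
from cntk.layers import Dense, Sequential, For

def fully_connected_classifier_net(input, num_output_classes, hidden_layers_dim,
                                   num_hidden_layers, nonlinearity):
    # num_hidden_layers dense layers with the given activation, followed by a
    # linear output layer producing one logit per class
    model = Sequential([For(range(num_hidden_layers),
                            lambda: Dense(hidden_layers_dim, activation=nonlinearity)),
                        Dense(num_output_classes, activation=None)])
    return model(input)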
def train_model(debug_output=False):
    # Create the minibatch source
    minibatch_source = create_reader(map_file)

    # Input variables denoting features and label data
    image_input = input_variable((num_channels, image_height, image_width))
    label_input = input_variable((num_classes))

    # define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source.streams.features,
        label_input: minibatch_source.streams.labels
    }

    # Instantiate the prediction model and loss function
    model = modify_model(image_input, num_classes)
    ce = cross_entropy_with_softmax(model, label_input)
    pe = classification_error(model, label_input)

    # Set learning parameters
    l2_reg_weight = 0.0005
    lr_per_sample = [0.00001] * 10 + [0.000001] * 5 + [0.0000001]
    momentum_time_constant = 10
    lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    # Instantiate the trainer object
    progress_writers = [ProgressPrinter(tag='Training', num_epochs=max_epochs)]
    learner = momentum_sgd(model.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(model, (ce, pe), learner, progress_writers)

    # Get minibatches of images and perform model training
    print("Training image classifier for %s epochs." % max_epochs)
    log_number_of_parameters(model)

    for epoch in range(max_epochs):
        sample_count = 0
        while sample_count < epoch_size:
            data = minibatch_source.next_minibatch(min(mb_size, epoch_size - sample_count),
                                                   input_map=input_map)
            trainer.train_minibatch(data)
            sample_count += trainer.previous_minibatch_sample_count

        trainer.summarize_training_progress()
        model.save(os.path.join(output_model_folder, 'withcrops_{}.dnn'.format(epoch + 1)))

    return
def Evaluator(model, criterion):
    from cntk import Trainer
    from cntk.learners import momentum_sgd, momentum_schedule_per_sample
    loss, metric = Trainer._get_loss_metric(criterion)
    parameters = set(loss.parameters)
    if model:
        parameters |= set(model.parameters)
    if metric:
        parameters |= set(metric.parameters)
    dummy_learner = momentum_sgd(tuple(parameters),
                                 lr=learning_parameter_schedule(1),
                                 momentum=momentum_schedule_per_sample(0))
    return Trainer(model, (loss, metric), dummy_learner)
def Evaluator(model, criterion):
    from cntk import Trainer
    from cntk.learners import momentum_sgd, learning_rate_schedule, UnitType, momentum_as_time_constant_schedule
    loss, metric = Trainer._get_loss_metric(criterion)
    parameters = set(loss.parameters)
    if model:
        parameters |= set(model.parameters)
    if metric:
        parameters |= set(metric.parameters)
    dummy_learner = momentum_sgd(tuple(parameters),
                                 lr=learning_rate_schedule(1, UnitType.minibatch),
                                 momentum=momentum_as_time_constant_schedule(0))
    return Trainer(model, (loss, metric), dummy_learner)
def train_and_test(s2smodel, train_reader, test_reader, block_size, num_quantization_bits,
                   max_epochs, epoch_size, minibatch_size, progress_printer, warm_up):
    from Sequence2Sequence import create_criterion_function, create_model_train
    model_train = create_model_train(s2smodel)
    criterion = create_criterion_function(model_train)

    # Create learner
    if block_size is not None and num_quantization_bits != default_quantization_bits:
        raise RuntimeError(
            "Block momentum cannot be used with quantization, please remove quantized_bits option.")

    lr = 0.001 if use_attention else 0.005  # TODO: can we use the same value for both?
    local_learner = fsadagrad(model_train.parameters,
                              lr=learning_rate_schedule([lr] * 2 + [lr / 2] * 3 + [lr / 4],
                                                        UnitType.sample, epoch_size),
                              momentum=momentum_as_time_constant_schedule(1100),
                              gradient_clipping_threshold_per_sample=2.3,
                              gradient_clipping_with_truncation=True)

    if block_size is not None:
        learner = block_momentum_distributed_learner(local_learner, block_size=block_size)
    else:
        learner = data_parallel_distributed_learner(local_learner,
                                                    num_quantization_bits=num_quantization_bits,
                                                    distributed_after=warm_up)

    trainer = Trainer(None, criterion, learner, progress_printer)

    train_bind = {
        criterion.arguments[0]: train_reader.streams.features,
        criterion.arguments[1]: train_reader.streams.labels
    }

    training_session(
        mb_source=train_reader,
        trainer=trainer,
        model_inputs_to_streams=train_bind,
        mb_size=minibatch_size,
        progress_frequency=epoch_size,
        checkpoint_config=CheckpointConfig(frequency=epoch_size,
                                           filename=os.path.join(model_path, "SequenceToSequence"),
                                           restore=False),
        cv_config=CrossValidationConfig(source=test_reader, mb_size=minibatch_size)
    ).train()
def ffnet():
    input_dim = 2
    num_output_classes = 2
    num_hidden_layers = 2
    hidden_layers_dim = 50
    epoch_size = sys.maxsize
    minibatch_size = 25
    num_samples_per_sweep = 10000
    num_sweeps_to_train_with = 2
    num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size
    lr = learning_rates_per_sample(0.02)

    input = variable((input_dim,), np.float32, needs_gradient=False, name="features")
    label = variable((num_output_classes,), np.float32, needs_gradient=False, name="labels")

    dev = -1
    cntk_dev = cntk_device(dev)
    netout = fully_connected_classifier_net(input, num_output_classes,
                                            hidden_layers_dim, num_hidden_layers, dev, sigmoid)
    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    #TODO: add save and load module code
    ffnet = combine([ce, pe, netout], "classifier_model")

    rel_path = r"../../../../Examples/Other/Simple2d/Data/SimpleDataTrain_cntk_text.txt"
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)

    cm = create_text_mb_source(path, input_dim, num_output_classes, epoch_size)

    stream_infos = cm.stream_infos()

    for si in stream_infos:
        if si.m_name == 'features':
            features_si = si
        elif si.m_name == 'labels':
            labels_si = si

    trainer = Trainer(netout, ce, pe, [sgdlearner(netout.owner.parameters(), lr)])

    for i in range(0, int(num_minibatches_to_train)):
        mb = cm.get_next_minibatch(minibatch_size, cntk_dev)

        arguments = dict()
        arguments[input] = mb[features_si].m_data
        arguments[label] = mb[labels_si].m_data

        trainer.train_minibatch(arguments, cntk_dev)

        freq = 20
        if i % freq == 0:
            training_loss = get_train_loss(trainer)
            eval_crit = get_train_eval_criterion(trainer)
            print("Minibatch: {}, Train Loss: {}, Train Evaluation Criterion: {}".format(
                i, training_loss, eval_crit))
def ffnet(debug_output=False):
    input_dim = 2
    num_output_classes = 2
    num_hidden_layers = 2
    hidden_layers_dim = 50

    # Input variables denoting the features and label data
    input = input_variable((input_dim), np.float32)
    label = input_variable((num_output_classes), np.float32)

    # Instantiate the feedforward classification model
    netout = fully_connected_classifier_net(input, num_output_classes,
                                            hidden_layers_dim, num_hidden_layers, sigmoid)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    # Instantiate the trainer object to drive the model training
    trainer = Trainer(netout, ce, pe, [sgd(netout.parameters(), lr=0.02)])

    # Get minibatches of training data and perform model training
    minibatch_size = 25
    num_samples_per_sweep = 10000
    num_sweeps_to_train_with = 2
    num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size
    training_progress_output_freq = 60

    if debug_output:
        training_progress_output_freq = training_progress_output_freq / 3

    for i in range(0, int(num_minibatches_to_train)):
        features, labels = generate_random_data(minibatch_size, input_dim, num_output_classes)

        # Specify the mapping of input variables in the model to actual
        # minibatch data to be trained with
        trainer.train_minibatch({input: features, label: labels})
        print_training_progress(trainer, i, training_progress_output_freq)

    test_features, test_labels = generate_random_data(minibatch_size, input_dim, num_output_classes)
    avg_error = trainer.test_minibatch({input: test_features, label: test_labels})
    return avg_error
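# A minimal sketch of the generate_random_data helper used above, along the
# lines of the synthetic-data generators in the CNTK tutorials; the exact
# distribution is an assumption.
import numpy as np

def generate_random_data(sample_size, feature_dim, num_classes):
    # one integer class label per sample
    Y = np.random.randint(size=(sample_size, 1), low=0, high=num_classes)
    # features drawn so that each class forms a separable blob
    X = ((np.random.randn(sample_size, feature_dim) + 3) * (Y + 1)).astype(np.float32)
    # convert the integer labels to one-hot vectors
    class_ind = [Y == class_number for class_number in range(num_classes)]
    Y = np.asarray(np.hstack(class_ind), dtype=np.float32)
    return X, Y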
def train_sequence_classifier():
    input_dim = 2000
    cell_dim = 25
    hidden_dim = 25
    embedding_dim = 50
    num_output_classes = 5

    # Input variables denoting the features and label data
    features = sequence.input_variable(shape=input_dim, is_sparse=True)
    label = input_variable(num_output_classes)

    # Instantiate the sequence classification model
    classifier_output = LSTM_sequence_classifier_net(features, num_output_classes,
                                                     embedding_dim, hidden_dim, cell_dim)

    ce = cross_entropy_with_softmax(classifier_output, label)
    pe = classification_error(classifier_output, label)

    rel_path = ("../../../Tests/EndToEndTests/Text/" +
                "SequenceClassification/Data/Train.ctf")
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), rel_path)

    reader = create_reader(path, True, input_dim, num_output_classes)

    input_map = {
        features: reader.streams.features,
        label: reader.streams.labels
    }

    lr_per_sample = learning_rate_schedule(0.0005, UnitType.sample)

    # Instantiate the trainer object to drive the model training
    progress_printer = ProgressPrinter(0)
    trainer = Trainer(classifier_output, (ce, pe),
                      sgd(classifier_output.parameters, lr=lr_per_sample),
                      progress_printer)

    # Get minibatches of sequences to train with and perform model training
    minibatch_size = 200

    for i in range(255):
        mb = reader.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(mb)

    evaluation_average = float(trainer.previous_minibatch_evaluation_average)
    loss_average = float(trainer.previous_minibatch_loss_average)
    return evaluation_average, loss_average
def _train(z, loss, eval_error, f_input, l_input, num_output_classes, steps):
    np.random.seed(0)

    input_dim = 2

    lr_schedule = C.learning_parameter_schedule(0.5)
    # now we want the learning to be compatible with the way in the literature,
    # without the per-sample benefit:
    learner = sgd(z.parameters, lr_schedule, minibatch_size=C.learners.IGNORE)
    trainer = Trainer(z, (loss, eval_error), [learner])

    minibatch_size = 10

    for i in range(steps):
        features, labels = _generate_random_data_sample(minibatch_size, input_dim,
                                                        num_output_classes)
        trainer.train_minibatch({f_input: features, l_input: labels})
def test_usermbsource_training(tmpdir, with_checkpoint_impl):
    input_dim = 1000
    num_output_classes = 5

    mbs = MyDataSource(input_dim, num_output_classes)
    # Using this for testing the UserMinibatchSource checkpointing
    if with_checkpoint_impl:
        MBS_CV_CLASS = MyDataSourceWithCheckpoint
    else:
        MBS_CV_CLASS = MyDataSource

    mbs_cv = MBS_CV_CLASS(input_dim, num_output_classes)

    from cntk import sequence, parameter, plus, cross_entropy_with_softmax, \
        classification_error, learning_rate_schedule, sgd, Trainer, \
        training_session, times, UnitType

    feature = sequence.input_variable(shape=(input_dim,))
    label = C.input_variable(shape=(num_output_classes,))
    p = parameter(shape=(input_dim, num_output_classes), init=10)
    z = times(sequence.reduce_sum(feature), p, name='z')
    ce = cross_entropy_with_softmax(z, label)
    errs = classification_error(z, label)

    # Use a large learning rate to prevent the model from converging before all
    # the intended samples have been fed; note that the training session can end
    # early if there are no updates.
    lr_per_sample = learning_rate_schedule(0.3, UnitType.sample)
    learner = sgd(z.parameters, lr_per_sample)
    trainer = Trainer(z, (ce, errs), [learner])
    input_map = {
        feature: mbs.fsi,
        label: mbs.lsi
    }
    session = training_session(
        trainer=trainer, mb_source=mbs,
        model_inputs_to_streams=input_map,
        mb_size=4, max_samples=20,
        cv_config=C.CrossValidationConfig(minibatch_source=mbs_cv, max_samples=10,
                                          minibatch_size=2)
    )
    session.train()

    assert trainer.total_number_of_samples_seen == 20
    if with_checkpoint_impl:
        assert mbs_cv._restore_from_checkpoint_calls == 1
def _train(z, loss, eval_error, f_input, l_input, num_output_classes, steps):
    np.random.seed(0)

    input_dim = 2

    lr_schedule = learning_rate_schedule(0.5, UnitType.minibatch)
    learner = sgd(z.parameters, lr_schedule)
    trainer = Trainer(z, (loss, eval_error), [learner])

    minibatch_size = 10

    for i in range(steps):
        features, labels = _generate_random_data_sample(minibatch_size, input_dim,
                                                        num_output_classes)
        trainer.train_minibatch({f_input: features, l_input: labels})
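# Both _train variants above assume a _generate_random_data_sample helper. A
# minimal sketch reusing the same synthetic-blob pattern as the
# generate_random_data sketch earlier; the distribution is an assumption.
import numpy as np

def _generate_random_data_sample(sample_size, feature_dim, num_classes):
    Y = np.random.randint(size=(sample_size, 1), low=0, high=num_classes)
    X = ((np.random.randn(sample_size, feature_dim) + 3) * (Y + 1)).astype(np.float32)
    Y = np.asarray(np.hstack([Y == c for c in range(num_classes)]), dtype=np.float32)
    return X, Y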
def train_model(base_model_file, feature_node_name, last_hidden_node_name,
                image_width, image_height, num_channels, num_classes, train_map_file,
                num_epochs, max_images=-1, freeze=False):
    epoch_size = sum(1 for line in open(train_map_file))
    if max_images > 0:
        epoch_size = min(epoch_size, max_images)

    # Create the minibatch source and input variables
    minibatch_source = create_mb_source(train_map_file, image_width, image_height,
                                        num_channels, num_classes)
    image_input = C.input_variable((num_channels, image_height, image_width))
    label_input = C.input_variable(num_classes)

    # Define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source[features_stream_name],
        label_input: minibatch_source[label_stream_name]
    }

    # Instantiate the transfer learning model and loss function
    tl_model = create_model(base_model_file, feature_node_name, last_hidden_node_name,
                            num_classes, image_input, freeze)
    ce = cross_entropy_with_softmax(tl_model, label_input)
    pe = classification_error(tl_model, label_input)

    # Instantiate the trainer object
    lr_schedule = learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch)
    mm_schedule = momentum_schedule(momentum_per_mb)
    learner = momentum_sgd(tl_model.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=num_epochs)
    trainer = Trainer(tl_model, (ce, pe), learner, progress_printer)

    # Get minibatches of images and perform model training
    print("Training transfer learning model for {0} epochs (epoch_size = {1}).".format(num_epochs, epoch_size))
    log_number_of_parameters(tl_model)
    for epoch in range(num_epochs):  # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = minibatch_source.next_minibatch(min(mb_size, epoch_size - sample_count),
                                                   input_map=input_map)
            trainer.train_minibatch(data)  # update model with it
            sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far
            if sample_count % (100 * mb_size) == 0:
                print("Processed {0} samples".format(sample_count))

        trainer.summarize_training_progress()

    return tl_model
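# A minimal sketch of the create_mb_source helper assumed by the transfer
# learning snippet above, built on ImageDeserializer; the scale transform and
# its parameters are assumptions.
import cntk.io.transforms as xforms
from cntk.io import MinibatchSource, ImageDeserializer, StreamDef, StreamDefs

def create_mb_source(map_file, image_width, image_height, num_channels, num_classes):
    # resize every image to the network's expected input dimensions
    transforms = [xforms.scale(width=image_width, height=image_height,
                               channels=num_channels, interpolations='linear')]
    return MinibatchSource(ImageDeserializer(map_file, StreamDefs(
        features=StreamDef(field='image', transforms=transforms),
        labels=StreamDef(field='label', shape=num_classes))))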
def _hyper_train_target_sub(self, **kwargs):
    """Actual training procedure for a specific set of hyperparameters."""
    if self.saver.log_filename:
        fh = logging.FileHandler(self.saver.log_filename)
        self.logger.addHandler(fh)

    self.logger.info("Training with parameters: {}".format(kwargs))

    X_train, Y_train, X_val, Y_val = self.data_source(**kwargs)
    input_var, label_var, output = self.model(**kwargs)

    loss = cross_entropy_with_softmax(output, label_var)
    label_error = classification_error(output, label_var)
    learner = self.optimizer(parameters=output.parameters, momentum=0.9, **kwargs)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=self.num_epoch)
    trainer = Trainer(output, (loss, label_error), [learner], [progress_printer])

    # input_map = {
    #     input_var: reader_train.streams.features,
    #     label_var: reader_train.streams.labels
    # }

    num_minibatches_to_train = X_train.shape[0] / self.data_source.batch_size
    for i in range(0, int(num_minibatches_to_train)):
        features = X_train[:self.data_source.batch_size]
        labels = Y_train[:self.data_source.batch_size]
        trainer.train_minibatch({input_var: features, label_var: labels})

    if self.saver.log_filename:
        self.logger.removeHandler(fh)
        fh.close()

    best_value = 0.0
    return best_value