def train_and_evaluate(reader_train, reader_test, network_name, epoch_size, max_epochs,
                       profiler_dir=None, model_dir=None, log_dir=None,
                       tensorboard_logdir=None, gen_heartbeat=False):
    """Train a CIFAR-10 ResNet with momentum SGD and return its test error.

    Args:
        reader_train: Training minibatch source. Must provide
            ``streams.features``, ``streams.labels`` and ``next_minibatch``.
        reader_test: Test minibatch source. It is read through the input map
            built from ``reader_train``'s streams.
        network_name: ``'resnet20'`` or ``'resnet110'``; selects the depth
            argument passed to ``create_cifar10_model`` and the learning-rate
            list.
        epoch_size: Number of samples consumed per training epoch.
        max_epochs: Number of training epochs.
        profiler_dir: If truthy, ``start_profiler`` is called with it before
            training and ``stop_profiler`` after training.
        model_dir: If truthy, the model is saved there after every epoch as
            ``<network_name>_<epoch>.dnn``.
        log_dir: Forwarded to ``ProgressPrinter`` as ``log_to_file``.
        tensorboard_logdir: If not ``None``, a ``TensorBoardProgressWriter``
            is attached and per-parameter means are logged once per epoch.
        gen_heartbeat: Forwarded to ``ProgressPrinter``.

    Returns:
        The sample-weighted mean of ``trainer.test_minibatch`` over
        10000 test samples.

    Raises:
        RuntimeError: If ``network_name`` is not a known model name.
    """
    set_computation_network_trace_level(0)

    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width), name='features')
    label_var = C.input_variable((num_classes))

    # Create the model and pick its per-minibatch learning rates.
    if network_name == 'resnet20':
        z = create_cifar10_model(input_var, 3, num_classes)
        lr_per_mb = [1.0] * 80 + [0.1] * 40 + [0.01]
    elif network_name == 'resnet110':
        z = create_cifar10_model(input_var, 18, num_classes)
        lr_per_mb = [0.1] * 1 + [1.0] * 80 + [0.1] * 40 + [0.01]
    else:
        raise RuntimeError("Unknown model name!")

    # Loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)

    # Shared training parameters
    minibatch_size = 128
    momentum_time_constant = -minibatch_size / np.log(0.9)
    l2_reg_weight = 0.0001

    # The schedule is expressed per sample, so divide the per-minibatch rates.
    lr_per_sample = [lr / minibatch_size for lr in lr_per_mb]
    lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size,
                                         unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    # Progress writers
    progress_writers = [
        ProgressPrinter(tag='Training', log_to_file=log_dir, num_epochs=max_epochs,
                        gen_heartbeat=gen_heartbeat)
    ]
    tensorboard_writer = None
    if tensorboard_logdir is not None:
        tensorboard_writer = TensorBoardProgressWriter(
            freq=10, log_dir=tensorboard_logdir, model=z)
        progress_writers.append(tensorboard_writer)

    # Trainer object
    learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(z, (ce, pe), learner, progress_writers)

    # Mapping from reader streams to network inputs. The same map is reused
    # for reader_test below.
    # NOTE(review): this presumes both readers expose equivalent streams - confirm.
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    log_number_of_parameters(z)
    print()

    # Perform model training
    if profiler_dir:
        start_profiler(profiler_dir, True)

    for epoch in range(max_epochs):
        sample_count = 0
        while sample_count < epoch_size:
            # Never request more samples than remain in this epoch.
            data = reader_train.next_minibatch(
                min(minibatch_size, epoch_size - sample_count), input_map=input_map)
            trainer.train_minibatch(data)
            sample_count += trainer.previous_minibatch_sample_count

        trainer.summarize_training_progress()

        # Log mean of each parameter tensor, so that we can confirm that the
        # parameters change indeed.
        if tensorboard_writer:
            for parameter in z.parameters:
                tensorboard_writer.write_value(parameter.uid + "/mean",
                                               reduce_mean(parameter).eval(), epoch)

        if model_dir:
            z.save(os.path.join(model_dir, network_name + "_{}.dnn".format(epoch)))
        enable_profiler()  # begin to collect profiler data after first epoch

    if profiler_dir:
        stop_profiler()

    # Evaluation parameters
    test_epoch_size = 10000
    test_minibatch_size = 16

    # Process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    sample_count = 0

    while sample_count < test_epoch_size:
        requested = min(test_minibatch_size, test_epoch_size - sample_count)
        data = reader_test.next_minibatch(requested, input_map=input_map)
        # Weight the minibatch average by the number of samples actually
        # returned, so the weights always agree with the progress counter
        # even if the reader delivers fewer samples than requested.
        actual = data[label_var].num_samples
        metric_numer += trainer.test_minibatch(data) * actual
        metric_denom += actual
        sample_count += actual

    print("")
    trainer.summarize_test_progress()
    print("")

    return metric_numer / metric_denom
def train_and_evaluate(reader_train, reader_test, network_name, epoch_size, max_epochs,
                       profiler_dir=None, model_dir=None, log_dir=None,
                       tensorboard_logdir=None, gen_heartbeat=False):
    """Train ``resnet20``/``resnet110`` on CIFAR-10 and evaluate on the test set.

    Args:
        reader_train: Source of training minibatches; its ``streams.features``
            and ``streams.labels`` are bound to the network inputs.
        reader_test: Source of test minibatches, read with the same input map.
        network_name: Model selector, ``'resnet20'`` or ``'resnet110'``.
        epoch_size: Samples per training epoch.
        max_epochs: Number of epochs to train.
        profiler_dir: When truthy, profiling is started with this directory
            before training and stopped afterwards.
        model_dir: When truthy, a ``<network_name>_<epoch>.dnn`` checkpoint is
            written to this directory after each epoch.
        log_dir: Passed to ``ProgressPrinter`` as ``log_to_file``.
        tensorboard_logdir: When not ``None``, enables TensorBoard logging.
        gen_heartbeat: Passed through to ``ProgressPrinter``.

    Returns:
        Test metric averaged over 10000 test samples, each minibatch weighted
        by its sample count.

    Raises:
        RuntimeError: For any other ``network_name``.
    """
    set_computation_network_trace_level(0)

    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width), name='features')
    label_var = C.input_variable((num_classes))

    # create model, and configure learning parameters
    if network_name == 'resnet20':
        z = create_cifar10_model(input_var, 3, num_classes)
        lr_per_mb = [1.0] * 80 + [0.1] * 40 + [0.01]
    elif network_name == 'resnet110':
        z = create_cifar10_model(input_var, 18, num_classes)
        lr_per_mb = [0.1] * 1 + [1.0] * 80 + [0.1] * 40 + [0.01]
    else:
        raise RuntimeError("Unknown model name!")

    # loss and metric
    ce = cross_entropy_with_softmax(z, label_var)
    pe = classification_error(z, label_var)

    # shared training parameters
    train_minibatch_size = 128
    momentum_time_constant = -train_minibatch_size / np.log(0.9)
    l2_reg_weight = 0.0001

    # Set learning parameters (rates are converted from per-minibatch to
    # per-sample because the schedule uses UnitType.sample).
    lr_per_sample = [lr / train_minibatch_size for lr in lr_per_mb]
    lr_schedule = learning_rate_schedule(lr_per_sample, epoch_size=epoch_size,
                                         unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)

    # progress writers
    progress_writers = [ProgressPrinter(tag='Training', log_to_file=log_dir,
                                        num_epochs=max_epochs,
                                        gen_heartbeat=gen_heartbeat)]
    tensorboard_writer = None
    if tensorboard_logdir is not None:
        tensorboard_writer = TensorBoardProgressWriter(freq=10, log_dir=tensorboard_logdir,
                                                       model=z)
        progress_writers.append(tensorboard_writer)

    # trainer object
    learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(z, (ce, pe), learner, progress_writers)

    # define mapping from reader streams to network inputs
    # NOTE(review): built from reader_train but also used with reader_test;
    # presumably the two readers share stream definitions - confirm.
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    log_number_of_parameters(z)
    print()

    # perform model training
    if profiler_dir:
        start_profiler(profiler_dir, True)

    for epoch in range(max_epochs):  # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            remaining = epoch_size - sample_count
            data = reader_train.next_minibatch(min(train_minibatch_size, remaining),
                                               input_map=input_map)  # fetch minibatch
            trainer.train_minibatch(data)  # update model with it
            sample_count += trainer.previous_minibatch_sample_count  # samples so far

        trainer.summarize_training_progress()

        # Log mean of each parameter tensor, so that we can confirm that the
        # parameters change indeed.
        if tensorboard_writer:
            for parameter in z.parameters:
                tensorboard_writer.write_value(parameter.uid + "/mean",
                                               reduce_mean(parameter).eval(), epoch)

        if model_dir:
            z.save(os.path.join(model_dir, network_name + "_{}.dnn".format(epoch)))
        enable_profiler()  # begin to collect profiler data after first epoch

    if profiler_dir:
        stop_profiler()

    # Evaluation parameters
    test_epoch_size = 10000
    test_minibatch_size = 16

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    sample_count = 0

    while sample_count < test_epoch_size:
        current_minibatch = min(test_minibatch_size, test_epoch_size - sample_count)
        # Fetch next test minibatch.
        data = reader_test.next_minibatch(current_minibatch, input_map=input_map)
        # Use the delivered sample count (not the requested one) for both the
        # weighting and the progress counter so the two cannot drift apart.
        num_returned = data[label_var].num_samples
        metric_numer += trainer.test_minibatch(data) * num_returned
        metric_denom += num_returned
        sample_count += num_returned

    print("")
    trainer.summarize_test_progress()
    print("")

    return metric_numer / metric_denom
def train_model(cntkModel, params, input_map):
    """Train ``cntkModel`` with momentum SGD and plot per-epoch errors.

    Args:
        cntkModel: The model to train; its ``parameters`` are optimized.
        params: Dictionary of settings. Keys read here: ``'mb_size'``,
            ``'num_epochs'``, ``'epoch_size_train'``, ``'epoch_size_test'``,
            ``'train_mbs'``, ``'valid_mbs'``, ``'learn_rate'``,
            ``'beta_momentum_gd'``, ``'l2_reg_weight'``, ``'ce'`` and ``'pe'``.
        input_map: Mapping from network inputs to reader streams, passed to
            ``next_minibatch`` and to ``cntkComputeTestError``.

    Returns:
        The same ``cntkModel`` object, after training.
    """
    log = logging.getLogger("neuralnets1.utils.train_model")

    mb_size = params['mb_size']
    num_epochs = params['num_epochs']
    epoch_size_train = params['epoch_size_train']
    epoch_size_test = params['epoch_size_test']
    minibatch_source_train = params['train_mbs']
    minibatch_source_valid = params['valid_mbs']

    # Instantiate the trainer object
    lr_per_minibatch = learning_parameter_schedule(params['learn_rate'],
                                                   minibatch_size=mb_size,
                                                   epoch_size=epoch_size_train)
    mm_schedule = momentum_schedule(params['beta_momentum_gd'])
    learner = momentum_sgd(cntkModel.parameters, lr_per_minibatch, mm_schedule,
                           l2_regularization_weight=params['l2_reg_weight'])
    progress_writers = [ProgressPrinter(tag='Training', num_epochs=num_epochs)]
    trainer = Trainer(cntkModel, (params['ce'], params['pe']), learner, progress_writers)

    # Run training epochs. Arguments are handed to the logger unformatted so
    # the message is only built when the record is actually emitted.
    log.info(
        'Training transfer learning model for %s epochs (epoch_size_train = %s ) .',
        num_epochs, epoch_size_train)
    errsVal = []
    errsTrain = []
    log_number_of_parameters(cntkModel)

    for epoch in range(num_epochs):
        err_numer = 0
        sample_counts = 0
        while sample_counts < epoch_size_train:  # Loop over minibatches in the epoch
            sample_count = min(mb_size, epoch_size_train - sample_counts)
            data = minibatch_source_train.next_minibatch(sample_count, input_map=input_map)
            trainer.train_minibatch(data)  # Update model with it
            sample_counts += sample_count  # Count samples processed so far
            err_numer += trainer.previous_minibatch_evaluation_average * sample_count

            # Progress message every 100 full minibatches.
            if sample_counts % (100 * mb_size) == 0:
                log.info("Training: processed %s samples", sample_counts)

        # Compute error on the training and validation sets
        errsTrain.append(err_numer / float(sample_counts))
        trainer.summarize_training_progress()
        errsVal.append(
            cntkComputeTestError(trainer, minibatch_source_valid, mb_size,
                                 epoch_size_test, input_map))
        trainer.summarize_test_progress()

        # Plot training progress
        plt.plot(errsTrain, 'b-', errsVal, 'g-')
        plt.xlabel('Epoch number')
        plt.ylabel('Error')
        plt.title('Training error (blue), validation error (green)')
        plt.draw()
    return cntkModel
def simple_mnist():
    """Train a one-hidden-layer MNIST classifier and return its mean test error.

    The data files are looked up under the directory named by the
    ``CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY`` environment variable when it
    is set, otherwise under ``../Image/DataSets/MNIST``; either way the path
    is resolved against the module-level ``abs_path``. Training progress is
    written to a ``mnist_log`` directory next to this file and to a
    ``ProgressPrinter``.

    Returns:
        The mean of ``trainer.test_minibatch`` over the full test minibatches
        that were evaluated.
    """

    def _data_path(external_rel, local_rel):
        # Resolve a data file, preferring the external test-data directory.
        try:
            rel_path = os.path.join(os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
                                    *external_rel.split("/"))
        except KeyError:
            rel_path = os.path.join(*local_rel.split("/"))
        path = os.path.normpath(os.path.join(abs_path, rel_path))
        check_path(path)
        return path

    input_dim = 784
    num_output_classes = 10
    num_hidden_layers = 1
    hidden_layers_dim = 200

    # Input variables denoting the features and label data
    features = input_variable(input_dim, np.float32)
    label = input_variable(num_output_classes, np.float32)

    # Instantiate the feedforward classification model.
    # 0.00390625 == 1/256 scales the raw input values.
    scaled_input = element_times(constant(0.00390625), features)
    netout = fully_connected_classifier_net(
        scaled_input, num_output_classes, hidden_layers_dim, num_hidden_layers, relu)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    train_path = _data_path("Image/MNIST/v0/Train-28x28_cntk_text.txt",
                            "../Image/DataSets/MNIST/Train-28x28_cntk_text.txt")
    reader_train = create_reader(train_path, True, input_dim, num_output_classes)

    input_map = {
        features: reader_train.streams.features,
        label: reader_train.streams.labels
    }

    # Instantiate progress writers.
    logdir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "mnist_log")
    tensorboard_writer = TensorBoardProgressWriter(freq=1, log_dir=logdir, model=netout)
    progress_printer = ProgressPrinter(freq=10, tag='Training')

    # Instantiate the trainer object to drive the model training
    lr_per_minibatch = learning_rate_schedule(0.2, UnitType.minibatch)
    learner = sgd(netout.parameters, lr=lr_per_minibatch)
    trainer = Trainer(netout, (ce, pe), learner, [tensorboard_writer, progress_printer])

    # Get minibatches of images to train with and perform model training
    minibatch_size = 64
    num_samples_per_sweep = 6000
    num_sweeps_to_train_with = 2
    # Integer division: only whole minibatches are trained on.
    num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) // minibatch_size

    for minibatch_idx in range(num_minibatches_to_train):
        trainer.train_minibatch(reader_train.next_minibatch(minibatch_size, input_map=input_map))

        # Log max/min/mean of each parameter tensor, so that we can confirm that
        # the parameters change indeed. Don't want to do that very often though,
        # otherwise will spend too much time computing min/max/mean.
        if minibatch_idx % 10 == 9:
            for p in netout.parameters:
                tensorboard_writer.write_value(p.uid + "/max", reduce_max(p).eval(), minibatch_idx)
                tensorboard_writer.write_value(p.uid + "/min", reduce_min(p).eval(), minibatch_idx)
                tensorboard_writer.write_value(p.uid + "/mean", reduce_mean(p).eval(), minibatch_idx)

    trainer.summarize_training_progress()

    # Load test data
    test_path = _data_path("Image/MNIST/v0/Test-28x28_cntk_text.txt",
                           "../Image/DataSets/MNIST/Test-28x28_cntk_text.txt")
    reader_test = create_reader(test_path, False, input_dim, num_output_classes)

    input_map = {
        features: reader_test.streams.features,
        label: reader_test.streams.labels
    }

    # Test data for trained model
    test_minibatch_size = 1024
    num_samples = 10000
    # Integer division keeps the divisor below equal to the number of
    # minibatches actually evaluated. True division gave 9.765625 while only
    # 9 minibatches were summed, which under-reported the mean error.
    num_minibatches_to_test = num_samples // test_minibatch_size
    test_result = 0.0
    for _ in range(num_minibatches_to_test):
        mb = reader_test.next_minibatch(test_minibatch_size, input_map=input_map)
        test_result += trainer.test_minibatch(mb)

    # Average of evaluation errors of all test minibatches
    trainer.summarize_test_progress()
    return test_result / num_minibatches_to_test
def simple_mnist():
    """Train and test a small fully connected MNIST classifier.

    Builds a network with one hidden layer of 200 units, trains it with plain
    SGD while logging to TensorBoard (directory ``mnist_log`` beside this
    file) and to a ``ProgressPrinter``, then evaluates it on the test file.

    Returns:
        The average of the per-minibatch test results over the evaluated
        test minibatches.
    """
    input_dim = 784
    num_output_classes = 10
    num_hidden_layers = 1
    hidden_layers_dim = 200

    # Input variables denoting the features and label data
    features = input_variable(input_dim, np.float32)
    label = input_variable(num_output_classes, np.float32)

    # Instantiate the feedforward classification model
    scaled_input = element_times(constant(0.00390625), features)  # 1/256 scaling
    netout = fully_connected_classifier_net(scaled_input, num_output_classes,
                                            hidden_layers_dim, num_hidden_layers, relu)

    ce = cross_entropy_with_softmax(netout, label)
    pe = classification_error(netout, label)

    # Prefer the external test-data directory when the environment names one.
    try:
        rel_path = os.path.join(
            os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
            *"Image/MNIST/v0/Train-28x28_cntk_text.txt".split("/"))
    except KeyError:
        rel_path = os.path.join(
            *"../Image/DataSets/MNIST/Train-28x28_cntk_text.txt".split("/"))
    path = os.path.normpath(os.path.join(abs_path, rel_path))
    check_path(path)

    reader_train = create_reader(path, True, input_dim, num_output_classes)

    input_map = {
        features: reader_train.streams.features,
        label: reader_train.streams.labels
    }

    # Instantiate progress writers.
    logdir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "mnist_log")
    tensorboard_writer = TensorBoardProgressWriter(freq=1, log_dir=logdir, model=netout)
    progress_printer = ProgressPrinter(freq=10, tag='Training')

    # Instantiate the trainer object to drive the model training
    lr_per_minibatch = learning_rate_schedule(0.2, UnitType.minibatch)
    learner = sgd(netout.parameters, lr=lr_per_minibatch)
    trainer = Trainer(netout, (ce, pe), learner, [tensorboard_writer, progress_printer])

    # Get minibatches of images to train with and perform model training
    minibatch_size = 64
    num_samples_per_sweep = 6000
    num_sweeps_to_train_with = 2
    total_train_samples = num_samples_per_sweep * num_sweeps_to_train_with
    # Whole minibatches only; same count the original obtained via int().
    num_minibatches_to_train = total_train_samples // minibatch_size

    for minibatch_idx in range(num_minibatches_to_train):
        trainer.train_minibatch(
            reader_train.next_minibatch(minibatch_size, input_map=input_map))

        # Log max/min/mean of each parameter tensor, so that we can confirm that
        # the parameters change indeed. Don't want to do that very often though,
        # otherwise will spend too much time computing min/max/mean.
        if minibatch_idx % 10 == 9:
            for p in netout.parameters:
                tensorboard_writer.write_value(p.uid + "/max", reduce_max(p).eval(), minibatch_idx)
                tensorboard_writer.write_value(p.uid + "/min", reduce_min(p).eval(), minibatch_idx)
                tensorboard_writer.write_value(p.uid + "/mean", reduce_mean(p).eval(), minibatch_idx)

    trainer.summarize_training_progress()

    # Load test data
    try:
        rel_path = os.path.join(
            os.environ['CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY'],
            *"Image/MNIST/v0/Test-28x28_cntk_text.txt".split("/"))
    except KeyError:
        rel_path = os.path.join(
            *"../Image/DataSets/MNIST/Test-28x28_cntk_text.txt".split("/"))
    path = os.path.normpath(os.path.join(abs_path, rel_path))
    check_path(path)

    reader_test = create_reader(path, False, input_dim, num_output_classes)

    input_map = {
        features: reader_test.streams.features,
        label: reader_test.streams.labels
    }

    # Test data for trained model
    test_minibatch_size = 1024
    num_samples = 10000
    # Floor division so that the number of evaluated minibatches and the
    # divisor of the final average are the same integer. With "/" the loop
    # ran 9 times but the sum was divided by 9.765625.
    num_minibatches_to_test = num_samples // test_minibatch_size
    test_result = 0.0
    for _ in range(num_minibatches_to_test):
        mb = reader_test.next_minibatch(test_minibatch_size, input_map=input_map)
        test_result += trainer.test_minibatch(mb)

    # Average of evaluation errors of all test minibatches
    trainer.summarize_test_progress()
    return test_result / num_minibatches_to_test
def train_and_evaluate(reader_train, reader_test, network_name, epoch_size, max_epochs,
                       minibatch_size, model_dir=None, log_dir=None,
                       tensorboard_logdir=None, gen_heartbeat=False, fp16=False):
    """Train the CIFAR-10 model with Adam and report accuracy and elapsed time.

    Args:
        reader_train: Training minibatch source; its ``streams.features`` and
            ``streams.labels`` are bound to the network inputs.
        reader_test: Test minibatch source, read with the same input map.
        network_name: Used only as the file-name prefix of saved checkpoints.
        epoch_size: Number of samples in one epoch.
        max_epochs: Number of epochs to train.
        minibatch_size: Training minibatch size; also passed to the learning
            rate and momentum schedules.
        model_dir: If truthy, the model is saved there after every epoch as
            ``<network_name>_<epoch>.dnn``.
        log_dir: Accepted but not used by this function.
        tensorboard_logdir: If not ``None``, a ``TensorBoardProgressWriter``
            is attached and per-parameter means are logged once per epoch.
        gen_heartbeat: Forwarded to ``ProgressPrinter``.
        fp16: Accepted but not used by this function.

    Returns:
        A tuple ``(accuracy, elapsed)`` where ``accuracy`` is one minus the
        sample-weighted mean test error over 10000 test samples and
        ``elapsed`` is the time in seconds spent in training plus evaluation.
    """
    set_computation_network_trace_level(0)

    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width), name='features')
    label_var = C.input_variable((num_classes))

    with C.default_options(dtype=np.float32):
        # Create the model
        model = create_cifar10_model(input_var, 3, num_classes)
        # Loss and metric
        loss = cross_entropy_with_softmax(model, label_var)
        error_rate = classification_error(model, label_var)

    # Piecewise-constant learning rate, one entry per epoch. The index into
    # ``lrs`` is the number of boundaries the epoch has reached. The original
    # loop tested ``i > 180`` and so appended nothing for epoch 180, leaving
    # the list one entry short; the rate in effect at each epoch is unchanged.
    check_point = (80, 120, 160, 180)
    lrs = (3e-2, 3e-3, 3e-4, 3e-4, 5e-5)
    lr_per_epoch = [
        lrs[sum(epoch >= boundary for boundary in check_point)]
        for epoch in range(max_epochs + 1)
    ]

    lr_schedule = learning_parameter_schedule(lr_per_epoch, minibatch_size=minibatch_size,
                                              epoch_size=epoch_size)
    mm_schedule = momentum_schedule(0.9, minibatch_size)  # momentum

    # Progress writers
    progress_writers = [
        ProgressPrinter(tag='Training', num_epochs=max_epochs, gen_heartbeat=gen_heartbeat)
    ]
    tensorboard_writer = None
    if tensorboard_logdir is not None:
        tensorboard_writer = TensorBoardProgressWriter(
            freq=10, log_dir=tensorboard_logdir, model=model)
        progress_writers.append(tensorboard_writer)

    # Trainer object
    l2_reg_weight = 0.0001
    learner = adam(model.parameters, lr_schedule, mm_schedule,
                   l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(model, (loss, error_rate), learner, progress_writers)

    # Mapping from reader streams to network inputs (reused for reader_test).
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    log_number_of_parameters(model)
    print("*********Training Start*********")
    # time.clock() was removed in Python 3.8; perf_counter() is a monotonic
    # wall-clock timer intended for measuring elapsed intervals.
    start = time.perf_counter()

    for epoch in range(max_epochs):
        sample_count = 0
        while sample_count < epoch_size:
            # Never request more samples than remain in this epoch.
            data = reader_train.next_minibatch(
                min(minibatch_size, epoch_size - sample_count), input_map=input_map)
            trainer.train_minibatch(data)
            sample_count += trainer.previous_minibatch_sample_count

        trainer.summarize_training_progress()

        # Log mean of each parameter tensor, so that we can confirm that the
        # parameters change indeed.
        if tensorboard_writer:
            for parameter in model.parameters:
                tensorboard_writer.write_value(parameter.uid + "/mean",
                                               reduce_mean(parameter).eval(), epoch)

        if model_dir:
            model.save(os.path.join(model_dir, network_name + "_{}.dnn".format(epoch)))
        # NOTE(review): this function never calls start_profiler, so this call
        # looks like a leftover; kept to preserve behavior.
        enable_profiler()

    # Evaluation parameters
    test_epoch_size = 10000
    test_minibatch_size = 32

    # Process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    sample_count = 0

    while sample_count < test_epoch_size:
        requested = min(test_minibatch_size, test_epoch_size - sample_count)
        data = reader_test.next_minibatch(requested, input_map=input_map)
        # Weight by the number of samples actually returned so the weights
        # agree with the progress counter.
        actual = data[label_var].num_samples
        metric_numer += trainer.test_minibatch(data) * actual
        metric_denom += actual
        sample_count += actual

    print("")
    trainer.summarize_test_progress()
    print("")
    elapsed = time.perf_counter() - start
    return 1 - metric_numer / metric_denom, elapsed
def train_and_evaluate(reader_train, reader_test, epoch_size, max_epochs,
                       profiler_dir=None, model_dir=None, log_dir=None,
                       tensorboard_logdir=None, gen_heartbeat=False, fp16=False):
    """Train the CIFAR-10 model, optionally in float16, and return its test error.

    Args:
        reader_train: Training minibatch source; its ``streams.features`` and
            ``streams.labels`` are bound to the network inputs.
        reader_test: Test minibatch source, read with the same input map.
        epoch_size: Number of samples consumed per training epoch.
        max_epochs: Number of training epochs.
        profiler_dir: If truthy, ``start_profiler`` is called with it before
            training and ``stop_profiler`` after training.
        model_dir: If truthy, the model is saved to
            ``os.path.join(model_dir, model_name)`` after every epoch.
        log_dir: Forwarded to ``ProgressPrinter`` as ``log_to_file``.
        tensorboard_logdir: If not ``None``, a ``TensorBoardProgressWriter``
            is attached and per-parameter means are logged once per epoch.
        gen_heartbeat: Forwarded to ``ProgressPrinter``.
        fp16: If true, inputs are cast to float16, the model is built with
            float16 as the default dtype, and the loss and metric are cast
            back to float32.

    Returns:
        The sample-weighted mean of ``trainer.test_minibatch`` over
        10000 test samples.
    """
    set_computation_network_trace_level(0)

    # Input variables denoting the features and label data
    input_var = C.input_variable((num_channels, image_height, image_width), name='features')
    label_var = C.input_variable((num_classes))

    dtype = np.float16 if fp16 else np.float32
    if fp16:
        graph_input = C.cast(input_var, dtype=np.float16)
        graph_label = C.cast(label_var, dtype=np.float16)
    else:
        graph_input = input_var
        graph_label = label_var

    # Build the model and criteria under the selected default dtype.
    # ``dtype`` was previously computed but never applied, so with fp16=True
    # the network would be created with the default precision while being fed
    # float16 inputs. With fp16=False this uses np.float32, as before.
    with C.default_options(dtype=dtype):
        z = create_cifar10_model(graph_input, 3, num_classes)
        ce = cross_entropy_with_softmax(z, graph_label)
        pe = classification_error(z, graph_label)

    if fp16:
        ce = C.cast(ce, dtype=np.float32)
        pe = C.cast(pe, dtype=np.float32)

    # Per-sample learning rates, one entry per epoch_size samples.
    lr_per_sample = ([0.00015625] * 20 + [0.00046875] * 20 + [0.00015625] * 20
                     + [0.000046875] * 20 + [0.000015625])

    # Shared training parameters
    minibatch_size = 128
    l2_reg_weight = 0.0001

    # Set learning parameters
    lr_schedule = C.learning_parameter_schedule_per_sample(
        lr_per_sample, epoch_size=epoch_size)
    mm_schedule = momentum_schedule(0.9, minibatch_size)

    # Progress writers
    progress_writers = [
        ProgressPrinter(tag='Training', log_to_file=log_dir, num_epochs=max_epochs,
                        gen_heartbeat=gen_heartbeat)
    ]
    tensorboard_writer = None
    if tensorboard_logdir is not None:
        tensorboard_writer = TensorBoardProgressWriter(
            freq=10, log_dir=tensorboard_logdir, model=z)
        progress_writers.append(tensorboard_writer)

    # Trainer object
    learner = momentum_sgd(z.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(z, (ce, pe), learner, progress_writers)

    # Mapping from reader streams to network inputs (reused for reader_test).
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    log_number_of_parameters(z)
    print()

    # Perform model training
    if profiler_dir:
        start_profiler(profiler_dir, True)

    for epoch in range(max_epochs):
        sample_count = 0
        while sample_count < epoch_size:
            # Never request more samples than remain in this epoch.
            data = reader_train.next_minibatch(
                min(minibatch_size, epoch_size - sample_count), input_map=input_map)
            trainer.train_minibatch(data)
            sample_count += trainer.previous_minibatch_sample_count

        trainer.summarize_training_progress()

        # Log mean of each parameter tensor, so that we can confirm that the
        # parameters change indeed.
        if tensorboard_writer:
            for parameter in z.parameters:
                tensorboard_writer.write_value(parameter.uid + "/mean",
                                               reduce_mean(parameter).eval(), epoch)

        if model_dir:
            # NOTE(review): ``model_name`` is not defined in this function and
            # is presumably a module-level name - confirm. The path does not
            # depend on ``epoch``, so every epoch writes to the same file.
            z.save(os.path.join(model_dir, model_name))
        enable_profiler()  # begin to collect profiler data after first epoch

    if profiler_dir:
        stop_profiler()

    # Evaluation parameters
    test_epoch_size = 10000
    test_minibatch_size = 16

    # Process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    sample_count = 0

    while sample_count < test_epoch_size:
        requested = min(test_minibatch_size, test_epoch_size - sample_count)
        data = reader_test.next_minibatch(requested, input_map=input_map)
        # Weight by the number of samples actually returned so the weights
        # agree with the progress counter.
        actual = data[label_var].num_samples
        metric_numer += trainer.test_minibatch(data) * actual
        metric_denom += actual
        sample_count += actual

    print("")
    trainer.summarize_test_progress()
    print("")

    return metric_numer / metric_denom
def train_model(base_model_file, train_map_file, test_map_file, input_resolution,
                num_epochs, mb_size, max_train_images, lr_per_mb, momentum_per_mb,
                l2_reg_weight, dropout_rate, freeze_weights, num_channels=3):
    """Train a transfer-learning model and plot per-epoch train/test error.

    Args:
        base_model_file: Passed to ``create_model`` as the base model.
        train_map_file: Map file of the training set; read with ``readTable``.
            Column 1 is converted to integers and its maximum plus one is
            used as the number of classes.
        test_map_file: Map file of the test set; its row count is the test
            epoch size.
        input_resolution: Used as both image width and image height.
        num_epochs: Number of training epochs.
        mb_size: Minibatch size for training and for the test-error pass.
        max_train_images: Upper bound on the training epoch size.
        lr_per_mb: Learning rate(s) per minibatch for the schedule.
        momentum_per_mb: Momentum value(s) for ``momentum_schedule``.
        l2_reg_weight: L2 regularization weight for the learner.
        dropout_rate: Passed to ``create_model``.
        freeze_weights: Passed to ``create_model``.
        num_channels: Number of image channels (default 3).

    Returns:
        The trained model object created by ``create_model``.
    """
    # Init
    image_width = input_resolution
    image_height = input_resolution
    # Parse the training map once; it is needed for both the epoch size and
    # the class count.
    train_table = readTable(train_map_file)
    epoch_size_test = len(readTable(test_map_file))
    epoch_size_train = min(len(train_table), max_train_images)
    num_classes = max(toIntegers(getColumn(train_table, 1))) + 1

    # Create the minibatch sources
    minibatch_source_train = create_mb_source(train_map_file, image_width, image_height,
                                              num_channels, num_classes, True)
    minibatch_source_test = create_mb_source(test_map_file, image_width, image_height,
                                             num_channels, num_classes, False)

    # Define mapping from reader streams to network inputs. The same map is
    # also handed to cntkComputeTestError with the test source.
    label_input = input_variable(num_classes)
    image_input = input_variable((num_channels, image_height, image_width), name="input")
    input_map = {
        image_input: minibatch_source_train['features'],
        label_input: minibatch_source_train['labels']
    }

    # Instantiate the transfer learning model and loss function
    cntkModel = create_model(base_model_file, image_input, num_classes, dropout_rate,
                             freeze_weights)
    ce = cross_entropy_with_softmax(cntkModel, label_input)
    pe = classification_error(cntkModel, label_input)

    # Instantiate the trainer object
    lr_schedule = learning_rate_schedule(lr_per_mb, unit=UnitType.minibatch)
    mm_schedule = momentum_schedule(momentum_per_mb)
    learner = momentum_sgd(cntkModel.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    progress_writers = [ProgressPrinter(tag='Training', num_epochs=num_epochs)]
    trainer = Trainer(cntkModel, (ce, pe), learner, progress_writers)

    # Run training epochs
    print(
        "Training transfer learning model for {0} epochs (epoch_size_train = {1})."
        .format(num_epochs, epoch_size_train))
    errsTest = []
    errsTrain = []
    log_number_of_parameters(cntkModel)

    for epoch in range(num_epochs):
        err_numer = 0
        sample_counts = 0
        while sample_counts < epoch_size_train:  # Loop over minibatches in the epoch
            sample_count = min(mb_size, epoch_size_train - sample_counts)
            data = minibatch_source_train.next_minibatch(sample_count, input_map=input_map)
            trainer.train_minibatch(data)  # Update model with it
            sample_counts += sample_count  # Count samples processed so far
            err_numer += trainer.previous_minibatch_evaluation_average * sample_count

            # Progress message every 100 full minibatches.
            if sample_counts % (100 * mb_size) == 0:
                print("Training: processed {0} samples".format(sample_counts))

            # Visualize training images
            # img_data = data[image_input].asarray()
            # for i in range(len(img_data)):
            #     debugImg = img_data[i].squeeze().swapaxes(0, 1).swapaxes(1, 2) / 255.0
            #     imshow(debugImg)

        # Compute error on the training and test sets
        errsTrain.append(err_numer / float(sample_counts))
        trainer.summarize_training_progress()
        errsTest.append(
            cntkComputeTestError(trainer, minibatch_source_test, mb_size,
                                 epoch_size_test, input_map))
        trainer.summarize_test_progress()

        # Plot training progress
        plt.plot(errsTrain, 'b-', errsTest, 'g-')
        plt.xlabel('Epoch number')
        plt.ylabel('Error')
        plt.title('Training error (blue), test error (green)')
        plt.draw()
    return cntkModel