def train():
    '''
    Train the word2vec network on the pickled corpus using SGD.

    Reads the corpus from 'tmp_textdata.pickle' in the current directory,
    subsamples it in place, builds the network with create_inputs /
    create_model, and runs a CNTK training session with no sample cap
    (max_samples=None). Checkpoints go to 'word2vec_checkpoint' in the
    current working directory.

    Relies on module-level settings defined elsewhere in the file:
    max_window_size, hidden_dim, learning_rate and
    clipping_threshold_per_sample.
    '''
    print('Unpickling data (this could take a short while)')
    # NOTE: unpickling can execute arbitrary code -- only load a pickle file
    # that was produced locally / comes from a trusted source.
    # The context manager closes the handle as soon as loading is done.
    with open('tmp_textdata.pickle', 'rb') as pickle_file:
        training_data = pickle.load(pickle_file)

    print('Preprocessing data (this could take a LONG while)...')
    do_subsampling(training_data, subsampling=4e-5, prog_freq=1e7)
    print('Preprocessing is done. Final # of training words: {}'.format(
        len(training_data.text_as_id_list)))

    mb_source = WordMinibatchSource(training_data, max_window_size)
    mb_num_samples = 128
    mb_size = minibatch_size_schedule(mb_num_samples)

    freq_list = training_data.id2freq
    vocab_dim = len(freq_list)
    print(vocab_dim)

    input_vector, label_vector = create_inputs(vocab_dim)
    z, cross_entropy, error = create_model(input_vector, label_vector,
                                           freq_list, vocab_dim, hidden_dim)

    # lr_schedule and mom_schedule are only referenced by the disabled Adam
    # learner below; the active SGD learner uses lr_schedule2, whose rate is
    # multiplied by 0.8 each step over ten steps, with an epoch_size of half
    # the corpus length.
    lr_schedule = learning_rate_schedule(learning_rate, UnitType.sample)
    lr_schedule2 = learning_rate_schedule(
        [(3e-3)*(0.8**i) for i in range(10)],
        UnitType.sample,
        epoch_size=len(training_data.text_as_id_list)//2)
    mom_schedule = C.learners.momentum_schedule(0.005, UnitType.sample)
    gradient_clipping_with_truncation = True
    learner = C.learners.sgd(
        z.parameters,
        lr=lr_schedule2,
        gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
        gradient_clipping_with_truncation=gradient_clipping_with_truncation)

    # Alternative learner, currently disabled:
    # var_mom_schedule = C.learners.momentum_schedule(0.999, UnitType.sample)
    # learner2 = C.learners.adam(z.parameters,
    #     lr=lr_schedule,
    #     momentum=mom_schedule,
    #     variance_momentum=var_mom_schedule,
    #     epsilon=1.5e-8,
    #     gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
    #     gradient_clipping_with_truncation=gradient_clipping_with_truncation)

    progress_printer = C.logging.ProgressPrinter(freq=200, tag='Training')
    checkpoint_config = CheckpointConfig(
        frequency=100000*mb_num_samples,
        filename=os.path.join(os.getcwd(), "word2vec_checkpoint"),
        restore=False)

    trainer = Trainer(z, (cross_entropy, error), [learner],
                      progress_writers=[progress_printer])

    # Bind the network inputs to the streams exposed by the minibatch source.
    input_map = {
        input_vector: mb_source.fsi,
        label_vector: mb_source.lsi
    }

    session = training_session(
        trainer, mb_source, mb_size, input_map,
        progress_frequency=len(training_data.text_as_id_list),
        max_samples=None,
        checkpoint_config=checkpoint_config,
        cv_config=None,
        test_config=None)

    C.logging.log_number_of_parameters(z)
    print()
    session.train()
# Script fragment: configures the trainer, runs the training session and then
# builds a reader for the test set. It relies on names defined earlier in the
# script (numOfEpochs, outputLayer, crossEntropy, classificationError, reader,
# input_map, featuresShapeValue, labelsShapeValue).
printer = [ProgressPrinter( tag = 'Training', num_epochs = numOfEpochs)]
# Three per-sample learning rates; the positional 700 is presumably the
# schedule's epoch_size -- TODO confirm against the CNTK signature.
learningRate = learning_rate_schedule([0.1, 0.01, 0.001], UnitType.sample, 700)
trainer = Trainer(outputLayer,(crossEntropy, classificationError), [adadelta(outputLayer.parameters, learningRate)], printer)
minibatchSize = 50
numberOfSamples = 2208
numberOfSweepsForTraining = 10
# The sample cap is numberOfSamples * numberOfSweepsForTraining, and
# progress_frequency is set to numberOfSamples.
trainingSession = training_session(
    trainer=trainer,
    mb_source=reader,
    mb_size=minibatchSize,
    model_inputs_to_streams=input_map,
    max_samples=numberOfSamples * numberOfSweepsForTraining,
    progress_frequency=numberOfSamples
    )
trainingSession.train()
# Testing time
# testPath = "test.txt"
# NOTE(review): the flattened source does not show whether the assignment
# above was live code or commented out. It is kept as a comment here, so
# testPath must already be defined earlier in the script -- confirm.
ctfdResultTest = CTFDeserializer(testPath, StreamDefs(
    features=StreamDef(field='features', shape=featuresShapeValue),
    labels=StreamDef(field='labels', shape=labelsShapeValue)))
readerTest = MinibatchSource(ctfdResultTest)
def train(input_dir, output_dir, num_epochs):
    '''
    Coordinates model creation and training; minibatch creation

    Args:
        input_dir: handed to MyDataSource as the location of the input data.
        output_dir: directory (created if missing) that receives the
            per-epoch checkpoints and, on rank 0, 'trained.model'.
        num_epochs: number of epochs to train; each epoch is
            minibatch_size * minibatches_per_epoch = 16,000 samples.
    '''
    num_landcover_classes = 5
    num_color_channels = 4
    block_size = 256
    padding = int(block_size / 4)

    my_rank = distributed.Communicator.rank()
    number_of_workers = distributed.Communicator.num_workers()
    os.makedirs(output_dir, exist_ok=True)

    # We extract 160 sample regions from an input image before moving along to
    # the next image file. Our epoch size is 16,000 samples.
    minibatch_size = 10
    minibatches_per_image = 160
    minibatches_per_epoch = 1600
    epoch_size = minibatch_size * minibatches_per_epoch

    # Define the input variables
    f_dim = (num_color_channels, block_size, block_size)
    l_dim = (num_landcover_classes, block_size, block_size)
    feature = cntk.input_variable(f_dim, np.float32)
    label = cntk.input_variable(l_dim, np.float32)

    # Define the minibatch source
    minibatch_source = MyDataSource(f_dim, l_dim, number_of_workers,
                                    input_dir, minibatches_per_image)
    input_map = {
        feature: minibatch_source.streams.features,
        label: minibatch_source.streams.labels
    }

    # Define the model
    model = model_mini_pub.model(num_landcover_classes, block_size, 2,
                                 [64, 32, 32, 32])(feature)

    # Define the loss function and metric. Note that loss is not computed
    # directly on the model's output; the edges are first dropped.
    output = center_square(
        cntk.reshape(model, (num_landcover_classes, block_size, block_size)),
        block_size, padding)
    label_center = center_square(label, block_size, padding)
    mean_ce, pe = criteria(label_center, output, block_size,
                           num_landcover_classes, [0.0, 1.0, 1.0, 1.0, 1.0])

    # Create the progress writer, learner, and trainer (which will be a
    # distributed trainer if number_of_workers > 1).
    # ProgressPrinter's `freq` counts minibatches, not samples, so it gets
    # minibatches_per_epoch -- the same figure announced in the
    # "Printing progress every ..." message below.
    progress_writers = [
        cntk.logging.progress_print.ProgressPrinter(
            tag='Training',
            num_epochs=num_epochs,
            freq=minibatches_per_epoch,
            rank=my_rank)
    ]

    # The schedule is specified per minibatch and converted to per-sample
    # rates; each entry applies for one epoch_size worth of samples.
    lr_per_mb = [0.0001] * 30 + [0.00001] * 30 + [0.000001]
    lr_per_sample = [lr / minibatch_size for lr in lr_per_mb]
    lr_schedule = cntk.learning_rate_schedule(lr_per_sample,
                                              epoch_size=epoch_size,
                                              unit=cntk.UnitType.sample)
    learner = cntk.rmsprop(model.parameters, lr_schedule,
                           0.95, 1.1, 0.9, 1.1, 0.9,
                           l2_regularization_weight=0.00001)

    if number_of_workers > 1:
        parameter_learner = distributed.data_parallel_distributed_learner(
            learner, num_quantization_bits=32)
        trainer = cntk.Trainer(output, (mean_ce, pe), parameter_learner,
                               progress_writers)
    else:
        trainer = cntk.Trainer(output, (mean_ce, pe), learner,
                               progress_writers)

    # Perform the training! Note that some progress output will be generated by
    # each of the workers.
    if my_rank == 0:
        print('Retraining model for {} epochs.'.format(num_epochs))
        print('Found {} workers'.format(number_of_workers))
        print('Printing progress every {} minibatches'.format(
            minibatches_per_epoch))
        cntk.logging.progress_print.log_number_of_parameters(model)

    training_session(
        trainer=trainer,
        max_samples=num_epochs * epoch_size,
        mb_source=minibatch_source,
        mb_size=minibatch_size,
        model_inputs_to_streams=input_map,
        checkpoint_config=CheckpointConfig(
            frequency=epoch_size,
            filename=os.path.join(output_dir, 'trained_checkpoint.model'),
            preserve_all=True),
        progress_frequency=epoch_size).train()

    distributed.Communicator.finalize()

    # Only rank 0 writes the final model.
    if my_rank == 0:
        trainer.model.save(os.path.join(output_dir, 'trained.model'))
def retrain_model(map_filename, output_dir, num_classes, epoch_size,
                  model_filename, num_epochs, model_type, retraining_type):
    '''
    Coordinates retraining after MAP file creation

    Args:
        map_filename: MAP file handed to create_minibatch_source.
        output_dir: directory receiving the checkpoints and, on rank 0,
            'retrained.model'.
        num_classes: number of output classes (size of the label input).
        epoch_size: number of samples per epoch.
        model_filename: pretrained model file passed to the model loader.
        num_epochs: number of epochs to train for.
        model_type: 'alexnet' or 'resnet18'.
        retraining_type: forwarded unchanged to the model loader.

    Returns:
        The rank of this worker as reported by distributed.Communicator.

    Raises:
        ValueError: if model_type is not one of the supported values.
    '''
    # Fail fast on an unsupported model type: otherwise `model` would never
    # be assigned and the function would die later with an opaque
    # UnboundLocalError, after the minibatch source had already been built.
    if model_type not in ('alexnet', 'resnet18'):
        raise ValueError(
            "Unsupported model_type {!r}; expected 'alexnet' or "
            "'resnet18'".format(model_type))

    # load minibatch and model
    minibatch_source = create_minibatch_source(map_filename, num_classes)

    image_input = cntk.ops.input_variable((3, 224, 224))
    label_input = cntk.ops.input_variable((num_classes))
    input_map = {
        image_input: minibatch_source.streams.features,
        label_input: minibatch_source.streams.labels
    }

    if model_type == 'alexnet':
        model = load_alexnet_model(image_input, num_classes, model_filename,
                                   retraining_type)
    else:  # 'resnet18' -- guaranteed by the validation above
        model = load_resnet18_model(image_input, num_classes, model_filename,
                                    retraining_type)

    # Set learning parameters
    ce = cntk.losses.cross_entropy_with_softmax(model, label_input)
    pe = cntk.metrics.classification_error(model, label_input)
    l2_reg_weight = 0.0005
    lr_per_sample = [0.00001] * 33 + [0.000001] * 33 + [0.0000001]
    momentum_time_constant = 10
    mb_size = 16
    lr_schedule = cntk.learners.learning_rate_schedule(
        lr_per_sample, unit=cntk.UnitType.sample)
    mm_schedule = cntk.learners.momentum_as_time_constant_schedule(
        momentum_time_constant)

    # Instantiate the appropriate trainer object
    my_rank = distributed.Communicator.rank()
    num_workers = distributed.Communicator.num_workers()
    num_minibatches = int(np.ceil(epoch_size / mb_size))

    progress_writers = [
        cntk.logging.progress_print.ProgressPrinter(
            tag='Training',
            num_epochs=num_epochs,
            freq=num_minibatches,
            rank=my_rank)
    ]
    learner = cntk.learners.fsadagrad(parameters=model.parameters,
                                      lr=lr_schedule,
                                      momentum=mm_schedule,
                                      l2_regularization_weight=l2_reg_weight)
    # Wrap the learner for data-parallel training when several workers run.
    if num_workers > 1:
        parameter_learner = distributed.data_parallel_distributed_learner(
            learner, num_quantization_bits=32)
        trainer = cntk.Trainer(model, (ce, pe), parameter_learner,
                               progress_writers)
    else:
        trainer = cntk.Trainer(model, (ce, pe), learner, progress_writers)

    # Print summary lines to stdout and perform training
    if my_rank == 0:
        print('Retraining model for {} epochs.'.format(num_epochs))
        print('Found {} workers'.format(num_workers))
        print('Printing progress every {} minibatches'.format(
            num_minibatches))
        cntk.logging.progress_print.log_number_of_parameters(model)

    training_session(
        trainer=trainer,
        max_samples=num_epochs * epoch_size,
        mb_source=minibatch_source,
        mb_size=mb_size,
        model_inputs_to_streams=input_map,
        checkpoint_config=CheckpointConfig(
            frequency=epoch_size,
            filename=os.path.join(output_dir, 'retrained_checkpoint.model')),
        progress_frequency=epoch_size).train()

    distributed.Communicator.finalize()

    # Only rank 0 writes the final model.
    if my_rank == 0:
        trainer.model.save(os.path.join(output_dir, 'retrained.model'))

    return (my_rank)
def main(map_filename, output_dir, pretrained_model_filename):
    '''
    Retrain and save the existing AlexNet model

    Args:
        map_filename: tab-separated MAP file; column 0 is an image path and
            column 1 its label. Blank lines are ignored.
        output_dir: directory receiving the checkpoints and
            'retrained.model'.
        pretrained_model_filename: pretrained model passed to modify_model.

    Raises:
        ValueError: if the MAP file contains no samples.
        AssertionError: if the sample image is not 224 x 224 with 3 channels.
    '''
    num_epochs = 50
    mb_size = 16

    # Find the number of classes and the number of samples per epoch
    labels = set()
    epoch_size = 0
    sample_image_filename = None
    with open(map_filename, 'r') as f:
        for line in f:
            stripped = line.strip()
            if not stripped:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            fields = stripped.split('\t')
            labels.add(fields[1])
            epoch_size += 1
            # The last listed image is used as the dimension sample below.
            sample_image_filename = fields[0]

    if epoch_size == 0:
        raise ValueError(
            'MAP file {} is empty: no samples found'.format(map_filename))

    num_classes = len(labels)
    num_minibatches = int(epoch_size // mb_size)

    # find the typical image dimensions
    # (the context manager releases the image file handle once it is read)
    with Image.open(sample_image_filename) as sample_image:
        image_height, image_width, num_channels = np.asarray(
            sample_image).shape
    assert num_channels == 3, 'Expected to find images with 3 color channels'
    assert (image_height == 224) and (image_width == 224), \
        'Expected to find images of size 224 pixels x 224 pixels'

    # Create the minibatch source
    minibatch_source = create_reader(map_filename, image_height, image_width,
                                     num_channels, num_classes)

    # Input variables denoting features and label data
    image_input = cntk.ops.input_variable(
        (num_channels, image_height, image_width))
    label_input = cntk.ops.input_variable((num_classes))

    # define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source.streams.features,
        label_input: minibatch_source.streams.labels
    }

    # Instantiate the modified pretrained model and the loss function
    model = modify_model(pretrained_model_filename, image_input, num_classes)
    ce = cntk.losses.cross_entropy_with_softmax(model, label_input)
    pe = cntk.metrics.classification_error(model, label_input)

    # Set learning parameters
    l2_reg_weight = 0.0005
    lr_per_sample = [0.00001] * 10 + [0.000001] * 5 + [0.0000001]
    momentum_time_constant = 10
    lr_schedule = cntk.learners.learning_rate_schedule(
        lr_per_sample, unit=cntk.UnitType.sample)
    mm_schedule = cntk.learners.momentum_as_time_constant_schedule(
        momentum_time_constant)

    # Instantiate the trainer object
    progress_writers = [
        cntk.logging.progress_print.ProgressPrinter(
            tag='Training',
            num_epochs=num_epochs,
            freq=num_minibatches)
    ]
    learner = cntk.learners.momentum_sgd(
        model.parameters, lr_schedule, mm_schedule,
        l2_regularization_weight=l2_reg_weight)
    trainer = cntk.Trainer(model, (ce, pe), learner, progress_writers)

    # Perform retraining and save the resulting model
    cntk.logging.progress_print.log_number_of_parameters(model)
    training_session(
        trainer=trainer,
        max_samples=num_epochs * epoch_size,
        mb_source=minibatch_source,
        mb_size=mb_size,
        model_inputs_to_streams=input_map,
        checkpoint_config=CheckpointConfig(
            frequency=epoch_size,
            filename=os.path.join(output_dir, 'retrained_checkpoint.model')),
        progress_frequency=epoch_size).train()

    model.save(os.path.join(output_dir, 'retrained.model'))