Ejemplo n.º 1
0
def train():
    """Train the model built by ``create_model`` on the pickled text corpus.

    Loads the corpus object from ``tmp_textdata.pickle``, subsamples it,
    builds the model and an SGD learner with a decaying learning-rate
    schedule, and runs a checkpointed CNTK training session.

    Reads module-level names defined outside this function, including
    ``max_window_size``, ``hidden_dim`` and
    ``clipping_threshold_per_sample``.
    """
    print('Unpickling data (this could take a short while)')
    # NOTE(review): unpickling can execute arbitrary code from the file; only
    # load a pickle produced by a trusted preprocessing step.
    # The context manager closes the file handle even if unpickling fails.
    with open('tmp_textdata.pickle', 'rb') as pickle_file:
        training_data = pickle.load(pickle_file)

    print('Preprocessing data (this could take a LONG while)...')
    # The return value is unused; presumably this subsamples training_data
    # in place -- confirm against do_subsampling.
    do_subsampling(training_data, subsampling=4e-5, prog_freq=1e7)
    num_training_words = len(training_data.text_as_id_list)
    print('Preprocessing is done. Final # of training words: {}'.format(num_training_words))

    mb_source = WordMinibatchSource(training_data, max_window_size)
    mb_num_samples = 128
    mb_size = minibatch_size_schedule(mb_num_samples)

    freq_list = training_data.id2freq
    vocab_dim = len(freq_list)
    print(vocab_dim)
    input_vector, label_vector = create_inputs(vocab_dim)

    z, cross_entropy, error = create_model(
        input_vector, label_vector, freq_list, vocab_dim, hidden_dim)

    # Ten per-sample learning rates, starting at 3e-3 and multiplied by 0.8
    # at each step; the schedule unit is half the corpus length.
    lr_schedule = learning_rate_schedule(
        [(3e-3) * (0.8 ** i) for i in range(10)],
        UnitType.sample,
        epoch_size=num_training_words // 2)
    learner = C.learners.sgd(
        z.parameters,
        lr=lr_schedule,
        gradient_clipping_threshold_per_sample=clipping_threshold_per_sample,
        gradient_clipping_with_truncation=True)

    progress_printer = C.logging.ProgressPrinter(freq=200, tag='Training')
    # Checkpoint every 100000 * mb_num_samples samples; restore is disabled.
    checkpoint_config = CheckpointConfig(
        frequency=100000 * mb_num_samples,
        filename=os.path.join(os.getcwd(), "word2vec_checkpoint"),
        restore=False)

    trainer = Trainer(z, (cross_entropy, error), [learner],
                      progress_writers=[progress_printer])

    input_map = {input_vector: mb_source.fsi, label_vector: mb_source.lsi}

    # max_samples=None places no explicit sample limit on the session.
    session = training_session(
        trainer, mb_source, mb_size, input_map,
        progress_frequency=num_training_words,
        max_samples=None,
        checkpoint_config=checkpoint_config,
        cv_config=None,
        test_config=None)

    C.logging.log_number_of_parameters(z)
    print()
    session.train()
Ejemplo n.º 2
0
# Script fragment: trains `outputLayer` with a CNTK training session, then
# builds a minibatch source over the test file. The names numOfEpochs,
# outputLayer, crossEntropy, classificationError, reader, input_map,
# featuresShapeValue and labelsShapeValue are defined outside this fragment.

# Progress writer list handed to the Trainer below.
printer = [ProgressPrinter(
    tag = 'Training',
    num_epochs = numOfEpochs)]

# Per-sample learning rates 0.1 -> 0.01 -> 0.001.
# NOTE(review): the third positional argument (700) is presumably the
# schedule's epoch_size in samples -- confirm against the CNTK signature.
learningRate = learning_rate_schedule([0.1, 0.01, 0.001], UnitType.sample, 700)

# Trainer with cross-entropy as the loss, classification error as the metric,
# and a single AdaDelta learner over all of the model's parameters.
trainer = Trainer(outputLayer,(crossEntropy, classificationError), [adadelta(outputLayer.parameters, learningRate)], printer)

minibatchSize = 50
numberOfSamples = 2208
numberOfSweepsForTraining = 10

# The session stops after numberOfSamples * numberOfSweepsForTraining
# (2208 * 10 = 22080) samples; progress_frequency is set to numberOfSamples.
trainingSession = training_session(
        trainer=trainer,
        mb_source=reader,
        mb_size=minibatchSize,
        model_inputs_to_streams=input_map,
        max_samples=numberOfSamples * numberOfSweepsForTraining,
        progress_frequency=numberOfSamples
    )

trainingSession.train()

# Testing: build a minibatch source over the CTF-format test file.
testPath = "test.txt"

# Map the 'features' and 'labels' fields of the test file to streams with the
# given shapes.
ctfdResultTest = CTFDeserializer(testPath, StreamDefs(
        features=StreamDef(field='features', shape=featuresShapeValue),
        labels=StreamDef(field='labels', shape=labelsShapeValue)))

readerTest = MinibatchSource(ctfdResultTest)
Ejemplo n.º 3
0
def train(input_dir, output_dir, num_epochs):
    """Coordinate model creation, minibatch creation and training.

    Args:
        input_dir: Passed to ``MyDataSource`` as the location of the
            training data.
        output_dir: Directory (created if missing) that receives the
            per-epoch checkpoints and the final ``trained.model``.
        num_epochs: Number of epochs to train; one epoch is
            ``minibatch_size * minibatches_per_epoch`` samples.

    The final model is saved only by the worker whose rank is 0.
    """
    num_landcover_classes = 5
    num_color_channels = 4
    block_size = 256
    padding = block_size // 4

    my_rank = distributed.Communicator.rank()
    number_of_workers = distributed.Communicator.num_workers()
    os.makedirs(output_dir, exist_ok=True)

    # minibatches_per_image is handed to the data source; with 10 samples
    # per minibatch and 1600 minibatches per epoch, the epoch size is
    # 16,000 samples.
    minibatch_size = 10
    minibatches_per_image = 160
    minibatches_per_epoch = 1600
    epoch_size = minibatch_size * minibatches_per_epoch

    # Define the input variables
    f_dim = (num_color_channels, block_size, block_size)
    l_dim = (num_landcover_classes, block_size, block_size)
    feature = cntk.input_variable(f_dim, np.float32)
    label = cntk.input_variable(l_dim, np.float32)

    # Define the minibatch source
    minibatch_source = MyDataSource(f_dim, l_dim, number_of_workers, input_dir,
                                    minibatches_per_image)
    input_map = {
        feature: minibatch_source.streams.features,
        label: minibatch_source.streams.labels
    }

    # Define the model
    model = model_mini_pub.model(num_landcover_classes, block_size, 2,
                                 [64, 32, 32, 32])(feature)

    # Define the loss function and metric. Note that loss is not computed
    # directly on the model's output; the edges are first dropped.
    output = center_square(
        cntk.reshape(model, (num_landcover_classes, block_size, block_size)),
        block_size, padding)
    label_center = center_square(label, block_size, padding)
    mean_ce, pe = criteria(label_center, output, block_size,
                           num_landcover_classes, [0.0, 1.0, 1.0, 1.0, 1.0])

    # ProgressPrinter's ``freq`` counts minibatches, not samples, so use the
    # number of minibatches per epoch; this also matches the message logged
    # below.
    progress_writers = [
        cntk.logging.progress_print.ProgressPrinter(
            tag='Training',
            num_epochs=num_epochs,
            freq=minibatches_per_epoch,
            rank=my_rank)
    ]

    # The schedule is specified per minibatch and converted to per-sample
    # rates; it advances one entry every epoch_size samples.
    lr_per_mb = [0.0001] * 30 + [0.00001] * 30 + [0.000001]
    lr_per_sample = [lr / minibatch_size for lr in lr_per_mb]
    lr_schedule = cntk.learning_rate_schedule(lr_per_sample,
                                              epoch_size=epoch_size,
                                              unit=cntk.UnitType.sample)
    # NOTE(review): the five positional values are presumably gamma, inc,
    # dec, max and min of cntk.rmsprop -- confirm against the installed
    # CNTK version.
    learner = cntk.rmsprop(model.parameters,
                           lr_schedule,
                           0.95,
                           1.1,
                           0.9,
                           1.1,
                           0.9,
                           l2_regularization_weight=0.00001)

    # With more than one worker, wrap the learner so that training is
    # data-parallel; the trainer is otherwise built identically.
    if number_of_workers > 1:
        learner = distributed.data_parallel_distributed_learner(
            learner, num_quantization_bits=32)
    trainer = cntk.Trainer(output, (mean_ce, pe), learner, progress_writers)

    # Perform the training! Note that some progress output will be generated by
    # each of the workers.
    if my_rank == 0:
        print('Retraining model for {} epochs.'.format(num_epochs))
        print('Found {} workers'.format(number_of_workers))
        print('Printing progress every {} minibatches'.format(
            minibatches_per_epoch))
        cntk.logging.progress_print.log_number_of_parameters(model)

    checkpoint_config = CheckpointConfig(
        frequency=epoch_size,
        filename=os.path.join(output_dir, 'trained_checkpoint.model'),
        preserve_all=True)
    training_session(trainer=trainer,
                     max_samples=num_epochs * epoch_size,
                     mb_source=minibatch_source,
                     mb_size=minibatch_size,
                     model_inputs_to_streams=input_map,
                     checkpoint_config=checkpoint_config,
                     progress_frequency=epoch_size).train()

    distributed.Communicator.finalize()
    if my_rank == 0:
        trainer.model.save(os.path.join(output_dir, 'trained.model'))
Ejemplo n.º 4
0
def retrain_model(map_filename, output_dir, num_classes, epoch_size,
                  model_filename, num_epochs, model_type, retraining_type):
    """Coordinate retraining after MAP file creation.

    Args:
        map_filename: Passed to ``create_minibatch_source``.
        output_dir: Directory that receives the checkpoint and the final
            ``retrained.model``.
        num_classes: Number of label classes; sizes the label input.
        epoch_size: Number of samples per epoch.
        model_filename: Passed to the model loader.
        num_epochs: Number of epochs to train.
        model_type: Either ``'alexnet'`` or ``'resnet18'``.
        retraining_type: Passed through to the model loader.

    Returns:
        The rank of this worker as reported by
        ``distributed.Communicator.rank()``.

    Raises:
        ValueError: If ``model_type`` is not a supported value.
    """
    # Fail fast with a clear message; otherwise ``model`` would be left
    # unbound and the failure would surface later as an UnboundLocalError.
    if model_type not in ('alexnet', 'resnet18'):
        raise ValueError(
            "Unsupported model_type {!r}; expected 'alexnet' or "
            "'resnet18'".format(model_type))

    # load minibatch and model
    minibatch_source = create_minibatch_source(map_filename, num_classes)

    image_input = cntk.ops.input_variable((3, 224, 224))
    label_input = cntk.ops.input_variable(num_classes)
    input_map = {
        image_input: minibatch_source.streams.features,
        label_input: minibatch_source.streams.labels
    }

    if model_type == 'alexnet':
        model = load_alexnet_model(image_input, num_classes, model_filename,
                                   retraining_type)
    else:  # 'resnet18', guaranteed by the validation above
        model = load_resnet18_model(image_input, num_classes, model_filename,
                                    retraining_type)

    # Set learning parameters
    ce = cntk.losses.cross_entropy_with_softmax(model, label_input)
    pe = cntk.metrics.classification_error(model, label_input)
    l2_reg_weight = 0.0005
    lr_per_sample = [0.00001] * 33 + [0.000001] * 33 + [0.0000001]
    momentum_time_constant = 10
    mb_size = 16
    lr_schedule = cntk.learners.learning_rate_schedule(
        lr_per_sample, unit=cntk.UnitType.sample)
    mm_schedule = cntk.learners.momentum_as_time_constant_schedule(
        momentum_time_constant)

    # Instantiate the appropriate trainer object
    my_rank = distributed.Communicator.rank()
    num_workers = distributed.Communicator.num_workers()
    # Minibatches per epoch, rounded up; used as the progress-print frequency.
    num_minibatches = int(np.ceil(epoch_size / mb_size))

    progress_writers = [
        cntk.logging.progress_print.ProgressPrinter(tag='Training',
                                                    num_epochs=num_epochs,
                                                    freq=num_minibatches,
                                                    rank=my_rank)
    ]
    learner = cntk.learners.fsadagrad(parameters=model.parameters,
                                      lr=lr_schedule,
                                      momentum=mm_schedule,
                                      l2_regularization_weight=l2_reg_weight)
    # With more than one worker, wrap the learner so that training is
    # data-parallel; the trainer is otherwise built identically.
    if num_workers > 1:
        learner = distributed.data_parallel_distributed_learner(
            learner, num_quantization_bits=32)
    trainer = cntk.Trainer(model, (ce, pe), learner, progress_writers)

    # Print summary lines to stdout and perform training
    if my_rank == 0:
        print('Retraining model for {} epochs.'.format(num_epochs))
        print('Found {} workers'.format(num_workers))
        print('Printing progress every {} minibatches'.format(num_minibatches))
        cntk.logging.progress_print.log_number_of_parameters(model)

    checkpoint_config = CheckpointConfig(
        frequency=epoch_size,
        filename=os.path.join(output_dir, 'retrained_checkpoint.model'))
    training_session(trainer=trainer,
                     max_samples=num_epochs * epoch_size,
                     mb_source=minibatch_source,
                     mb_size=mb_size,
                     model_inputs_to_streams=input_map,
                     checkpoint_config=checkpoint_config,
                     progress_frequency=epoch_size).train()

    distributed.Communicator.finalize()
    # Only the rank-0 worker writes the final model.
    if my_rank == 0:
        trainer.model.save(os.path.join(output_dir, 'retrained.model'))

    return my_rank
Ejemplo n.º 5
0
def main(map_filename, output_dir, pretrained_model_filename):
    """Retrain and save the existing AlexNet model.

    Args:
        map_filename: Tab-separated file with one sample per line; the first
            field is an image filename and the second is its label.
        output_dir: Directory that receives the checkpoint and the final
            ``retrained.model``.
        pretrained_model_filename: Passed to ``modify_model``.

    Raises:
        ValueError: If the map file contains no lines.
        AssertionError: If the sample image is not 224 x 224 with 3 channels.
    """
    num_epochs = 50
    mb_size = 16

    # Find the number of classes and the number of samples per epoch. The
    # image named on the last line is kept as the sample used to check the
    # image dimensions.
    labels = set()
    epoch_size = 0
    sample_image_filename = None
    with open(map_filename, 'r') as f:
        for line in f:
            fields = line.strip().split('\t')
            labels.add(fields[1])
            sample_image_filename = fields[0]
            epoch_size += 1
    if sample_image_filename is None:
        raise ValueError(
            'Map file {!r} contains no samples'.format(map_filename))
    num_classes = len(labels)
    # Round up so that fewer than mb_size samples still gives a frequency of
    # 1; a value of 0 would switch ProgressPrinter to a geometric schedule.
    num_minibatches = int(np.ceil(epoch_size / mb_size))

    # Find the typical image dimensions; the context manager closes the
    # image file once its shape has been read.
    with Image.open(sample_image_filename) as sample_image:
        image_height, image_width, num_channels = np.asarray(
            sample_image).shape
    assert num_channels == 3, 'Expected to find images with 3 color channels'
    assert (image_height == 224) and (image_width == 224), \
        'Expected to find images of size 224 pixels x 224 pixels'

    # Create the minibatch source
    minibatch_source = create_reader(map_filename, image_height, image_width,
                                     num_channels, num_classes)

    # Input variables denoting features and label data
    image_input = cntk.ops.input_variable(
        (num_channels, image_height, image_width))
    label_input = cntk.ops.input_variable(num_classes)

    # Define mapping from reader streams to network inputs
    input_map = {
        image_input: minibatch_source.streams.features,
        label_input: minibatch_source.streams.labels
    }

    # Instantiate the modified pretrained model, the loss and the metric
    model = modify_model(pretrained_model_filename, image_input, num_classes)
    ce = cntk.losses.cross_entropy_with_softmax(model, label_input)
    pe = cntk.metrics.classification_error(model, label_input)

    # Set learning parameters
    l2_reg_weight = 0.0005
    lr_per_sample = [0.00001] * 10 + [0.000001] * 5 + [0.0000001]
    momentum_time_constant = 10
    lr_schedule = cntk.learners.learning_rate_schedule(
        lr_per_sample, unit=cntk.UnitType.sample)
    mm_schedule = cntk.learners.momentum_as_time_constant_schedule(
        momentum_time_constant)

    # Instantiate the trainer object
    progress_writers = [
        cntk.logging.progress_print.ProgressPrinter(tag='Training',
                                                    num_epochs=num_epochs,
                                                    freq=num_minibatches)
    ]
    learner = cntk.learners.momentum_sgd(
        model.parameters,
        lr_schedule,
        mm_schedule,
        l2_regularization_weight=l2_reg_weight)
    trainer = cntk.Trainer(model, (ce, pe), learner, progress_writers)

    # Perform retraining and save the resulting model
    cntk.logging.progress_print.log_number_of_parameters(model)
    checkpoint_config = CheckpointConfig(
        frequency=epoch_size,
        filename=os.path.join(output_dir, 'retrained_checkpoint.model'))
    training_session(trainer=trainer,
                     max_samples=num_epochs * epoch_size,
                     mb_source=minibatch_source,
                     mb_size=mb_size,
                     model_inputs_to_streams=input_map,
                     checkpoint_config=checkpoint_config,
                     progress_frequency=epoch_size).train()
    model.save(os.path.join(output_dir, 'retrained.model'))